In [2]:
# First, historical stock prices data is extracted from Yahoo! Finance in CSV file format. 
# This will be an analysis on which silicon chip manufacturer to buy, starting with NVIDIA Corp
# The data will be downloaded from the historical data tab on google finance.
# $TSM Taiwan Semiconductor Manufacturing Co. Ltd.
# $INTC Intel Corp.
# $AMD Advanced Micro Devices Ltd.
# $QCOM Qualcomm Inc.
# $AVGO Broadcom Inc.
# $MU Micron Technology Inc.
# $NVDA NVIDIA Corp.
# $AMAT Applied MAterials, Inc.
# $ASX ASE Technology Holding Co. Ltd.
# Secondly,the data is parsed and indexed.

# Techniques: reading data from CSV files, understanding data types, set dexes, parse dates, use index with
# loc, use indexing with iloc.

In [3]:
import pandas as pd

In [4]:
data = pd.read_csv("NVDA.csv", index_col=0, parse_dates=True)
# Formatting the data:
# Use index_col to remove the automatic indexing and instead use the dates as the index.
# Use parse_dates to convert the date column from an object data type to datetime.

In [5]:
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300


In [6]:
data.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [7]:
data.index

DatetimeIndex(['2022-01-06', '2022-01-07', '2022-01-10', '2022-01-11',
               '2022-01-12', '2022-01-13', '2022-01-14', '2022-01-18',
               '2022-01-19', '2022-01-20',
               ...
               '2022-12-21', '2022-12-22', '2022-12-23', '2022-12-27',
               '2022-12-28', '2022-12-29', '2022-12-30', '2023-01-03',
               '2023-01-04', '2023-01-05'],
              dtype='datetime64[ns]', name='Date', length=251, freq=None)

In [8]:
data.loc['2022-01-06']
# Accessing specific data record with a date.

Open         2.764000e+02
High         2.843800e+02
Low          2.706500e+02
Close        2.817800e+02
Adj Close    2.815167e+02
Volume       4.541860e+07
Name: 2022-01-06 00:00:00, dtype: float64

In [9]:
data.loc['2022-01-06':]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.000000,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.989990,279.728394,38341300
...,...,...,...,...,...,...
2022-12-29,144.020004,146.830002,142.270004,146.029999,146.029999,35492300
2022-12-30,143.339996,146.289993,142.330002,146.139999,146.139999,31027300
2023-01-03,148.509995,149.960007,140.960007,143.149994,143.149994,40127700
2023-01-04,145.669998,148.529999,142.410004,147.490005,147.490005,43132400


In [10]:
# A range of data, for example a financial quarter:
data.loc['2022-01-06':'2022-04-06']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.000000,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.989990,279.728394,38341300
...,...,...,...,...,...,...
2022-03-31,277.820007,282.480011,272.700012,272.859985,272.651520,52344000
2022-04-01,273.750000,274.959991,262.670013,267.119995,266.915894,51723500
2022-04-04,267.279999,275.579987,266.130005,273.600006,273.390961,39712000
2022-04-05,272.540009,273.190002,258.200012,259.309998,259.111877,43661500


In [11]:
# The first day and the last day
data.iloc[0]

Open         2.764000e+02
High         2.843800e+02
Low          2.706500e+02
Close        2.817800e+02
Adj Close    2.815167e+02
Volume       4.541860e+07
Name: 2022-01-06 00:00:00, dtype: float64

In [12]:
data.iloc[-1]
# The data has been indexed with location and ilocation

Open         1.449100e+02
High         1.456400e+02
Low          1.414800e+02
Close        1.426500e+02
Adj Close    1.426500e+02
Volume       3.887120e+07
Name: 2023-01-05 00:00:00, dtype: float64

In [13]:
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-29,144.020004,146.830002,142.270004,146.029999,146.029999,35492300
2022-12-30,143.339996,146.289993,142.330002,146.139999,146.139999,31027300
2023-01-03,148.509995,149.960007,140.960007,143.149994,143.149994,40127700
2023-01-04,145.669998,148.529999,142.410004,147.490005,147.490005,43132400
2023-01-05,144.910004,145.639999,141.479996,142.649994,142.649994,38871200


In [14]:
# Initial phase: the data has been extracted from a website, downloaded into a csv file manually, 
# and python has been used to extract data from a csv file.
# The data types have been parsed.
# The data has been indexed properly.

# NEXT PHASE: manipulating columns, known as series, and performing calculations..
# The aim is to calculate daily change, daily percentage change and normalise the data.
# In the series, type function is used to get the data type.

type(data)

pandas.core.frame.DataFrame

In [15]:
# Acessing the closing column
data['Close']

Date
2022-01-06    281.779999
2022-01-07    272.470001
2022-01-10    274.000000
2022-01-11    278.170013
2022-01-12    279.989990
                 ...    
2022-12-29    146.029999
2022-12-30    146.139999
2023-01-03    143.149994
2023-01-04    147.490005
2023-01-05    142.649994
Name: Close, Length: 251, dtype: float64

In [16]:
type(data['Close'])
# This is a series instead of a data frame.

pandas.core.series.Series

In [17]:
data.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [18]:
# Unlike excel, the series has a restriction on the type of data that it contain, because
# if there are restrictions on the data, calculations can be executed more quickly.
# In excel, an individual record within a series can be a different data type.

# Calculating the difference between the opening price and the close price:

In [19]:
data['Open'] - data['Close']

Date
2022-01-06   -5.380005
2022-01-07    8.940003
2022-01-10   -8.190002
2022-01-11   -4.940002
2022-01-12    0.680023
                ...   
2022-12-29   -2.009995
2022-12-30   -2.800003
2023-01-03    5.360001
2023-01-04   -1.820007
2023-01-05    2.260010
Length: 251, dtype: float64

In [20]:
data.head()
# Open - Close is equal to what is shown

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300


In [21]:
daily_change = data['Open'] - data['Close']
daily_change

Date
2022-01-06   -5.380005
2022-01-07    8.940003
2022-01-10   -8.190002
2022-01-11   -4.940002
2022-01-12    0.680023
                ...   
2022-12-29   -2.009995
2022-12-30   -2.800003
2023-01-03    5.360001
2023-01-04   -1.820007
2023-01-05    2.260010
Length: 251, dtype: float64

In [22]:
type(daily_change)

pandas.core.series.Series

In [23]:
#Calculating the daily percentage change
daily_percentage_change = (data['Close'] - data['Open'])/data['Open']*100
daily_percentage_change

Date
2022-01-06    1.946456
2022-01-07   -3.176860
2022-01-10    3.081149
2022-01-11    1.808001
2022-01-12   -0.242286
                ...   
2022-12-29    1.395636
2022-12-30    1.953400
2023-01-03   -3.609185
2023-01-04    1.249404
2023-01-05   -1.559596
Length: 251, dtype: float64

In [24]:
# Normalising the data: to calculate the growth from an arbritrary baseline, the data can be 
# 'normalised'. 

# First record
data['Close'].iloc[0]

281.779999

In [25]:
# Last record
data['Close'].iloc[-1]

142.649994

In [26]:
# How much did it grow?
norm = data['Close']/data['Close'].iloc[0]
norm

Date
2022-01-06    1.000000
2022-01-07    0.966960
2022-01-10    0.972390
2022-01-11    0.987189
2022-01-12    0.993647
                ...   
2022-12-29    0.518241
2022-12-30    0.518632
2023-01-03    0.508020
2023-01-04    0.523423
2023-01-05    0.506246
Name: Close, Length: 251, dtype: float64

In [27]:
data['Close'].iloc[0]*norm.iloc[-1]

142.64999399999996

In [28]:
data['Close'].iloc[-1]

142.649994

In [38]:
# One year of investvestment in NVDA stock from 2022 to 2023 has resulted in an increase of 50.6%
# This second phase: calculated daily change, daily percentage change, normalised the data

# THIRD PHASE: modify the dataframe so that new columns are added with useful calculations for 
# continuing the financial analysis.
# Calculate minimum, maximum, argmin and argmax values in the columns
# Calculate the average

# Taking the current data frame and adding a column that calculates the daily change:
data['Daily chg'] = data['Close']-data['Open']
data.head()


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily chg,Normalised
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600,5.380005,1.0
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900,-8.940003,0.96696
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100,8.190002,0.97239
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900,4.940002,0.987189
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300,-0.680023,0.993647


In [41]:
# Adding a normalised column to the data frame:
data['Normalised'] = data['Close']/data['Close'].iloc[0]
data.head()


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily chg,Normalised
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600,5.380005,1.0
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900,-8.940003,0.96696
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100,8.190002,0.97239
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900,4.940002,0.987189
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300,-0.680023,0.993647


In [43]:
# What is the minimum value (price of stock) at close of day?
data['Close'].min()

112.269997

In [45]:
# Across all of the trading days, what is the minimum value of all the columns in the data frame?
data.min()

Open          1.097100e+02
High          1.173500e+02
Low           1.081300e+02
Close         1.122700e+02
Adj Close     1.122413e+02
Volume        1.679340e+07
Daily chg    -1.967000e+01
Normalised    3.984314e-01
dtype: float64

In [46]:
# What is the minimum close price at the end of the trading day, from the entire stock price history?
data['Close'].argmin()

# This means that the 194th record holds the minimum closing price for the stock price history.
# This corresponds to the data of 2022-10-14

194

In [48]:
# Using the minimum trading close price, what were the other series values for that trading day?
data.iloc[194]

Open          1.205700e+02
High          1.211100e+02
Low           1.120400e+02
Close         1.122700e+02
Adj Close     1.122413e+02
Volume        7.134320e+07
Daily chg    -8.300003e+00
Normalised    3.984314e-01
Name: 2022-10-14 00:00:00, dtype: float64

In [49]:
# Looking at the range of stock price history just before, and just after:
data.iloc[190:199]
# This shows that on 2022-10-14, the closing price of the stock was the lowest, for all the stock price
# hisotry included in the csv extracted from yahoo finance.
# This lowest closing price was $112.27 

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily chg,Normalised
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-10-10,120.370003,121.239998,114.080002,116.699997,116.670151,69869900,-3.670006,0.414153
2022-10-11,115.610001,118.459999,112.830002,115.860001,115.830368,66748200,0.25,0.411172
2022-10-12,115.790001,117.349998,113.449997,115.0,114.970589,49259000,-0.790001,0.40812
2022-10-13,109.709999,120.779999,108.129997,119.599998,119.569412,85010300,9.889999,0.424445
2022-10-14,120.57,121.110001,112.040001,112.269997,112.24128,71343200,-8.300003,0.398431
2022-10-17,115.18,119.480003,115.169998,118.879997,118.849594,58077400,3.699997,0.421889
2022-10-18,123.440002,124.919998,116.18,119.669998,119.639389,65936200,-3.770004,0.424693
2022-10-19,118.790001,123.5,118.339996,120.510002,120.479179,52313100,1.720001,0.427674
2022-10-20,121.129997,127.690002,121.050003,121.940002,121.908813,65239800,0.810005,0.432749


In [50]:
# Taking the normalised price
data['Normalised'].min()

0.3984313911506544

In [51]:
data['Normalised'].argmin()

194

In [59]:
# What was the maximum close stock price of NVDA?
data['Close'].max()

286.559998

In [60]:
data['Close'].argmax()
# This will provide the index of this record.

56

In [61]:
data.iloc[56:]
# The 56th index shows 2022-03-29 was the date when the closing price was the maxmimum.
# The maximum closing price was $286.56

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Daily chg,Normalised
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-03-29,286.950012,289.459991,279.799988,286.559998,286.341034,48898400,-0.390014,1.016964
2022-03-30,283.040009,284.959991,275.029999,276.899994,276.688416,46348700,-6.140015,0.982682
2022-03-31,277.820007,282.480011,272.700012,272.859985,272.651520,52344000,-4.960022,0.968344
2022-04-01,273.750000,274.959991,262.670013,267.119995,266.915894,51723500,-6.630005,0.947974
2022-04-04,267.279999,275.579987,266.130005,273.600006,273.390961,39712000,6.320007,0.970970
...,...,...,...,...,...,...,...,...
2022-12-29,144.020004,146.830002,142.270004,146.029999,146.029999,35492300,2.009995,0.518241
2022-12-30,143.339996,146.289993,142.330002,146.139999,146.139999,31027300,2.800003,0.518632
2023-01-03,148.509995,149.960007,140.960007,143.149994,143.149994,40127700,-5.360001,0.508020
2023-01-04,145.669998,148.529999,142.410004,147.490005,147.490005,43132400,1.820007,0.523423


In [62]:
data['Close'].mean()
# This calculates the mean value of all the closing prices. That is not a useful calculation,
# but it does help one to start to visualise the volatility of the NVDA sotck.

183.94908357768924

In [66]:
# The average of all the closing prices was $183.95 and since the maximum close price was
# $286.56, it shows there is a large difference between the maximum close price and the average.
# The difference is 286.56 - 183.95 = $102.61
# Or a % change of 102.61/286.56 = 35.8%

# Removing uneeded columns from the data frame:
# data.drop(labels='High','Low', 'Adj Close', 'Volume'), axis=1, inplace=True)
# data.head()
# Instead of keeping large dataframes in the memory, it slows down the computation time.
# Removing uneeded columns speeds up the calculations. It also increases complexity.

# FOURTH PHASE: visualising the stock price history and calculations.
# Using maplotlib to create:
# Subplots
# Multiple plots
# Bar plots
