In [3]:
# First, historical stock prices data is extracted from Yahoo! Finance in CSV file format. 
# This will be an analysis on which silicon chip manufacturer to buy, starting with NVIDIA Corp
# The data is downloaded from the Historical Data tab on Yahoo! Finance.
# $TSM Taiwan Semiconductor Manufacturing Co. Ltd.
# $INTC Intel Corp.
# $AMD Advanced Micro Devices, Inc.
# $QCOM Qualcomm Inc.
# $AVGO Broadcom Inc.
# $MU Micron Technology Inc.
# $NVDA NVIDIA Corp.
# $AMAT Applied Materials, Inc.
# $ASX ASE Technology Holding Co. Ltd.
# Secondly, the data is parsed and indexed.

# Techniques: reading data from CSV files, understanding data types, setting indexes, parsing dates,
# indexing with loc, indexing with iloc.

In [4]:
import pandas as pd

In [5]:
data = pd.read_csv("NVDA.csv", index_col=0, parse_dates=True)
# Formatting the data:
# index_col=0 uses the first CSV column (Date) as the row index instead of the automatic integer index.
# parse_dates=True parses that index into datetimes instead of leaving it as an object (string) column.

In [6]:
# Preview the first rows to confirm the file loaded with dates as the index.
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300


In [7]:
# Check the data type of each column.
data.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [8]:
# Inspect the row index: it should be a DatetimeIndex built from the Date column.
data.index

DatetimeIndex(['2022-01-06', '2022-01-07', '2022-01-10', '2022-01-11',
               '2022-01-12', '2022-01-13', '2022-01-14', '2022-01-18',
               '2022-01-19', '2022-01-20',
               ...
               '2022-12-21', '2022-12-22', '2022-12-23', '2022-12-27',
               '2022-12-28', '2022-12-29', '2022-12-30', '2023-01-03',
               '2023-01-04', '2023-01-05'],
              dtype='datetime64[ns]', name='Date', length=251, freq=None)

In [9]:
data.loc['2022-01-06']
# Accessing a specific record by its date label.

Open         2.764000e+02
High         2.843800e+02
Low          2.706500e+02
Close        2.817800e+02
Adj Close    2.815167e+02
Volume       4.541860e+07
Name: 2022-01-06 00:00:00, dtype: float64

In [10]:
# Open-ended label slice: every record from this date to the end of the data.
data.loc['2022-01-06':]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.000000,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.989990,279.728394,38341300
...,...,...,...,...,...,...
2022-12-29,144.020004,146.830002,142.270004,146.029999,146.029999,35492300
2022-12-30,143.339996,146.289993,142.330002,146.139999,146.139999,31027300
2023-01-03,148.509995,149.960007,140.960007,143.149994,143.149994,40127700
2023-01-04,145.669998,148.529999,142.410004,147.490005,147.490005,43132400


In [11]:
# A range of dates selected with a label slice, for example a financial quarter.
data.loc['2022-01-06':'2022-04-06']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.000000,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.989990,279.728394,38341300
...,...,...,...,...,...,...
2022-03-31,277.820007,282.480011,272.700012,272.859985,272.651520,52344000
2022-04-01,273.750000,274.959991,262.670013,267.119995,266.915894,51723500
2022-04-04,267.279999,275.579987,266.130005,273.600006,273.390961,39712000
2022-04-05,272.540009,273.190002,258.200012,259.309998,259.111877,43661500


In [12]:
# The first day and the last day, selected by integer position with iloc.
# Position 0 is the first record.
data.iloc[0]

Open         2.764000e+02
High         2.843800e+02
Low          2.706500e+02
Close        2.817800e+02
Adj Close    2.815167e+02
Volume       4.541860e+07
Name: 2022-01-06 00:00:00, dtype: float64

In [13]:
data.iloc[-1]
# Position -1 is the last record.
# We've now indexed this data by label (loc) and by integer position (iloc).

Open         1.449100e+02
High         1.456400e+02
Low          1.414800e+02
Close        1.426500e+02
Adj Close    1.426500e+02
Volume       3.887120e+07
Name: 2023-01-05 00:00:00, dtype: float64

In [14]:
# Preview the last rows; the final row should match data.iloc[-1].
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-29,144.020004,146.830002,142.270004,146.029999,146.029999,35492300
2022-12-30,143.339996,146.289993,142.330002,146.139999,146.139999,31027300
2023-01-03,148.509995,149.960007,140.960007,143.149994,143.149994,40127700
2023-01-04,145.669998,148.529999,142.410004,147.490005,147.490005,43132400
2023-01-05,144.910004,145.639999,141.479996,142.649994,142.649994,38871200


In [15]:
# How to look at columns, which are Series, and make calculations on them. A DataFrame is made up of Series.
# We'll use type() to check which kind of pandas object we are holding.
# Coming up: daily change, daily percentage change, and normalising the data.
type(data)

pandas.core.frame.DataFrame

In [16]:
# Accessing the Close column
data['Close']

Date
2022-01-06    281.779999
2022-01-07    272.470001
2022-01-10    274.000000
2022-01-11    278.170013
2022-01-12    279.989990
                 ...    
2022-12-29    146.029999
2022-12-30    146.139999
2023-01-03    143.149994
2023-01-04    147.490005
2023-01-05    142.649994
Name: Close, Length: 251, dtype: float64

In [17]:
type(data['Close'])
# A single column is a Series rather than a DataFrame.

pandas.core.series.Series

In [18]:
# Each column (Series) has one dtype; list them again before doing arithmetic on the columns.
data.dtypes

Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [19]:
# Unlike Excel, a Series restricts the type of data it can contain, because
# with a single, known data type calculations can be executed more quickly.

# Calculating the difference between the opening price and the closing price:

In [20]:
# Open minus Close for every row: the value is negative when the stock closed above its opening price.
data['Open'] - data['Close']

Date
2022-01-06   -5.380005
2022-01-07    8.940003
2022-01-10   -8.190002
2022-01-11   -4.940002
2022-01-12    0.680023
                ...   
2022-12-29   -2.009995
2022-12-30   -2.800003
2023-01-03    5.360001
2023-01-04   -1.820007
2023-01-05    2.260010
Length: 251, dtype: float64

In [21]:
data.head()
# Subtracting Close from Open in these rows gives the same values as the previous result.

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-01-06,276.399994,284.380005,270.649994,281.779999,281.516724,45418600
2022-01-07,281.410004,284.220001,270.570007,272.470001,272.215424,40993900
2022-01-10,265.809998,274.690002,256.440002,274.0,273.744049,59468100
2022-01-11,273.230011,280.649994,268.390015,278.170013,277.910156,40408900
2022-01-12,280.670013,285.950012,276.079987,279.98999,279.728394,38341300


In [24]:
daily_change = data['Open'] - data['Close']
daily_change

Date
2022-01-06   -5.380005
2022-01-07    8.940003
2022-01-10   -8.190002
2022-01-11   -4.940002
2022-01-12    0.680023
                ...   
2022-12-29   -2.009995
2022-12-30   -2.800003
2023-01-03    5.360001
2023-01-04   -1.820007
2023-01-05    2.260010
Length: 251, dtype: float64

In [27]:
# Arithmetic between two columns produces another Series.
type(daily_change)

pandas.core.series.Series

In [30]:
# Daily percentage change: the open-to-close move expressed as a percentage of the opening price.
daily_percentage_change = (
    data['Close']
    .sub(data['Open'])
    .div(data['Open'])
    .mul(100)
)
daily_percentage_change

Date
2022-01-06    1.946456
2022-01-07   -3.176860
2022-01-10    3.081149
2022-01-11    1.808001
2022-01-12   -0.242286
                ...   
2022-12-29    1.395636
2022-12-30    1.953400
2023-01-03   -3.609185
2023-01-04    1.249404
2023-01-05   -1.559596
Length: 251, dtype: float64

In [35]:
# Normalising the data: the first close is the baseline every other close is compared against.
# First record of the Close column
data['Close'].iloc[0]

281.779999

In [36]:
# Last record of the Close column
data['Close'].iloc[-1]

142.649994

In [37]:
# Divide every close by the first close so the series starts at 1.0;
# each value is then the fraction of the starting price that remains.
norm = data['Close'].div(data['Close'].iloc[0])
norm

Date
2022-01-06    1.000000
2022-01-07    0.966960
2022-01-10    0.972390
2022-01-11    0.987189
2022-01-12    0.993647
                ...   
2022-12-29    0.518241
2022-12-30    0.518632
2023-01-03    0.508020
2023-01-04    0.523423
2023-01-05    0.506246
Name: Close, Length: 251, dtype: float64

In [39]:
# Sanity check: first close times the final normalised value should reproduce the last close
# (up to floating-point rounding).
data['Close'].iloc[0]*norm.iloc[-1]

142.64999399999996

In [40]:
# The actual last close, for comparison with the value reconstructed from norm.
data['Close'].iloc[-1]

142.649994

In [None]:
# One year of investment in NVDA stock, from 2022-01-06 to 2023-01-05, resulted in a decrease of about 49%:
# the normalised close ended at 0.506, i.e. roughly half of the starting value.
