In [1]:
# initial imports
import pandas as pd
from path import Path

from sklearn.cluster import KMeans
import plotly.express as px
import hvplot.pandas
import numpy as np
from sklearn.preprocessing import StandardScaler
# Module-level StandardScaler instance.
# NOTE(review): no cell visible in this notebook references data_scaler — confirm it is still needed.
data_scaler = StandardScaler()

In [2]:
# Read the GameStop (GME) price history CSV from the Resources folder
file_path1 = Path("Resources/GME.csv")
GME_df = pd.read_csv(filepath_or_buffer=file_path1)

# Print the (rows, columns) shape, then preview the first five rows
print(GME_df.shape)
GME_df.head(n=5)

(461, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,6.14,6.47,6.07,6.31,6.31,4453600
1,2020-01-03,6.21,6.25,5.84,5.88,5.88,3543900
2,2020-01-06,5.8,5.91,5.6,5.85,5.85,3394800
3,2020-01-07,5.77,5.83,5.44,5.52,5.52,5228000
4,2020-01-08,5.49,5.85,5.41,5.72,5.72,5629400


In [3]:
# Work on a copy so the raw GME frame stays exactly as loaded
GME_copy = GME_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 6.31; assumes the rows stay sorted by Date ascending.
GME_ref_price = GME_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
GME_copy["ref_Close"] = GME_copy["Adj Close"] - GME_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
GME_copy["Perc_ref"] = (GME_copy["Adj Close"] - GME_ref_price) / GME_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
GME_copy["Perc_Change"] = (GME_copy["Adj Close"] - GME_copy["Open"]) / GME_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
GME_px = GME_copy['Perc_ref']
GME_copy['mavg'] = GME_px.rolling(window=30).mean()

print(GME_copy.shape)
GME_copy.tail()

(461, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
456,2021-10-22,178.100006,179.490005,167.800003,169.800003,169.800003,2800400,163.490003,25.909668,-0.048881,28.249393
457,2021-10-25,169.419998,174.800003,167.259995,173.970001,173.970001,1442800,167.660001,26.570523,0.026154,28.093925
458,2021-10-26,173.360001,185.0,172.5,177.839996,177.839996,2176700,171.529996,27.183835,0.025191,27.980877
459,2021-10-27,180.0,183.089996,172.330002,173.509995,173.509995,1107000,167.199995,26.497622,-0.037404,27.817063
460,2021-10-28,175.160004,183.139999,175.0,182.850006,182.850006,1696200,176.540006,27.977814,0.042056,27.692816


#### Import and prepare AMC stocks

In [4]:
# Read the AMC price history CSV from the Resources folder
file_path2 = Path("Resources/AMC.csv")
AMC_df = pd.read_csv(filepath_or_buffer=file_path2)

# Print the (rows, columns) shape, then preview the first five rows
print(AMC_df.shape)
AMC_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,7.3,7.56,7.23,7.46,7.414512,4545900
1,2020-01-03,7.45,7.6,7.31,7.32,7.275366,2218000
2,2020-01-06,7.25,7.3,7.11,7.12,7.076585,2903400
3,2020-01-07,7.11,7.2,7.0,7.13,7.086524,3205300
4,2020-01-08,7.11,7.15,6.61,6.62,6.579634,6557000


In [5]:
# Work on a copy so the raw AMC frame stays exactly as loaded
AMC_copy = AMC_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 7.414512; assumes the rows stay sorted by Date ascending.
AMC_ref_price = AMC_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
AMC_copy["ref_Close"] = AMC_copy["Adj Close"] - AMC_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
AMC_copy["Perc_ref"] = (AMC_copy["Adj Close"] - AMC_ref_price) / AMC_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (7.414512 vs 7.46 on the first row), so comparing it with the raw "Open"
# mixes two price scales, and the denominator is the close rather than the
# open — confirm this is intended.
AMC_copy["Perc_Change"] = (AMC_copy["Adj Close"] - AMC_copy["Open"]) / AMC_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
AMC_px = AMC_copy['Perc_ref']
AMC_copy['mavg'] = AMC_px.rolling(window=30).mean()

print(AMC_copy.shape)
AMC_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,36.23,37.849998,35.779999,36.830002,36.830002,30905400,29.41549,3.967286,0.016291,4.341192
458,2021-10-26,36.529999,37.400002,35.779999,36.049999,36.049999,27674800,28.635487,3.862087,-0.013315,4.290616
459,2021-10-27,36.330002,36.790001,34.580002,34.759998,34.759998,25904100,27.345486,3.688103,-0.045167,4.236308
460,2021-10-28,35.09,36.07,34.860001,35.23,35.23,23812200,27.815488,3.751493,0.003974,4.187709
461,2021-10-29,35.34,36.630001,34.529999,35.369999,35.369999,32841500,27.955487,3.770375,0.000848,4.148012


#### Import and prepare BBBY stocks

In [6]:
# Read the Bed Bath & Beyond (BBBY) price history CSV from the Resources folder
file_path3 = Path("Resources/BBBY.csv")
BBBY_df = pd.read_csv(filepath_or_buffer=file_path3)

# Print the (rows, columns) shape, then preview the first five rows
print(BBBY_df.shape)
BBBY_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,17.43,17.440001,16.16,16.33,15.967111,6695000
1,2020-01-03,16.08,16.370001,15.86,16.08,15.722667,4116400
2,2020-01-06,16.48,16.93,16.200001,16.559999,16.191999,8120600
3,2020-01-07,16.6,16.93,16.110001,16.84,16.465778,7960500
4,2020-01-08,16.74,17.110001,16.540001,16.65,16.279999,13688800


In [7]:
# Work on a copy so the raw BBBY frame stays exactly as loaded
BBBY_copy = BBBY_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 15.967111; assumes the rows stay sorted by Date ascending.
BBBY_ref_price = BBBY_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
BBBY_copy["ref_Close"] = BBBY_copy["Adj Close"] - BBBY_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
BBBY_copy["Perc_ref"] = (BBBY_copy["Adj Close"] - BBBY_ref_price) / BBBY_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (15.967111 vs 16.33 on the first row), so comparing it with the raw "Open"
# mixes two price scales, and the denominator is the close rather than the
# open — confirm this is intended.
BBBY_copy["Perc_Change"] = (BBBY_copy["Adj Close"] - BBBY_copy["Open"]) / BBBY_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
BBBY_px = BBBY_copy['Perc_ref']
BBBY_copy['mavg'] = BBBY_px.rolling(window=30).mean()

print(BBBY_copy.shape)
BBBY_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,14.24,14.8,14.04,14.46,14.46,3390500,-1.507111,-0.094388,0.015214,0.14548
458,2021-10-26,14.45,14.66,14.01,14.13,14.13,4089200,-1.837111,-0.115056,-0.022647,0.12523
459,2021-10-27,14.11,14.13,13.38,13.67,13.67,8415400,-2.297111,-0.143865,-0.032187,0.103644
460,2021-10-28,13.85,14.41,13.55,14.39,14.39,7038200,-1.577111,-0.098772,0.037526,0.084709
461,2021-10-29,14.45,14.73,13.98,14.04,14.04,5368700,-1.927111,-0.120693,-0.029202,0.063624


#### Import and prepare VOO (Vanguard S&P 500 ETF)

In [8]:
# Read the Vanguard S&P 500 ETF (VOO) price history CSV from the Resources folder
file_path4 = Path("Resources/VOO.csv")
VOO_df = pd.read_csv(filepath_or_buffer=file_path4)

# Print the (rows, columns) shape, then preview the first five rows
print(VOO_df.shape)
VOO_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,297.290009,298.450012,296.339996,298.420013,290.068878,3142400
1,2020-01-03,295.220001,297.359985,295.040009,296.23999,287.949829,3247900
2,2020-01-06,294.420013,297.420013,294.350006,297.350006,289.028778,2777100
3,2020-01-07,296.820007,297.269989,296.059998,296.529999,288.23172,2251800
4,2020-01-08,296.709991,299.309998,296.470001,298.059998,289.718933,3719500


In [9]:
# Work on a copy so the raw VOO frame stays exactly as loaded
VOO_copy = VOO_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 290.068878; assumes the rows stay sorted by Date ascending.
VOO_ref_price = VOO_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
VOO_copy["ref_Close"] = VOO_copy["Adj Close"] - VOO_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
VOO_copy["Perc_ref"] = (VOO_copy["Adj Close"] - VOO_ref_price) / VOO_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (290.068878 vs 298.420013 on the first row), so comparing it with the raw
# "Open" mixes two price scales, and the denominator is the close rather than
# the open — confirm this is intended.
VOO_copy["Perc_Change"] = (VOO_copy["Adj Close"] - VOO_copy["Open"]) / VOO_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
VOO_px = VOO_copy['Perc_ref']
VOO_copy['mavg'] = VOO_px.rolling(window=30).mean()

print(VOO_copy.shape)
VOO_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,417.640015,419.140015,415.929993,418.75,418.75,2511500,128.681122,0.443623,0.002651,0.396748
458,2021-10-26,420.269989,421.529999,418.859985,419.149994,419.149994,3085300,129.081116,0.445002,-0.002672,0.398135
459,2021-10-27,419.660004,420.290009,417.279999,417.329987,417.329987,2873500,127.261109,0.438727,-0.005583,0.398923
460,2021-10-28,418.779999,421.440002,418.75,421.410004,421.410004,3643600,131.341126,0.452793,0.006241,0.400261
461,2021-10-29,419.149994,422.519989,418.869995,422.160004,422.160004,4637400,132.091126,0.455378,0.00713,0.402147


In [10]:
# Read the BioNTech (BNTX) price history CSV from the Resources folder
file_path5 = Path("Resources/BNTX.csv")
BNTX_df = pd.read_csv(filepath_or_buffer=file_path5)

# Print the (rows, columns) shape, then preview the first five rows
print(BNTX_df.shape)
BNTX_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,35.75,38.5,35.360001,38.5,38.5,139500
1,2020-01-03,38.0,41.25,36.419998,40.049999,40.049999,176000
2,2020-01-06,40.110001,45.0,40.099998,44.580002,44.580002,333300
3,2020-01-07,47.400002,48.849998,41.18,43.34,43.34,562100
4,2020-01-08,44.299999,46.330002,44.18,45.099998,45.099998,193900


In [11]:
# Work on a copy so the raw BNTX frame stays exactly as loaded
BNTX_copy = BNTX_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 38.5; assumes the rows stay sorted by Date ascending.
BNTX_ref_price = BNTX_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
BNTX_copy["ref_Close"] = BNTX_copy["Adj Close"] - BNTX_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
BNTX_copy["Perc_ref"] = (BNTX_copy["Adj Close"] - BNTX_ref_price) / BNTX_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
BNTX_copy["Perc_Change"] = (BNTX_copy["Adj Close"] - BNTX_copy["Open"]) / BNTX_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
BNTX_px = BNTX_copy['Perc_ref']
BNTX_copy['mavg'] = BNTX_px.rolling(window=30).mean()

print(BNTX_copy.shape)
BNTX_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,278.677002,295.51001,278.677002,294.920013,294.920013,2853000,256.420013,6.66026,0.055076,6.473368
458,2021-10-26,295.51001,304.350006,290.630005,292.390015,292.390015,2272400,253.890015,6.594546,-0.010671,6.43658
459,2021-10-27,294.540009,297.950012,277.839996,278.769989,278.769989,1914300,240.269989,6.240779,-0.05657,6.370364
460,2021-10-28,282.5,288.709991,277.100006,283.980011,283.980011,1468500,245.480011,6.376104,0.005212,6.293593
461,2021-10-29,283.75,284.850006,273.0,278.730011,278.730011,1462600,240.230011,6.239741,-0.01801,6.223939


In [12]:
# Read the American Express (AXP) price history CSV from the Resources folder
file_path6 = Path("Resources/AXP.csv")
AXP_df = pd.read_csv(filepath_or_buffer=file_path6)

# Print the (rows, columns) shape, then preview the first five rows
print(AXP_df.shape)
AXP_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,124.660004,126.269997,124.230003,125.849998,122.641548,2708000
1,2020-01-03,124.32,125.099998,123.940002,124.599998,121.423416,2090600
2,2020-01-06,123.370003,124.160004,123.120003,124.059998,120.897179,2855200
3,2020-01-07,123.470001,124.400002,123.360001,123.410004,120.263756,2042300
4,2020-01-08,124.050003,126.360001,123.800003,125.540001,122.339447,3458200


In [13]:
# Work on a copy so the raw AXP frame stays exactly as loaded
AXP_copy = AXP_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 122.641548; assumes the rows stay sorted by Date ascending.
AXP_ref_price = AXP_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
AXP_copy["ref_Close"] = AXP_copy["Adj Close"] - AXP_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
AXP_copy["Perc_ref"] = (AXP_copy["Adj Close"] - AXP_ref_price) / AXP_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (122.641548 vs 125.849998 on the first row), so comparing it with the raw
# "Open" mixes two price scales, and the denominator is the close rather than
# the open — confirm this is intended.
AXP_copy["Perc_Change"] = (AXP_copy["Adj Close"] - AXP_copy["Open"]) / AXP_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
AXP_px = AXP_copy['Perc_ref']
AXP_copy['mavg'] = AXP_px.rolling(window=30).mean()

print(AXP_copy.shape)
AXP_copy.head()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
0,2020-01-02,124.660004,126.269997,124.230003,125.849998,122.641548,2708000,0.0,0.0,-0.016458,
1,2020-01-03,124.32,125.099998,123.940002,124.599998,121.423416,2090600,-1.218132,-0.009932,-0.023855,
2,2020-01-06,123.370003,124.160004,123.120003,124.059998,120.897179,2855200,-1.744369,-0.014223,-0.020454,
3,2020-01-07,123.470001,124.400002,123.360001,123.410004,120.263756,2042300,-2.377792,-0.019388,-0.02666,
4,2020-01-08,124.050003,126.360001,123.800003,125.540001,122.339447,3458200,-0.302101,-0.002463,-0.013982,


In [14]:
# Read the Microsoft (MSFT) price history CSV from the Resources folder
file_path7 = Path("Resources/MSFT.csv")
MSFT_df = pd.read_csv(filepath_or_buffer=file_path7)

# Print the (rows, columns) shape, then preview the first five rows
print(MSFT_df.shape)
MSFT_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,158.779999,160.729996,158.330002,160.619995,157.615128,22622100
1,2020-01-03,158.320007,159.949997,158.059998,158.619995,155.652512,21116200
2,2020-01-06,157.080002,159.100006,156.509995,159.029999,156.054855,20813700
3,2020-01-07,159.320007,159.669998,157.320007,157.580002,154.631973,21634100
4,2020-01-08,158.929993,160.800003,157.949997,160.089996,157.095032,27746500


In [15]:
# Work on a copy so the raw MSFT frame stays exactly as loaded
MSFT_copy = MSFT_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 157.615128; assumes the rows stay sorted by Date ascending.
MSFT_ref_price = MSFT_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
MSFT_copy["ref_Close"] = MSFT_copy["Adj Close"] - MSFT_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
MSFT_copy["Perc_ref"] = (MSFT_copy["Adj Close"] - MSFT_ref_price) / MSFT_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (157.615128 vs 160.619995 on the first row), so comparing it with the raw
# "Open" mixes two price scales, and the denominator is the close rather than
# the open — confirm this is intended.
MSFT_copy["Perc_Change"] = (MSFT_copy["Adj Close"] - MSFT_copy["Open"]) / MSFT_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
MSFT_px = MSFT_copy['Perc_ref']
MSFT_copy['mavg'] = MSFT_px.rolling(window=30).mean()

print(MSFT_copy.shape)
MSFT_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,309.359985,309.399994,306.459991,308.130005,307.567322,17554500,149.952194,0.951382,-0.005829,0.884071
458,2021-10-26,311.0,312.399994,308.600006,310.109985,309.543671,28107300,151.928543,0.963921,-0.004705,0.886249
459,2021-10-27,316.0,326.100006,316.0,323.170013,322.579865,52588700,164.964737,1.04663,0.020398,0.890123
460,2021-10-28,324.329987,324.869995,321.359985,324.350006,323.75769,26297900,166.142562,1.054103,-0.001768,0.894161
461,2021-10-29,324.130005,332.0,323.899994,331.619995,331.014404,34744900,173.399276,1.100144,0.020798,0.900864


In [16]:
# Read the Disney (DIS) price history CSV from the Resources folder
file_path8 = Path("Resources/DIS.csv")
DIS_df = pd.read_csv(filepath_or_buffer=file_path8)

# Print the (rows, columns) shape, then preview the first five rows
print(DIS_df.shape)
DIS_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,145.289993,148.199997,145.100006,148.199997,148.199997,9502100
1,2020-01-03,146.399994,147.899994,146.050003,146.5,146.5,7320200
2,2020-01-06,145.539993,146.029999,144.309998,145.649994,145.649994,8262500
3,2020-01-07,145.990005,146.869995,145.419998,145.699997,145.699997,6906500
4,2020-01-08,145.490005,146.130005,144.820007,145.399994,145.399994,6984200


In [17]:
# Work on a copy so the raw DIS frame stays exactly as loaded
DIS_copy = DIS_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 148.199997; assumes the rows stay sorted by Date ascending.
DIS_ref_price = DIS_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
DIS_copy["ref_Close"] = DIS_copy["Adj Close"] - DIS_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
DIS_copy["Perc_ref"] = (DIS_copy["Adj Close"] - DIS_ref_price) / DIS_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
DIS_copy["Perc_Change"] = (DIS_copy["Adj Close"] - DIS_copy["Open"]) / DIS_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
DIS_px = DIS_copy['Perc_ref']
DIS_copy['mavg'] = DIS_px.rolling(window=30).mean()

print(DIS_copy.shape)
DIS_copy.head()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
0,2020-01-02,145.289993,148.199997,145.100006,148.199997,148.199997,9502100,0.0,0.0,0.019636,
1,2020-01-03,146.399994,147.899994,146.050003,146.5,146.5,7320200,-1.699997,-0.011471,0.000683,
2,2020-01-06,145.539993,146.029999,144.309998,145.649994,145.649994,8262500,-2.550003,-0.017206,0.000755,
3,2020-01-07,145.990005,146.869995,145.419998,145.699997,145.699997,6906500,-2.5,-0.016869,-0.00199,
4,2020-01-08,145.490005,146.130005,144.820007,145.399994,145.399994,6984200,-2.800003,-0.018893,-0.000619,


In [18]:
# Read the Marathon Digital Holdings, Inc. (MARA) price history CSV from the Resources folder
file_path9 = Path("Resources/MARA.csv")
MARA_df = pd.read_csv(filepath_or_buffer=file_path9)

# Print the (rows, columns) shape, then preview the first five rows
print(MARA_df.shape)
MARA_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,0.88,1.08,0.88,1.05,1.05,447600
1,2020-01-03,0.99,1.0,0.93,0.96,0.96,128000
2,2020-01-06,0.96,1.06,0.96,0.97,0.97,192100
3,2020-01-07,0.98,1.12,0.98,1.09,1.09,1039100
4,2020-01-08,1.13,1.13,0.92,0.95,0.95,487700


In [19]:
# Work on a copy so the raw MARA frame stays exactly as loaded
MARA_copy = MARA_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 1.05; assumes the rows stay sorted by Date ascending.
MARA_ref_price = MARA_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
MARA_copy["ref_Close"] = MARA_copy["Adj Close"] - MARA_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
MARA_copy["Perc_ref"] = (MARA_copy["Adj Close"] - MARA_ref_price) / MARA_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
MARA_copy["Perc_Change"] = (MARA_copy["Adj Close"] - MARA_copy["Open"]) / MARA_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
MARA_px = MARA_copy['Perc_ref']
MARA_copy['mavg'] = MARA_px.rolling(window=30).mean()

print(MARA_copy.shape)
MARA_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,51.099998,55.290001,51.009998,54.689999,54.689999,14915800,53.639999,51.085713,0.065643,37.259365
458,2021-10-26,53.880001,54.959999,52.27,52.860001,52.860001,10082200,51.810001,49.342858,-0.019296,37.836508
459,2021-10-27,50.82,51.299999,48.900002,50.040001,50.040001,8853300,48.990001,46.657144,-0.015588,38.248889
460,2021-10-28,52.189999,53.68,49.849998,50.389999,50.389999,11053600,49.339999,46.990475,-0.035721,38.694921
461,2021-10-29,50.830002,52.75,49.799999,52.240002,52.240002,9944600,51.190002,48.752383,0.026991,39.215556


In [20]:
# Read the Electronic Arts, Inc. (EA) price history CSV from the Resources folder
file_path10 = Path("Resources/EA.csv")
EA_df = pd.read_csv(filepath_or_buffer=file_path10)

# Print the (rows, columns) shape, then preview the first five rows
print(EA_df.shape)
EA_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,107.940002,108.349998,106.760002,107.339996,106.810196,1901000
1,2020-01-03,105.580002,107.75,105.120003,107.199997,106.670868,1840300
2,2020-01-06,106.949997,109.309998,106.629997,108.779999,108.243073,2934200
3,2020-01-07,109.0,109.25,107.730003,108.389999,107.855011,1692400
4,2020-01-08,108.18,110.0,107.779999,109.489998,108.949577,2651600


In [21]:
# Work on a copy so the raw EA frame stays exactly as loaded
EA_copy = EA_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 106.810196; assumes the rows stay sorted by Date ascending.
EA_ref_price = EA_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
EA_copy["ref_Close"] = EA_copy["Adj Close"] - EA_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
EA_copy["Perc_ref"] = (EA_copy["Adj Close"] - EA_ref_price) / EA_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the preview shows "Adj Close" differing from "Close"
# (106.810196 vs 107.339996 on the first row), so comparing it with the raw
# "Open" mixes two price scales, and the denominator is the close rather than
# the open — confirm this is intended.
EA_copy["Perc_Change"] = (EA_copy["Adj Close"] - EA_copy["Open"]) / EA_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
EA_px = EA_copy['Perc_ref']
EA_copy['mavg'] = EA_px.rolling(window=30).mean()

print(EA_copy.shape)
EA_copy.tail()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
457,2021-10-25,141.110001,143.789993,140.5,142.190002,142.190002,2155600,35.379806,0.33124,0.007595,0.279313
458,2021-10-26,142.919998,144.110001,141.720001,142.399994,142.399994,1543000,35.589798,0.333206,-0.003652,0.278386
459,2021-10-27,142.679993,142.759995,140.309998,140.369995,140.369995,1697000,33.559799,0.3142,-0.016456,0.279425
460,2021-10-28,140.25,141.190002,139.100006,140.229996,140.229996,1372600,33.4198,0.31289,-0.000143,0.280758
461,2021-10-29,140.419998,140.839996,138.910004,140.25,140.25,1990500,33.439804,0.313077,-0.001212,0.282746


In [22]:
# Read the Sony Group Corporation (SONY) price history CSV from the Resources folder
file_path11 = Path("Resources/SONY.csv")
SONY_df = pd.read_csv(filepath_or_buffer=file_path11)

# Print the (rows, columns) shape, then preview the first five rows
print(SONY_df.shape)
SONY_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,68.300003,69.059998,68.279999,69.059998,69.059998,856500
1,2020-01-03,68.5,68.650002,67.959999,68.080002,68.080002,518600
2,2020-01-06,68.25,69.199997,68.220001,69.110001,69.110001,755500
3,2020-01-07,70.510002,70.580002,69.809998,70.199997,70.199997,1316200
4,2020-01-08,70.800003,71.120003,70.370003,70.459999,70.459999,1640200


In [23]:
# Work on a copy so the raw SONY frame stays exactly as loaded
SONY_copy = SONY_df.copy()

# Reference ("zero") price: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 69.059998; assumes the rows stay sorted by Date ascending.
SONY_ref_price = SONY_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference price
SONY_copy["ref_Close"] = SONY_copy["Adj Close"] - SONY_ref_price

# Change relative to the reference price, as a fraction (not multiplied by 100)
SONY_copy["Perc_ref"] = (SONY_copy["Adj Close"] - SONY_ref_price) / SONY_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
SONY_copy["Perc_Change"] = (SONY_copy["Adj Close"] - SONY_copy["Open"]) / SONY_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
SONY_px = SONY_copy['Perc_ref']
SONY_copy['mavg'] = SONY_px.rolling(window=30).mean()

print(SONY_copy.shape)
SONY_copy.head()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
0,2020-01-02,68.300003,69.059998,68.279999,69.059998,69.059998,856500,0.0,0.0,0.011005,
1,2020-01-03,68.5,68.650002,67.959999,68.080002,68.080002,518600,-0.979996,-0.014191,-0.006169,
2,2020-01-06,68.25,69.199997,68.220001,69.110001,69.110001,755500,0.050003,0.000724,0.012444,
3,2020-01-07,70.510002,70.580002,69.809998,70.199997,70.199997,1316200,1.139999,0.016507,-0.004416,
4,2020-01-08,70.800003,71.120003,70.370003,70.459999,70.459999,1640200,1.400001,0.020272,-0.004825,


In [24]:
# Read the NASDAQ (^IXIC) index history CSV from the Resources folder
file_path12 = Path("Resources/IXIC.csv")
IXIC_df = pd.read_csv(filepath_or_buffer=file_path12)

# Print the (rows, columns) shape, then preview the first five rows
print(IXIC_df.shape)
IXIC_df.head(n=5)

(462, 7)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-02,9039.459961,9093.429688,9010.889648,9092.19043,9092.19043,2848370000
1,2020-01-03,8976.429688,9065.759766,8976.429688,9020.769531,9020.769531,2567400000
2,2020-01-06,8943.5,9072.410156,8943.5,9071.469727,9071.469727,2788120000
3,2020-01-07,9076.639648,9091.929688,9042.549805,9068.580078,9068.580078,2352850000
4,2020-01-08,9068.030273,9168.889648,9059.379883,9129.240234,9129.240234,2464090000


In [25]:
# Work on a copy so the raw IXIC frame stays exactly as loaded
IXIC_copy = IXIC_df.copy()

# Reference ("zero") level: the "Adj Close" of the first row, which the preview
# above shows is 2020-01-02. Reading it from the data replaces the hard-coded
# literal 9092.190430; assumes the rows stay sorted by Date ascending.
IXIC_ref_price = IXIC_copy["Adj Close"].iloc[0]

# Absolute change in adjusted close relative to the reference level
IXIC_copy["ref_Close"] = IXIC_copy["Adj Close"] - IXIC_ref_price

# Change relative to the reference level, as a fraction (not multiplied by 100)
IXIC_copy["Perc_ref"] = (IXIC_copy["Adj Close"] - IXIC_ref_price) / IXIC_ref_price

# Same-day move (Adj Close - Open) as a fraction of Adj Close.
# NOTE(review): the denominator is the close rather than the open — confirm
# this is the intended definition of the daily % change.
IXIC_copy["Perc_Change"] = (IXIC_copy["Adj Close"] - IXIC_copy["Open"]) / IXIC_copy["Adj Close"]

# 30-row rolling mean of Perc_ref; rows before the window fills are NaN
IXIC_px = IXIC_copy['Perc_ref']
IXIC_copy['mavg'] = IXIC_px.rolling(window=30).mean()

print(IXIC_copy.shape)
IXIC_copy.head()

(462, 11)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg
0,2020-01-02,9039.459961,9093.429688,9010.889648,9092.19043,9092.19043,2848370000,0.0,0.0,0.0058,
1,2020-01-03,8976.429688,9065.759766,8976.429688,9020.769531,9020.769531,2567400000,-71.420899,-0.007855,0.004915,
2,2020-01-06,8943.5,9072.410156,8943.5,9071.469727,9071.469727,2788120000,-20.720703,-0.002279,0.014107,
3,2020-01-07,9076.639648,9091.929688,9042.549805,9068.580078,9068.580078,2352850000,-23.610352,-0.002597,-0.000889,
4,2020-01-08,9068.030273,9168.889648,9059.379883,9129.240234,9129.240234,2464090000,37.049804,0.004075,0.006705,


### Combine all the stocks

In [26]:
# Insert a "Ticker" column into every prepared frame so rows stay identifiable
# after stacking. The mapping order is the order the frames are concatenated in.
ticker_frames = {
    "GME": GME_copy,
    "AMC": AMC_copy,
    "BBBY": BBBY_copy,
    "VOO": VOO_copy,
    "BNTX": BNTX_copy,
    "AXP": AXP_copy,
    "MSFT": MSFT_copy,
    "DIS": DIS_copy,
    "MARA": MARA_copy,
    "EA": EA_copy,
    "SONY": SONY_copy,
    "NASDAQ": IXIC_copy,
}
for symbol, frame in ticker_frames.items():
    frame["Ticker"] = symbol

# Stack all frames row-wise into one dataframe; each frame keeps its own
# row labels, so index values repeat across tickers.
stocks = list(ticker_frames.values())
combined_all_df = pd.concat(stocks)

# Print the combined shape, then show five randomly sampled rows
print(combined_all_df.shape)
combined_all_df.sample(n=5)

(5543, 12)


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg,Ticker
179,2020-09-17,104.269997,105.349998,103.32,104.660004,103.023659,3215300,-19.617889,-0.159961,-0.012098,-0.185576,AXP
444,2021-10-06,36.360001,37.650002,35.639999,36.830002,36.830002,35450500,29.41549,3.967286,0.012761,4.735734,AMC
229,2020-11-27,334.290009,334.609985,333.179993,334.140015,329.408539,984700,39.339661,0.135622,-0.014819,0.087917,VOO
390,2021-07-21,278.899994,281.519989,277.290009,281.399994,280.349396,24364300,122.734268,0.778696,0.00517,0.707342,MSFT
58,2020-03-26,3.46,3.95,3.44,3.7,3.7,5094700,-3.714512,-0.500979,0.064865,-0.334903,AMC


In [27]:
# Drop "Close" (nearly a duplicate of "Adj Close") and derive two same-day
# spreads measured against "Open":
#   Net_Close = Adj Close - Open
#   Net_High  = High - Open
# NOTE(review): for tickers whose "Adj Close" differs from "Close", Net_Close
# subtracts a raw "Open" from an adjusted close — confirm this is intended.
combined_all = (
    combined_all_df
    .drop(columns="Close")
    .assign(
        Net_Close=lambda frame: frame['Adj Close'] - frame['Open'],
        Net_High=lambda frame: frame['High'] - frame['Open'],
    )
)
combined_all.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume,ref_Close,Perc_ref,Perc_Change,mavg,Ticker,Net_Close,Net_High
0,2020-01-02,6.14,6.47,6.07,6.31,4453600,0.0,0.0,0.026941,,GME,0.17,0.33
1,2020-01-03,6.21,6.25,5.84,5.88,3543900,-0.43,-0.068146,-0.056122,,GME,-0.33,0.04
2,2020-01-06,5.8,5.91,5.6,5.85,3394800,-0.46,-0.0729,0.008547,,GME,0.05,0.11
3,2020-01-07,5.77,5.83,5.44,5.52,5228000,-0.79,-0.125198,-0.04529,,GME,-0.25,0.06
4,2020-01-08,5.49,5.85,5.41,5.72,5629400,-0.59,-0.093502,0.04021,,GME,0.23,0.36


In [28]:
# List each column's dtype. The output below shows "Date" as object,
# i.e. it has not yet been parsed into datetime values at this point.
combined_all.dtypes

Date            object
Open           float64
High           float64
Low            float64
Adj Close      float64
Volume           int64
ref_Close      float64
Perc_ref       float64
Perc_Change    float64
mavg           float64
Ticker          object
Net_Close      float64
Net_High       float64
dtype: object

In [29]:
# Parse the "Date" column into datetime values so the date-range masks
# used further down compare dates rather than plain strings.
combined_all["Date"] = pd.to_datetime(combined_all["Date"])

In [30]:
# Line plot, one line per ticker, of ref_Close: each day's "Adj Close" minus
# that ticker's "Adj Close" on the first row of its file (2020-01-02).
# The title names the adjusted close on Jan 02, 2020 as the baseline because
# that is what ref_Close is measured against; it is not an opening price.
combined_all.hvplot(
    x='Date', y='ref_Close', by='Ticker',
    label='Change in the stock adjusted closing price compared to the adjusted closing price on Jan 02, 2020',
    ylabel='Change in closing price', width=800, height=400,
)

#### Make a line plot to see how the stocks changed from the reference date, Jan 2, 2020

In [31]:
# Line plot, one line per ticker, of Perc_ref: the change in "Adj Close"
# relative to that ticker's "Adj Close" on the first row of its file (2020-01-02).
# The title names the adjusted close on Jan 02, 2020 as the baseline because
# that is what Perc_ref is measured against; it is not an opening price.
# NOTE(review): Perc_ref is stored as a fraction (1.0 = +100 %), so the axis
# values are not percentage points.
combined_all.hvplot(
    x='Date', y='Perc_ref', by='Ticker',
    label='%Change in the stock adjusted closing price compared to the adjusted closing price on Jan 02, 2020',
    ylabel='% Change in closing price', width=800, height=450,
)

#### Plot the stocks without the NASDAQ index

In [32]:
# Keep every row whose Ticker is not "NASDAQ", leaving the other 11 tickers
stocks_11 = combined_all.loc[combined_all["Ticker"] != "NASDAQ"]

# Plot Perc_ref (change relative to each ticker's first-row "Adj Close") for
# those 11 tickers. The title gives Jan 02, 2020 as the reference date because
# the first row of every input file is dated 2020-01-02.
stocks_11.hvplot(
    x="Date", y="Perc_ref", by="Ticker", width=800, height=450,
    ylabel="% change of the stock price",
    label="% change of the stock price from the reference date, Jan 02, 2020",
)

In [33]:
# Line plot of ref_Close for the 11 tickers left after removing NASDAQ.
# ref_Close is each day's "Adj Close" minus that ticker's "Adj Close" on the
# first row of its file (2020-01-02), so the title names that adjusted close
# as the baseline rather than an opening price.
stocks_11.hvplot(
    x='Date', y='ref_Close', by='Ticker',
    label='Change in the stock adjusted closing price compared to the adjusted closing price on Jan 02, 2020',
    ylabel='Change in closing price', width=800, height=400,
)

In [34]:
# Heatmap over the full date range: x = Date, y = Ticker, colour = ref_Close
# (change in "Adj Close" relative to the ticker's first-row value).
# "High" is only added to the hover tooltip, so the title describes ref_Close
# rather than the intraday high.
stocks_11.hvplot.heatmap(
    x="Date", y="Ticker", C="ref_Close", hover_cols="High",
    colorbar=True, width=800, rot=45,
    label="Heatmap showing 11 stocks change in adjusted closing price from the reference price, 01/02/2020 until 10/29/2021",
)

#### Plot the percent change from the reference price for each day

In [35]:
# Heatmap over the full date range: x = Date, y = Ticker, colour = Perc_ref
# (change in "Adj Close" relative to the ticker's first-row value, stored as a
# fraction). "High" is only added to the hover tooltip.
stocks_11.hvplot.heatmap(
    x="Date", y="Ticker", C="Perc_ref", hover_cols="High",
    colorbar=True, width=800, rot=45,
    label="Heatmap showing % change from the reference price from 01/01/2020 until 10/31/2021",
)

In [36]:
# Heatmap of Perc_ref for the 11 stocks, restricted to dates after 2021-01-01
# and up to and including 2021-03-30.
# The upper bound is 2021-03-30 so the selected rows match the
# "Jan 01 - March 30, 2021" range stated in the plot title.
mask = (stocks_11["Date"] > '2021-01-01') & (stocks_11["Date"] <= '2021-03-30')
x_mask = stocks_11.loc[mask]

x_mask.hvplot.heatmap(x='Date', y='Ticker', C="Perc_ref", hover_cols='High', width = 800,
                     label="Heatmap showing the change in price from 01-01-2020 between Jan 01 - March 30, 2021")

### Plot line graphs using the moving average

In [39]:
# Keep rows dated after 2020-02-13 and up to and including 2021-10-29.
# mavg is NaN at the start of each ticker's history, until its 30-row rolling
# window has filled, so the early rows are filtered out before plotting.
mask_avg = (combined_all["Date"] > '2020-02-13') & (combined_all["Date"] <= '2021-10-29')
x_mask_avg = combined_all.loc[mask_avg]

# Plot the filtered frame (not the full one) so the date mask takes effect
x_mask_avg.hvplot(x='Date', y='mavg', by='Ticker', ylabel='30 days Moving avg of % change in ref. price',
                  label="30 days Moving Avg of % change of stock price from Jan 1, 2020",
                  width=800, height=400)

In [38]:
# Save the combined 12-ticker frame as a CSV for the machine-learning step.
# to_csv is called with its defaults, so the row index is written as well.
stocks_csv_path = "Resources/stocks_12.csv"
combined_all.to_csv(path_or_buf=stocks_csv_path)