---
Import Libraries

---

In [15]:
import numpy as np
import pandas as pd
from datetime import datetime
import yfinance as yf
import ta
import getpass
from fredapi import Fred

---
Function: cleaning columns

---

In [44]:
def cleanColumnNames(df):
  #remove trailing and leading spaces
  df=df.rename(columns=lambda x :  x.strip())

  #remove extra spaces 
  df.columns = df.columns.str.replace('  ',' ')

  #replace space with underscore
  df.columns=df.columns.str.replace(' ','_')

  #lowercase the column names
  df = df.rename(columns=lambda x: x.lower())
  
  return df

---
Defining Start and End Date

---

In [16]:
# Define dataset start and end date => Two years worth of data
start_date = datetime(2018, 1, 1)
end_date = datetime(2023, 12, 31)

---
Downloading NVDA stock data from Yahoo Finance

---

1. It downloads historical stock data for NVIDIA (NVDA) from Yahoo Finance using the yfinance library.

In [17]:
# Downloaded data
nvda_stock_df = yf.download("NVDA", start_date, end_date)
nvda_stock_df.describe()

nvda_stock_df.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,48.945,49.875,48.625,49.837502,49.312786,35561600
2018-01-03,51.025002,53.424999,50.9375,53.1175,52.55825,91470400
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600


---

Calculating Daily Returns for NVIDIA Stock

---

In [18]:
nvda_stock_df["Returns"] = nvda_stock_df["Adj Close"] - nvda_stock_df["Adj Close"].shift(1)
nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-02,48.945,49.875,48.625,49.837502,49.312786,35561600,
2018-01-03,51.025002,53.424999,50.9375,53.1175,52.55825,91470400,3.245464
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629


---
Calculating logarithmic daily returns for better interpretability

---

In [19]:
nvda_stock_df["Daily_Return"] = np.log(nvda_stock_df["Adj Close"]) - np.log(nvda_stock_df["Adj Close"].shift(1))
nvda_stock_df = nvda_stock_df.dropna()

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-03,51.025002,53.424999,50.9375,53.1175,52.55825,91470400,3.245464,0.063739
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027


---
Feature-set 1: Typical Price and Return

---

In [20]:
nvda_stock_df["Typical_Price"] = nvda_stock_df[["High", "Low", "Close"]].mean(axis=1)
nvda_stock_df["Typical_Price_Return"] = (
    np.log(nvda_stock_df.Typical_Price) - np.log(nvda_stock_df.Typical_Price.shift(+1))
) * 100.0
nvda_stock_df = nvda_stock_df.dropna()

nvda_stock_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nvda_stock_df["Typical_Price"] = nvda_stock_df[["High", "Low", "Close"]].mean(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nvda_stock_df["Typical_Price_Return"] = (


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554


---
Feature-set 2: Common Transforms

---

In [21]:
nvda_stock_df["Volume_Log"] = np.log(nvda_stock_df.Volume)
nvda_stock_df["Volume_Differencing"] = nvda_stock_df.Volume.diff()
nvda_stock_df["Volume_Differencing_10"] = nvda_stock_df.Volume.diff(10)
nvda_stock_df["Volumne_Percent_Change"] = nvda_stock_df.Volume.pct_change()
nvda_stock_df["MA_5"] = np.log(nvda_stock_df.Volume.rolling(5).mean())
nvda_stock_df["Volumne_MA_200"] = (
    nvda_stock_df.Volume / nvda_stock_df.Volume.rolling(200).mean() - 1
)
nvda_stock_df["Close_EMA_50"] = nvda_stock_df.Close / nvda_stock_df.Close.ewm(span=50).mean() - 1

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,Volume_Log,Volume_Differencing,Volume_Differencing_10,Volumne_Percent_Change,MA_5,Volumne_MA_200,Close_EMA_50
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182,17.881572,,,,,,0.0
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995,17.876167,-314400.0,,-0.00539,,,0.004134
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777,18.294228,30109200.0,,0.519013,,,0.022527
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445,17.721515,-38421600.0,,-0.436007,,,0.016254
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554,17.880536,8566400.0,,0.172362,17.950444,,0.018883


---
Feature-set 3: Momentum Indicators

---

In [22]:
nvda_stock_df['Momentum_AwesomeOscillatorIndicator'] = ta.momentum.AwesomeOscillatorIndicator(nvda_stock_df.High, nvda_stock_df.Low, window1=5, window2=34, fillna=False).awesome_oscillator()
nvda_stock_df['Momentum_KAMA'] = ta.momentum.KAMAIndicator(nvda_stock_df.Close, fillna=False).kama()
nvda_stock_df['Momentum_PercentagePVolumneOscillator'] = ta.momentum.PercentageVolumeOscillator(nvda_stock_df.Volume, fillna=False).pvo()
nvda_stock_df['Momentum_ROC'] = ta.momentum.ROCIndicator(nvda_stock_df.Close, fillna=False).roc()
nvda_stock_df['Momentum_RSI'] = ta.momentum.RSIIndicator(nvda_stock_df.Close, fillna=False).rsi()
nvda_stock_df['Momentum_StochRSIIndicator'] = ta.momentum.StochRSIIndicator(nvda_stock_df.Close, fillna=False).stochrsi()
nvda_stock_df['Momentum_TSIIndicator'] = ta.momentum.TSIIndicator(nvda_stock_df.Close, fillna=False).tsi()

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,MA_5,Volumne_MA_200,Close_EMA_50,Momentum_AwesomeOscillatorIndicator,Momentum_KAMA,Momentum_PercentagePVolumneOscillator,Momentum_ROC,Momentum_RSI,Momentum_StochRSIIndicator,Momentum_TSIIndicator
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182,...,,,0.0,,,,,,,
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995,...,,,0.004134,,,,,,,
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777,...,,,0.022527,,,,,,,
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445,...,,,0.016254,,,,,,,
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554,...,17.950444,,0.018883,,,,,,,


---
Feature-set 4: Trend Indicators

---

In [23]:
nvda_stock_df['Trend_ADX'] = ta.trend.ADXIndicator(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, window=20, fillna=False).adx()
nvda_stock_df['Trend_AroonIndicator'] = ta.trend.AroonIndicator(nvda_stock_df.Close, nvda_stock_df.Low, window=20, fillna=False).aroon_indicator()
nvda_stock_df['Trend_CCI'] = ta.trend.CCIIndicator(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, window=20, fillna=False).cci()
nvda_stock_df['Trend_DPO'] = ta.trend.DPOIndicator(nvda_stock_df.Close, window=20, fillna=False).dpo()
nvda_stock_df['Trend_EMA'] = ta.trend.EMAIndicator(nvda_stock_df.Close, window=20, fillna=False).ema_indicator()
nvda_stock_df['Trend_MACD'] = ta.trend.MACD(nvda_stock_df.Close, fillna=False).macd()
nvda_stock_df['Trend_MI'] = ta.trend.MassIndex(nvda_stock_df.High, nvda_stock_df.Low, fillna=False).mass_index()

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Momentum_RSI,Momentum_StochRSIIndicator,Momentum_TSIIndicator,Trend_ADX,Trend_AroonIndicator,Trend_CCI,Trend_DPO,Trend_EMA,Trend_MACD,Trend_MI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182,...,,,,0.0,,,,,,
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995,...,,,,0.0,,,,,,
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777,...,,,,0.0,,,,,,
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445,...,,,,0.0,,,,,,
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554,...,,,,0.0,,,,,,


---
Feature-set 5: Volume Indicator

---

In [25]:
nvda_stock_df['Volume_CMF'] = ta.volume.ChaikinMoneyFlowIndicator(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, nvda_stock_df.Volume, window=20, fillna=False).chaikin_money_flow()
nvda_stock_df['Volume_EOM'] = ta.volume.EaseOfMovementIndicator(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Volume, window=20, fillna=False).ease_of_movement()
nvda_stock_df['Volume_FI'] = ta.volume.ForceIndexIndicator(nvda_stock_df.Close, nvda_stock_df.Volume, window=20, fillna=False).force_index()
nvda_stock_df['Volume_MFI'] = ta.volume.money_flow_index(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, nvda_stock_df.Volume, window=20, fillna=False)
nvda_stock_df['Volume_VWAP'] = ta.volume.VolumeWeightedAveragePrice(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, nvda_stock_df.Volume, window=20, fillna=False).volume_weighted_average_price()

nvda_stock_df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Volumne_CMF,Volumne_EOM,Volumne_FI,Volumne_MFI,Volumne_VWAP,Volume_CMF,Volume_EOM,Volume_FI,Volume_MFI,Volume_VWAP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182,...,,,,,,,,,,
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995,...,,-0.863636,,,,,-0.863636,,,
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777,...,,3.549348,,,,,3.549348,,,
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445,...,,-0.364788,,,,,-0.364788,,,
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554,...,,-1.107243,,,,,-1.107243,,,


---
Feature-set 6: Volatility Indicators

---

In [27]:
# Feature-set 6: Volatility Indicators
nvda_stock_df['Volatility_ATR'] = ta.volatility.AverageTrueRange(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, window=20, fillna=False).average_true_range()
nvda_stock_df['Volatility_BB'] = ta.volatility.BollingerBands(nvda_stock_df.Close, window=20, fillna=False).bollinger_wband()
nvda_stock_df['Volatility_DonchainChannel'] = ta.volatility.DonchianChannel(nvda_stock_df.High, nvda_stock_df.Low, nvda_stock_df.Close, window=20, fillna=False).donchian_channel_wband()
nvda_stock_df['Volatility_UlcerIndex'] = ta.volatility.UlcerIndex(nvda_stock_df.Close, window=20, fillna=False).ulcer_index()

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Volumne_VWAP,Volume_CMF,Volume_EOM,Volume_FI,Volume_MFI,Volume_VWAP,Volatility_ATR,Volatility_BB,Volatility_DonchainChannel,Volatility_UlcerIndex
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-04,53.939999,54.512501,53.172501,53.397499,52.835312,58326800,0.277061,0.005258,53.694167,2.26182,...,,,,,,,0.0,,,
2018-01-05,53.547501,54.227501,52.77,53.849998,53.283047,58012400,0.447735,0.008438,53.615833,-0.145995,...,,,-0.863636,,,,0.0,,,
2018-01-08,55.099998,56.25,54.645,55.5,54.915676,88121600,1.632629,0.030181,55.465,3.390777,...,,,3.549348,,,,0.0,,,
2018-01-09,55.555,55.955002,54.66,55.485001,54.900826,49700000,-0.014851,-0.00027,55.366667,-0.177445,...,,,-0.364788,,,,0.0,,,
2018-01-10,54.549999,55.955002,54.0,55.919998,55.331249,58266400,0.430424,0.007809,55.291667,-0.135554,...,,,-1.107243,,,,0.0,,,


---

Feature-set 7: Fama-French Indicators

---

In [30]:
df_fama = pd.read_csv("./F-F_Research_Data_Factors_daily.CSV", skiprows=3)
df_fama = df_fama.iloc[:-1]
df_fama.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
df_fama["Date"] = pd.to_datetime(df_fama["Date"])
df_fama = df_fama[(df_fama["Date"] >= start_date) & (df_fama["Date"] <= end_date)]
fama = df_fama.set_index("Date")

nvda_stock_df = pd.concat([nvda_stock_df, fama], axis=1)
nvda_stock_df.dropna(inplace=True)

nvda_stock_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Volume_MFI,Volume_VWAP,Volatility_ATR,Volatility_BB,Volatility_DonchainChannel,Volatility_UlcerIndex,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-10-18,61.465,61.852501,59.272499,59.8825,59.357975,52402000.0,-0.874767,-0.01463,60.335833,-1.370346,...,45.203574,65.418863,2.260525,26.135972,22.298079,10.741495,-1.54,-0.54,0.42,0.008
2018-10-19,60.439999,60.637501,56.924999,57.2925,56.790646,61360800.0,-2.567329,-0.044215,58.285,-3.458141,...,44.660537,64.892034,2.333124,28.543358,24.96159,11.595971,-0.25,-1.33,0.71,0.008
2018-10-22,57.82,58.830002,56.767502,57.805,57.298668,36884400.0,0.508022,0.008906,57.800835,-0.834155,...,44.509744,64.570526,2.319593,30.310946,25.371115,12.354732,-0.38,0.48,-1.25,0.008
2018-10-23,55.107498,56.047501,54.177502,55.264999,54.780907,62643600.0,-2.517761,-0.044936,55.163334,-4.670472,...,40.195354,63.871194,2.384988,32.945426,29.643461,13.380735,-0.62,-0.1,-0.41,0.008
2018-10-24,54.877499,55.3475,49.712502,49.852501,49.415833,88428800.0,-5.365074,-0.103071,51.637501,-6.605032,...,35.541378,62.703614,2.547489,38.472874,37.093141,15.020365,-3.33,-0.93,0.77,0.008


---
Feature-set 8: Exracting external factors using Fred API

---

In [35]:
import ssl
print(ssl.OPENSSL_VERSION)

OpenSSL 1.1.1t  7 Feb 2023


In [39]:
# key = getpass.getpass("Enter your Fred API key: ")
fred = Fred(api_key="9f6e8f8ae3826fbee6baf503ab4052b0")

feat_list = ["SP500", "DEXJPUS", "DEXUSEU", "CBBTCUSD"]
feat_df = pd.DataFrame()
for feat in feat_list:
    feature = fred.get_series(feat, start_date, end_date)
    feature = feature.to_frame(feat)
    feature.dropna(inplace=True)
    feat_df = pd.concat([feat_df, feature], axis=1)
feat_df.dropna(inplace=True)

nvda_stock_df = pd.concat([nvda_stock_df, feat_df], axis=1)
nvda_stock_df.dropna(inplace=True)

nvda_stock_df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Volatility_DonchainChannel,Volatility_UlcerIndex,Mkt-RF,SMB,HML,RF,SP500,DEXJPUS,DEXUSEU,CBBTCUSD
2018-10-18,61.465,61.852501,59.272499,59.8825,59.357975,52402000.0,-0.874767,-0.01463,60.335833,-1.370346,...,22.298079,10.741495,-1.54,-0.54,0.42,0.008,2768.78,112.11,1.1494,6394.96
2018-10-19,60.439999,60.637501,56.924999,57.2925,56.790646,61360800.0,-2.567329,-0.044215,58.285,-3.458141,...,24.96159,11.595971,-0.25,-1.33,0.71,0.008,2767.78,112.52,1.1513,6382.99
2018-10-22,57.82,58.830002,56.767502,57.805,57.298668,36884400.0,0.508022,0.008906,57.800835,-0.834155,...,25.371115,12.354732,-0.38,0.48,-1.25,0.008,2755.88,112.78,1.1467,6407.65
2018-10-23,55.107498,56.047501,54.177502,55.264999,54.780907,62643600.0,-2.517761,-0.044936,55.163334,-4.670472,...,29.643461,13.380735,-0.62,-0.1,-0.41,0.008,2740.69,112.12,1.148,6395.14
2018-10-24,54.877499,55.3475,49.712502,49.852501,49.415833,88428800.0,-5.365074,-0.103071,51.637501,-6.605032,...,37.093141,15.020365,-3.33,-0.93,0.77,0.008,2656.1,112.58,1.1389,6415.98


---
Feature-set 9: ADS features

---

In [43]:
# Feature-set 9: ADS features
ads = pd.read_excel("ads_index_most_current_vintage.xlsx")
ads.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
ads["Date"] = pd.to_datetime(ads["Date"], format="%Y:%m:%d")
ads = ads[(ads["Date"] >= start_date) & (ads["Date"] <= end_date)]
ads = ads.set_index("Date")

nvda_stock_df = pd.concat([nvda_stock_df, ads], axis=1)
nvda_stock_df.dropna(inplace=True)

nvda_stock_df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Returns,Daily_Return,Typical_Price,Typical_Price_Return,...,Mkt-RF,SMB,HML,RF,SP500,DEXJPUS,DEXUSEU,CBBTCUSD,ADS_Index,ADS_Index.1
2018-10-18,61.465,61.852501,59.272499,59.8825,59.357975,52402000.0,-0.874767,-0.01463,60.335833,-1.370346,...,-1.54,-0.54,0.42,0.008,2768.78,112.11,1.1494,6394.96,-0.47994,-0.47994
2018-10-19,60.439999,60.637501,56.924999,57.2925,56.790646,61360800.0,-2.567329,-0.044215,58.285,-3.458141,...,-0.25,-1.33,0.71,0.008,2767.78,112.52,1.1513,6382.99,-0.479066,-0.479066
2018-10-22,57.82,58.830002,56.767502,57.805,57.298668,36884400.0,0.508022,0.008906,57.800835,-0.834155,...,-0.38,0.48,-1.25,0.008,2755.88,112.78,1.1467,6407.65,-0.472025,-0.472025
2018-10-23,55.107498,56.047501,54.177502,55.264999,54.780907,62643600.0,-2.517761,-0.044936,55.163334,-4.670472,...,-0.62,-0.1,-0.41,0.008,2740.69,112.12,1.148,6395.14,-0.468786,-0.468786
2018-10-24,54.877499,55.3475,49.712502,49.852501,49.415833,88428800.0,-5.365074,-0.103071,51.637501,-6.605032,...,-3.33,-0.93,0.77,0.008,2656.1,112.58,1.1389,6415.98,-0.465173,-0.465173


In [46]:
final_df = cleanColumnNames(nvda_stock_df)
final_df.head()

Unnamed: 0,open,high,low,close,adj_close,volume,returns,daily_return,typical_price,typical_price_return,...,mkt-rf,smb,hml,rf,sp500,dexjpus,dexuseu,cbbtcusd,ads_index,ads_index.1
2018-10-18,61.465,61.852501,59.272499,59.8825,59.357975,52402000.0,-0.874767,-0.01463,60.335833,-1.370346,...,-1.54,-0.54,0.42,0.008,2768.78,112.11,1.1494,6394.96,-0.47994,-0.47994
2018-10-19,60.439999,60.637501,56.924999,57.2925,56.790646,61360800.0,-2.567329,-0.044215,58.285,-3.458141,...,-0.25,-1.33,0.71,0.008,2767.78,112.52,1.1513,6382.99,-0.479066,-0.479066
2018-10-22,57.82,58.830002,56.767502,57.805,57.298668,36884400.0,0.508022,0.008906,57.800835,-0.834155,...,-0.38,0.48,-1.25,0.008,2755.88,112.78,1.1467,6407.65,-0.472025,-0.472025
2018-10-23,55.107498,56.047501,54.177502,55.264999,54.780907,62643600.0,-2.517761,-0.044936,55.163334,-4.670472,...,-0.62,-0.1,-0.41,0.008,2740.69,112.12,1.148,6395.14,-0.468786,-0.468786
2018-10-24,54.877499,55.3475,49.712502,49.852501,49.415833,88428800.0,-5.365074,-0.103071,51.637501,-6.605032,...,-3.33,-0.93,0.77,0.008,2656.1,112.58,1.1389,6415.98,-0.465173,-0.465173


In [48]:
# Saving the final dataframe as the feature mart
filename = f"Nvidia_Feature_Mart.csv"
nvda_stock_df.to_csv(filename, index=True)