In [None]:
!pip install stockstats
!pip install pytickersymbols

import numpy as np
import pandas as pd
from stockstats import StockDataFrame as Sdf 
import yfinance as yf
from pytickersymbols import PyTickerSymbols
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf

class stockDownloader():
    """Download daily prices for an index and derive model-ready features.

    The pipeline (see ``final_data``) is:
    fetch prices -> add technical indicators -> add turbulence index ->
    reformat the date column -> optionally save to ``Final.csv``.
    """

    # (output column, stockstats column) pairs added by
    # ``add_technical_indicator``.
    _INDICATORS = (
        ("macd", "macd"),
        ("rsi", "rsi_30"),
        ("cci", "cci_30"),
        ("dx", "dx_30"),
        ("close_50_sma", "close_50_sma"),
        ("close_200_sma", "close_200_sma"),
    )

    # Canonical column order returned by ``fetch_data`` (before ``day``).
    _PRICE_COLUMNS = ["date", "open", "high", "low", "close", "volume", "tic"]

    def __init__(self, start_date, end_date, TICKERS, save=False):
        """Store the download window, the index name and the save flag.

        Args:
            start_date: First date passed to ``yf.download``.
            end_date: End date passed to ``yf.download``.
            TICKERS: Index name handed to
                ``PyTickerSymbols.get_stocks_by_index`` (e.g. "DOW JONES").
            save: When truthy, ``final_data`` writes ``Final.csv``.
        """
        self.start_date = start_date
        self.end_date = end_date
        self.index = TICKERS
        self.save = save

    def list_tickers(self) -> list:
        """Return the ``symbol`` of every stock in the configured index."""
        stocks = PyTickerSymbols().get_stocks_by_index(str(self.index))
        return [stock["symbol"] for stock in stocks]

    def fetch_data(self) -> pd.DataFrame:
        """Download prices for every ticker and return one tidy frame.

        Returns:
            A frame with columns ``date`` ("%Y-%m-%d" string), ``open``,
            ``high``, ``low``, ``close``, ``volume``, ``tic`` and ``day``
            (day of week), sorted by ``date`` then ``tic``. ``close`` holds
            the adjusted close whenever the download provides one.

        Raises:
            ValueError: If no ticker returned any rows.
        """
        tickers = self.list_tickers()
        print(tickers)

        frames = []
        for tic in tickers:
            temp_df = yf.download(tic, start=self.start_date, end=self.end_date)
            if temp_df is None or temp_df.empty:
                # Nothing downloaded for this symbol; skip it.
                continue
            # NOTE(review): newer yfinance releases appear to return
            # (field, ticker) MultiIndex columns; keep only the field level.
            if isinstance(temp_df.columns, pd.MultiIndex):
                temp_df.columns = temp_df.columns.get_level_values(0)
            temp_df["tic"] = tic
            frames.append(temp_df)

        if not frames:
            raise ValueError("No price data downloaded for index %r" % (self.index,))

        # DataFrame.append was removed in pandas 2.0; concat once instead.
        data_df = pd.concat(frames).rename_axis("date").reset_index()

        # Rename by column name instead of by position, so the result does
        # not depend on the order or count of columns yfinance returns.
        data_df = data_df.rename(
            columns={
                "Open": "open",
                "High": "high",
                "Low": "low",
                "Close": "close",
                "Adj Close": "adjcp",
                "Volume": "volume",
            }
        )
        if "adjcp" in data_df.columns:
            # Use the adjusted close price instead of the raw close price.
            data_df["close"] = data_df["adjcp"]
        # Selecting the canonical columns also drops the adjcp column.
        data_df = data_df[self._PRICE_COLUMNS]

        data_df["day"] = data_df["date"].dt.dayofweek
        data_df["date"] = data_df["date"].dt.strftime("%Y-%m-%d")
        data_df = data_df.dropna()
        data_df = data_df.sort_values(by=["date", "tic"]).reset_index(drop=True)

        return data_df

    def add_technical_indicator(self, df):
        """Add MACD, RSI, CCI, DX and 50/200-day SMA columns to ``df``.

        Indicators are computed per ticker with stockstats and written back
        onto the rows of that ticker, so values stay aligned however ``df``
        is ordered. ``df`` is modified in place and also returned.

        Args:
            df: Frame with at least ``tic`` plus the price columns that
                stockstats needs.

        Returns:
            The same ``df`` object with the indicator columns added and
            leading gaps back-filled within each ticker.
        """
        stock = Sdf.retype(df.copy())

        for out_col, _ in self._INDICATORS:
            df[out_col] = np.nan

        for tic in df["tic"].unique():
            # ``stock`` is a copy of ``df`` in the same row order, so the
            # values of this ticker map one-to-one onto the masked rows.
            rows = (df["tic"] == tic).to_numpy()
            per_ticker = stock[stock.tic == tic]
            for out_col, src_col in self._INDICATORS:
                df.loc[rows, out_col] = per_ticker[src_col].to_numpy()

        # Back-fill inside each ticker only; a global bfill would copy the
        # next row's value, which belongs to a different ticker.
        filled = df.groupby("tic").bfill()
        df[filled.columns] = filled
        return df

    def calculate_turbulance(self, df):
        """Compute a per-date turbulence index from closing prices.

        For each date, the deviation of that date's prices from the mean of
        all earlier dates is weighted by the inverse covariance of those
        earlier prices. The first two valid values, and any value outside
        (0, 10000), are reported as 0.

        Args:
            df: Frame with ``date``, ``tic`` and ``close`` columns.

        Returns:
            A frame with columns ``date`` and ``turbulence``, one row per
            distinct date in ascending order.
        """
        price_pivot = df.pivot(index="date", columns="tic", values="close")

        if price_pivot.isna().to_numpy().any():
            # NOTE(review): missing prices are replaced by random integers
            # in [45, 55), as in the original code. Results are therefore
            # not reproducible when a ticker has gaps.
            filler = np.random.randint(45, 55, size=price_pivot.shape)
            price_pivot = price_pivot.where(price_pivot.notna(), filler)

        turbulence_index = []
        valid_count = 0
        # Walk the pivot's own sorted index so each value lines up with the
        # date it is labelled with in the result.
        for i in range(len(price_pivot)):
            if i < 2:
                # Covariance is undefined with fewer than two past dates.
                turbulence_index.append(0)
                continue

            current_price = price_pivot.iloc[[i]]
            historical_price = price_pivot.iloc[:i]
            deviation = (current_price - historical_price.mean(axis=0)).to_numpy()
            try:
                inv_cov = np.linalg.inv(historical_price.cov().to_numpy())
            except np.linalg.LinAlgError:
                # Singular covariance: no usable value for this date.
                turbulence_index.append(0)
                continue

            value = deviation.dot(inv_cov).dot(deviation.T)[0][0]
            if 0 < value < 10000:
                valid_count += 1
                # Avoid large outliers while the calculation is just
                # beginning: drop the first two valid values.
                turbulence_index.append(value if valid_count > 2 else 0)
            else:
                turbulence_index.append(0)

        return pd.DataFrame(
            {"date": price_pivot.index, "turbulence": turbulence_index}
        )

    # Correctly spelled alias; the original misspelled name stays available.
    calculate_turbulence = calculate_turbulance

    def add_turbulence(self, df):
        """Merge the turbulence index into ``df`` on the ``date`` column.

        Returns:
            A new frame sorted by ``date`` then ``tic`` with an extra
            ``turbulence`` column.
        """
        turbulence_index = self.calculate_turbulance(df)
        df = df.merge(turbulence_index, on="date")
        df = df.sort_values(["date", "tic"]).reset_index(drop=True)
        return df

    def convert_to_date(self, df):
        """Return a copy of ``df`` with ``date`` reformatted to "%Y%m%d".

        The input ``date`` values must be "%Y-%m-%d" strings; ``df`` itself
        is not modified.
        """
        df1 = df.copy()
        parsed = pd.to_datetime(df1["date"].astype(str), format="%Y-%m-%d")
        df1["date"] = parsed.dt.strftime("%Y%m%d")
        return df1

    def final_data(self):
        """Run the whole pipeline and return the fully processed frame.

        Writes the result to ``Final.csv`` when ``self.save`` is truthy.
        """
        df = self.fetch_data()
        df_indicators = self.add_technical_indicator(df)
        df_turbulence = self.add_turbulence(df_indicators)
        df_final = self.convert_to_date(df_turbulence)
        if self.save:
            df_final.to_csv("Final.csv", index=False)
        # Return the processed frame (the one that is saved), not the raw one.
        return df_final


# Date window passed to the downloader below.
TRAIN_START_DATE = '2010-01-01'
TRAIN_END_DATE = '2021-10-01'
# Second window; not referenced in the visible part of this file.
TEST_START_DATE = '2021-10-01'
TEST_END_DATE = '2023-04-01'

# `save` has to be passed by keyword: a positional argument after keyword
# arguments is a SyntaxError.
df = stockDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TRAIN_END_DATE,
    TICKERS="DOW JONES",
    save=True,
).final_data()