In [None]:
import requests
import pandas as pd
import io
import os
import numpy as np
from tabulate import tabulate
from dotenv import load_dotenv
from IPython.display import HTML
from yahoo_fin.stock_info import get_data, get_stats, tickers_nasdaq, tickers_other

# Locate project folders relative to the directory the kernel was started in:
# `path` is the parent of the working directory and `folder_path` is the
# `stock_data` folder inside that parent.
cwd = os.getcwd()
path = os.path.split(cwd)[0]
folder_path = os.path.join(path, 'stock_data')


In [None]:
def to_closest_friday(date):
    """Shift ``date`` onto the Friday of its Monday-to-Sunday week.

    Monday through Thursday move forward to the upcoming Friday, a Friday is
    returned unchanged, and Saturday/Sunday move back to the Friday just past.
    Note that a Monday therefore lands on the following Friday (4 days ahead),
    which is not the calendar-nearest Friday.

    Args:
        date: A timestamp exposing ``dayofweek`` (Monday=0 ... Sunday=6),
            such as ``pd.Timestamp``.

    Returns:
        The shifted timestamp; only whole days are added or subtracted.
    """
    friday = 4
    offset = friday - date.dayofweek
    if offset == 0:
        return date
    # A negative offset (weekend) steps back; a positive one steps forward.
    return date + pd.Timedelta(days=offset)


def get_ticker_list(directory=None):
    """Parse the first HTML table out of every ``.txt`` file in a directory.

    Args:
        directory: Folder to scan. When omitted, the ``preprocessing_data``
            folder under the module-level ``path`` is used.

    Returns:
        list[pandas.DataFrame]: One frame per ``.txt`` file, ordered by file
        name. Empty when the folder holds no ``.txt`` files.

    Raises:
        FileNotFoundError: If the folder does not exist.
        ValueError: Propagated from ``pd.read_html`` when a file has no table.
    """
    if directory is None:
        directory = os.path.join(path, 'preprocessing_data')

    # os.listdir returns names in arbitrary order; sort so reruns are reproducible.
    txt_files = sorted(name for name in os.listdir(directory) if name.endswith('.txt'))

    tables = []
    for name in txt_files:
        with open(os.path.join(directory, name), 'r', encoding='utf-8') as f:
            content = f.read()
        # Passing literal HTML to read_html is deprecated (pandas >= 2.1);
        # a file-like wrapper works on old and new versions alike.
        tables.append(pd.read_html(io.StringIO(content))[0])
    return tables

def calculate_wma(data):
    """Linearly weighted moving average of a single rolling window.

    Args:
        data: 1-D array of window values, assumed to be ordered oldest to
            newest (the row order that ``Series.rolling(...).apply(...,
            raw=True)`` passes in for a chronologically sorted series).

    Returns:
        float: Weighted mean where the i-th value (1-based) has weight ``i``,
        so the newest value weighs ``len(data)`` and the oldest weighs 1.
    """
    # Weights must ascend with position so the most recent observation
    # dominates; the previous descending weights favoured the oldest value.
    # Sizing them from the input also lifts the fixed 30-element restriction.
    weights = np.arange(1, len(data) + 1)
    return np.dot(data, weights) / weights.sum()

def get_stock_df(ticker):
    """Fetch weekly price history for ``ticker`` with Friday-aligned dates.

    Args:
        ticker: Symbol passed through to ``yahoo_fin.stock_info.get_data``.

    Returns:
        The frame produced by ``get_data`` (``interval="1wk"``, dates as the
        index) after every index entry has been mapped through
        ``to_closest_friday``.
    """
    weekly = get_data(ticker, index_as_date=True, interval="1wk")
    friday_index = weekly.index.map(to_closest_friday)
    weekly.index = friday_index
    return weekly

In [None]:
# Gather the 'Symbol' column from every parsed listing table.
dfs = get_ticker_list()
tickers = [symbol for table in dfs for symbol in table['Symbol']]

# Keep only string symbols (non-string entries are presumably NaN placeholders
# for blank cells), drop repeats so no ticker is processed twice, and sort for
# a stable order.
ticker_list = sorted({symbol for symbol in tickers if isinstance(symbol, str)})
len(ticker_list)

In [None]:
#### EXAMPLE
# Build and cache the indicator frame for the first two tickers only.

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(folder_path, exist_ok=True)

for stock in ticker_list[:2]:
    df = get_stock_df(stock).drop(columns=["adjclose", "ticker"])
    # 30-row weighted moving average of the close; NaN until 30 rows exist.
    df['wma30'] = df['close'].rolling(window=30).apply(calculate_wma, raw=True)
    # Rolling maximum of 'high' over up to 5*52 weekly rows.
    df['fyh'] = df['high'].rolling(window=5*52, min_periods=1).max()
    # Removes the warm-up rows where wma30 is NaN (and any other incomplete rows).
    df = df.dropna()
    df.to_pickle(os.path.join(folder_path, stock + '.pkl'))

In [None]:
# Reload the cached frame for ticker 'A'. This requires that A.pkl was already
# written to folder_path (e.g. by the example cell, if 'A' is among its tickers).
# NOTE: unpickling can execute arbitrary code; only load files this notebook produced.
df = pd.read_pickle(os.path.join(folder_path, 'A.pkl'))

In [None]:
# Render the frame loaded above using the notebook's rich display.
display(df)