In [33]:
import numpy as np
import pandas as pd
import pandas_datareader
import yaml
import datetime
import math
from time import sleep
# !pwd
# Load the API key from a YAML file kept two directories above the notebook,
# so the secret itself never appears in the notebook source.
# ALPHA_API_KEY is later passed as `api_key` to pandas_datareader's AVTimeSeriesReader.
with open('../../alpha_key.yml', 'r') as f:
    ALPHA_API_KEY = yaml.safe_load(f)['alpha_api_key']

In [85]:
# Symbols to download and process, listed in the order they are handled.
tickers = [
    'ARW', 'OBNK', 'UEIC', 'FERG',
    'SPUC', 'CPRI', 'PDEX', 'TSOC', 'KR',
    'CNO', 'CXW', 'NVDA', 'AAPL',
    'AMZN', 'TSLA', 'ADXS', 'ALVR',
    'INTC', 'NFLX', 'GOOG', 'BABA', 'CRM',
    'CSCO', 'GSX', 'AMD', 'YELP',
]
def format_dates(daily_stocks_data):
    """Return a copy of the frame with its index moved into a ``date`` column.

    The input frame is left untouched. In the returned frame the former index
    values are stored in the ``date`` column and the index is replaced by a
    fresh 0..n-1 range.
    """
    with_date_column = daily_stocks_data.assign(date=daily_stocks_data.index)
    return with_date_column.reset_index(drop=True)
def concat_stocks(dfs, keys):
    """Join several frames side by side, labelling each frame's columns with its key.

    ``dfs`` and ``keys`` are passed straight to ``pd.concat`` along the column
    axis, so the result carries ``keys`` as the outer column level.
    """
    return pd.concat(dfs, keys=keys, axis='columns')
def add_percent_change(df, metric):
    """Add a ``<metric>_percent_change`` column holding the row-over-row change.

    The column is written onto ``df`` itself (the frame is modified in place)
    and the same frame is returned so the call can be chained.
    """
    df[f'{metric}_percent_change'] = df[metric].pct_change()
    return df
def add_percent_changes(df, metrics=None):
    """Apply ``add_percent_change`` for each requested column.

    When ``metrics`` is None, every column present at call time is processed;
    the labels are captured up front, so columns created during the loop are
    not themselves processed. Returns the frame handed back by the last
    ``add_percent_change`` call (or ``df`` unchanged if nothing was processed).
    """
    selected = list(df.columns) if metrics is None else metrics
    for name in selected:
        df = add_percent_change(df, name)
    return df
def add_weekly_cat(df):
    """Add cyclical day-of-week features ``week_cos`` and ``week_sin``.

    Each index label is parsed as a ``'%Y-%m-%d'`` string, converted to its
    weekday number (Monday=0 ... Sunday=6) and mapped onto the unit circle.

    The circle is divided into seven steps because ``weekday()`` yields seven
    distinct values; dividing by 6 would give Monday and Sunday the same angle.

    Args:
        df: frame whose index holds ``'%Y-%m-%d'`` date strings.

    Returns:
        The same frame, modified in place, with the two new columns.

    Raises:
        ValueError: if an index label does not match ``'%Y-%m-%d'``.
        TypeError: if an index label is not a string.
    """
    days_per_week = 7
    day_cos, day_sin = [], []
    for date in df.index:
        day = datetime.datetime.strptime(date, '%Y-%m-%d').weekday()
        radians = 2 * math.pi * day / days_per_week
        day_cos.append(math.cos(radians))
        day_sin.append(math.sin(radians))
    df['week_cos'] = day_cos
    df['week_sin'] = day_sin
    return df
def add_yearly_cat(df):
    """Add cyclical day-of-year features ``year_cos`` and ``year_sin``.

    Each index label is parsed as a ``'%Y-%m-%d'`` string; its day-of-year
    number is scaled by 365 into an angle, and the cosine and sine of that
    angle are stored. The frame is modified in place and returned.
    """
    def _angle(stamp):
        day = datetime.datetime.strptime(stamp, '%Y-%m-%d').timetuple().tm_yday
        return 2*math.pi*day/365

    angles = [_angle(stamp) for stamp in df.index]
    df['year_cos'] = [math.cos(angle) for angle in angles]
    df['year_sin'] = [math.sin(angle) for angle in angles]
    return df
def add_stats(df, lengths, metrics=None):
    """Apply ``add_metric_stats`` for each requested column.

    When ``metrics`` is None, every column present at call time is processed;
    the labels are copied into a list first so the statistic columns created
    along the way are not processed in turn. Returns the frame handed back by
    the last ``add_metric_stats`` call.
    """
    selected = list(df.columns) if metrics is None else metrics
    for name in selected:
        df = add_metric_stats(df, lengths, name)
    return df
def add_metric_stats(df, lengths, metric):
    """Add rolling-window summary columns for one column of ``df``.

    For every window size in ``lengths`` a rolling window (``min_periods=1``)
    over ``df[metric]`` is summarised as mean, median, standard deviation,
    skew, the 5th/95th/10th/90th percentiles, maximum, minimum and the
    max-minus-min spread. Columns are named ``<metric>_<length>_<statistic>``.
    The frame is modified in place and returned.
    """
    for length in lengths:
        win = df[metric].rolling(window=length, min_periods=1)
        highest = win.max()
        lowest = win.min()
        # Insertion order of this dict fixes the order of the new columns.
        new_columns = {
            f'{metric}_{length}_mean': win.mean(),
            f'{metric}_{length}_median': win.median(),
            f'{metric}_{length}_std': win.std(),
            f'{metric}_{length}_skew': win.skew(),
            f'{metric}_{length}_quantile_5': win.quantile(0.05),
            f'{metric}_{length}_quantile_95': win.quantile(0.95),
            f'{metric}_{length}_quantile_10': win.quantile(0.10),
            f'{metric}_{length}_quantile_90': win.quantile(0.90),
            f'{metric}_{length}_high': highest,
            f'{metric}_{length}_low': lowest,
            f'{metric}_{length}_spread': highest.subtract(lowest),
        }
        for column_name, values in new_columns.items():
            df[column_name] = values
    return df

def data_stats_to_file(tickers):
    """Build and cache the per-ticker feature pickle ``./<ticker>_daily_stats.pkl``.

    For each ticker the existing stats pickle is loaded as a readability check.
    If that fails, the raw frame is run through ``add_stats`` (rolling windows
    of 10/50/91/182/274/365 rows over open/close/high/low/volume),
    ``add_percent_changes``, ``add_weekly_cat`` and ``add_yearly_cat``, and the
    result is pickled. A ``'<ticker> done'`` message is displayed either way.

    Args:
        tickers: ticker symbols, in the same order as the frames returned by
            ``read_data_from_file``.
    """
    # NOTE(review): read_data_from_file is not defined in the visible notebook
    # cells - confirm it is defined (and returns one frame per ticker, in order)
    # before this runs on a fresh kernel.
    daily_stocks_data_raw_list = read_data_from_file(tickers)
    for ticker, df in zip(tickers, daily_stocks_data_raw_list):
        stats_path = f'./{ticker}_daily_stats.pkl'
        try:
            pd.read_pickle(stats_path)
        except Exception:
            # Missing or unreadable cache: rebuild it. `Exception` (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate instead of
            # triggering a rebuild that overwrites the cache.
            (
                df.pipe(add_stats, lengths=[10,50,91,182,274,365], metrics=['open','close','high','low','volume'])
                .pipe(add_percent_changes)
                .pipe(add_weekly_cat)
                .pipe(add_yearly_cat)
                .to_pickle(stats_path)
            )
        display(f'{ticker} done')
def combine_to_file(tickers):
    """Merge the per-ticker stats pickles into one wide frame and pickle it.

    Reads ``./<ticker>_daily_stats.pkl`` for every ticker, concatenates the
    frames along the column axis with the ticker symbols as the outer column
    level, and writes the result to ``./<tickers>_daily.pkl``.

    Note that the output file name embeds the string form of the whole
    ``tickers`` list; it is kept as is so existing readers of that path keep
    working.

    Args:
        tickers: ticker symbols whose stats pickles already exist.

    Raises:
        FileNotFoundError: if a ticker's stats pickle is missing.
    """
    # pandas has no `from_pickle`; `read_pickle` is the loader.
    daily_stocks_data_list = [pd.read_pickle(f'./{ticker}_daily_stats.pkl') for ticker in tickers]
    daily_stocks_data = pd.concat(daily_stocks_data_list, axis='columns', keys=tickers)
    daily_stocks_data.to_pickle(f'./{tickers}_daily.pkl')

def read_data(tickers, batch_size=5, pause_seconds=61):
    """Download daily time series for every ticker, pausing between batches.

    Tickers are fetched in consecutive batches of ``batch_size`` with a
    ``pause_seconds`` sleep between batches (none before the first or after the
    last). Each ticker is downloaded exactly once and the frames are returned
    in the same order as ``tickers``.

    The earlier remainder slice ``tickers[-len(tickers)%5:]`` evaluated as
    ``(-len(tickers)) % 5``, which re-downloaded overlapping tickers for most
    list lengths and skipped tickers for lists shorter than five.

    Args:
        tickers: sequence of ticker symbols.
        batch_size: number of requests issued between pauses (default 5).
        pause_seconds: length of each pause in seconds (default 61).

    Returns:
        List with one downloaded frame per ticker.

    Raises:
        ValueError: if ``batch_size`` is 0 (from ``range``).
    """
    daily_stocks_data_raw_list = []
    for start in range(0, len(tickers), batch_size):
        if start:
            # Only pause between batches, never before the first one.
            sleep(pause_seconds)
        for ticker in tickers[start:start + batch_size]:
            reader = pandas_datareader.av.time_series.AVTimeSeriesReader(symbols=ticker, api_key=ALPHA_API_KEY, function='TIME_SERIES_DAILY')
            daily_stocks_data_raw_list.append(reader.read())
    return daily_stocks_data_raw_list
def read_data_to_file(tickers):
    """Download and cache the raw daily series ``./<ticker>_daily_raw.pkl``.

    For each ticker the existing raw pickle is loaded as a readability check.
    If that fails, the series is downloaded with the module-level
    ``ALPHA_API_KEY`` and pickled. A ``'<ticker> done'`` message is displayed
    either way.

    Only actual downloads count toward the pauses: the counter starts at 1, so
    the function sleeps 61 seconds after the 4th download and then after every
    5th download.

    Args:
        tickers: ticker symbols to make available on disk.
    """
    i = 1
    for ticker in tickers:
        raw_path = f'./{ticker}_daily_raw.pkl'
        try:
            pd.read_pickle(raw_path)
        except Exception:
            # Missing or unreadable cache: download it. `Exception` (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate instead of
            # triggering an API call that overwrites the cache.
            reader = pandas_datareader.av.time_series.AVTimeSeriesReader(symbols=ticker, api_key=ALPHA_API_KEY, function='TIME_SERIES_DAILY')
            reader.read().to_pickle(raw_path)
            i += 1
            if i % 5 == 0:
                sleep(61)
        display(f'{ticker} done')

In [86]:
# read_to_file_data(tickers)
# Step 1: make sure a raw pickle (./<ticker>_daily_raw.pkl) exists for every ticker,
# downloading any that cannot be loaded from disk.
read_data_to_file(tickers)
display('adding stats')
# Step 2: build the per-ticker feature pickles (./<ticker>_daily_stats.pkl);
# this must run after step 1 has finished.
data_stats_to_file(tickers)

# daily_stocks_data = pd.read_pickle(f'./{tickers}_daily.pkl')
# display([col for col in daily_stocks_data.columns])
# display(daily_stocks_data.columns)
# display(daily_stocks_data)

'ARW done'

'OBNK done'

'UEIC done'

'FERG done'

'SPUC done'

'CPRI done'

'PDEX done'

'TSOC done'

'KR done'

'CNO done'

'CXW done'

'NVDA done'

'AAPL done'

'AMZN done'

'TSLA done'

'ADXS done'

'ALVR done'

'INTC done'

'NFLX done'

'GOOG done'

'BABA done'

'CRM done'

'CSCO done'

'GSX done'

'AMD done'

'YELP done'

'adding stats'

'ARW done'

'OBNK done'

'UEIC done'

'FERG done'

'SPUC done'

'CPRI done'

'PDEX done'

'TSOC done'

'KR done'

'CNO done'

'CXW done'

'NVDA done'

'AAPL done'

'AMZN done'

'TSLA done'

'ADXS done'

'ALVR done'

'INTC done'

'NFLX done'

'GOOG done'

'BABA done'

'CRM done'

'CSCO done'

'GSX done'

'AMD done'

'YELP done'

# Extras for keras manipulations

In [None]:

# def to_ts_df(daily_stocks_data, lookback, metric):
#     ## column names
#     columns = list()
#     for i in range(lookback):
#         columns.append(f'{metric}_{i}')
#     columns.append(f'{metric}_target')
#     df = pd.DataFrame(columns=columns)
#     ## columns
#     data = daily_stocks_data[metric].to_numpy()
#     for index, col in enumerate(df.columns):
#         df[col] = data[index:len(data)-lookback+index]
#     ## dates index
#     dates = daily_stocks_data.date.to_numpy()[:-lookback]
#     df.insert(0, 'dates', dates)
#     df.dropna(axis='index', inplace=True)
#     return df
# def to_ts(df, metric, lookback):
#     data, targets = list(), list()
#     for i in range(lookback,len(df.index)):
#         data.append(df.iloc[i-lookback:i,:].values) ## first four metrics
#         targets.append(df[metric].to_list()[i])
#     data = np.array(data)
#     targets = np.array(targets)
#     return data, targets
# def min_max_scale(col):
#     scaled = col.subtract(col.min()).divide(col.max()-col.min())
#     return scaled
# def multi_stock_ts_split(df,tickers): ## could be sped up 
#     data_tr, data_te, targets_tr, targets_te = [],[],[],[]
#     for ticker in tickers:
#         data, targets = to_ts(df[ticker].dropna(), 'low', lookback) ## drops nan for each stock
#         x = train_test_split(data, targets, shuffle=False)
#         data_tr.append(x[0])
#         data_te.append(x[1])
#         targets_tr.append(x[2]) 
#         targets_te.append(x[3])    
#     return np.concatenate(data_tr), np.concatenate(data_te), np.concatenate(targets_tr), np.concatenate(targets_te)
# df = pd.read_pickle(f"./{tickers}_daily.pkl")
# df = df.apply(min_max_scale)
# features = len(df.columns)
# display(df)