In [1]:
import numpy as np
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

import datetime as dt
import mplfinance as mpf

import time

import os
from os import listdir
from os.path import isfile, join

import statsmodels.api as sm
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

In [2]:
# Default Values
PATH = "F:/Code/Python/Jupyter/Finance/datasets/Stock List/"
S_YEAR = 2017
S_MONTH = 1
S_DAY = 3
S_DATE_STR = f"{S_YEAR}-{S_MONTH}-{S_DAY}"
S_DATE_DATETIME = dt.datetime(S_YEAR, S_MONTH, S_DAY)

E_YEAR = 2021
E_MONTH = 8
E_DAY = 19
E_DATE_STR = f"{E_YEAR}-{E_MONTH}-{E_DAY}"
E_DATE_DATETIME = dt.datetime(E_YEAR, E_MONTH, E_DAY)

In [3]:
# arrays for not downloaded stocks 
stocks_not_downloaded = []
missing_stocks = []
test_list = ["ABR", "ACER", "ABBV", "AAPL"]

In [12]:
# Function that returns a dataframe from a csv
def get_df_from_csv(folder, ticker):
    try:
        df = pd.read_csv(folder + ticker + ".csv", index_col='Date', parse_dates=True)
    except FileNotFoundError:
        print("File doesn't exist")
    else:
        return df

In [5]:
# Get Stock File Names in a List
files = [x for x in listdir(PATH) if isfile(join(PATH, x))]
tickers = [os.path.splitext(x)[0] for x in files]
len(tickers)

470

In [6]:
# Saves stock data to csv
def save_to_csv_from_yahoo(ticker, syear, smonth, sday, eyear, emonth, eday):
    start = dt.datetime(syear, smonth, sday)
    end = dt.datetime(eyear, emonth, eday)
    
    try:
        print("Get data for: ", ticker)
        df = web.DataReader(ticker, 'yahoo', start, end)['Adj Close']
        
        # Prevent missing data
        time.sleep(5)
        
        df.to_csv('F:/Code/Python/Jupyter/Finance/datasets/Stock List/Update/' + ticker + '.csv')
    except Exception as ex:
        stocks_not_downloaded.append(ticker)
        print("Couldn't get data for: ", ticker)

In [None]:
# Download stock data updates
for x in range(0, 500):
    save_to_csv_from_yahoo(tickers[x], 2021, 8, 20, 2022, 4, 3)

print("Downloading updates stock data finished!")

In [None]:
# Download stock data of missing stocks
for x in missing_stocks:
    save_to_csv_from_yahoo(x, 2022, 4, 1, 2022, 4, 3)

print("Downloading missing stock data finished!")
stocks_not_downloaded

In [43]:
# Merge updated data with 'old' data
def update_stock_data(ticker):
    update_folder = "F:/Code/Python/Jupyter/Finance/datasets/Stock List/Update/"
    stock_folder = PATH
    
    # Get dataframes
    update_df = get_df_from_csv(update_folder, ticker)
    original_df = get_df_from_csv(stock_folder, ticker)
    
    # Clean data
    original_df.drop(original_df.columns[original_df.columns.str.contains('unnamed',case = False)], axis = 1, inplace = True)
    original_df = original_df.drop('daily_return', 1)
    
    # Combine both dataframes
    join_df = pd.concat([original_df, update_df])
    
    return join_df

In [8]:
# Save dataframe to csv
def save_df_to_csv(df, folder, ticker):
    df.to_csv(folder + ticker + ".csv")

In [53]:
# Update files with new data
def update_stocks(stock_folder):
    for x in missing_stocks:
        try:
            print("Updating: ", x)
            new_df = update_stock_data(x)
            save_df_to_csv(new_df, stock_folder, x)
        except Exception as ex:
            print(ex)

<h3>Update stock data</h3>

In [None]:
update_stocks(PATH)

In [50]:
# Add daily returns to dataframe
def add_daily_return_to_df(df, ticker):
    df['daily_return'] = (df['Adj Close'] / df['Adj Close'].shift(1)) - 1
    df.to_csv(PATH + ticker + '.csv')

    return df  

In [None]:
# Update daily returns
for x in tickers:
    try:
        print("Updating:", x)
        new_df = get_df_from_csv(PATH, x)
        new_df = add_daily_return_to_df(new_df, x)
        
        save_df_to_csv(new_df, PATH, x)
    except Exception as ex:
        print(ex)

<h3>Regression (Look at Python Finance 5)</h3>

In [56]:
def calc_projected_roi(ticker):
    a_df = get_df_from_csv(PATH, ticker) 
    
    a_df = a_df.asfreq('d') # Use daily prices
    a_df.index # Check frequency ???
    a_df = a_df.fillna(method='ffill') # Fill missing values

    # Drop unnecessary columns
    a_df.drop(a_df.columns[a_df.columns.str.contains('unnamed',case = False)], axis = 1, inplace = True)
    a_df = a_df.drop(['daily_return'], axis=1)
    
    # Optimum lag (Time without new data, ex weekend)
    lags = ar_select_order(a_df, maxlag=30)

    # Create model
    model = AutoReg(a_df['Adj Close'], lags.ar_lags)
    model_fit = model.fit()

    # Model Training settings based on df datapoints)
    print("Length :",len(a_df))
    #train_df = a_df.iloc[50:1369]
    #test_df = a_df.iloc[1369:]
    
    # Dynamic counts
    train_ds = (len(a_df) - 50) / 100 * 80
    train_df = a_df.iloc[50:int(train_ds)]
    test_df = a_df.iloc[int(train_ds):]

    # Actual training model
    train_model = AutoReg(a_df['Adj Close'], 500).fit(cov_type="HC0")
    start = len(train_df)
    end = len(train_df) + len(test_df) - 1

    pred = train_model.predict(start=start, end=end, dynamic=True)
    forecast = train_model.predict(start=end, end=end+60, dynamic=True)

    # Prediction prices
    s_price = forecast.head(1).iloc[0]
    e_price = forecast.iloc[-1]

    # Get return over prediction
    return (e_price - s_price) / s_price

In [57]:
def get_projected_rois():
    ticker = []
    roi = []
    
    for x in tickers:
        print("Working on: ", x)
        
        try:
            t_roi = calc_projected_roi(x)
        except Exception as ex:
            print("Data corrupted")
            print(ex)
        else:
            ticker.append(x)
            print("ROI: ", t_roi)
            roi.append(t_roi)
            
    return pd.DataFrame({'Ticker': ticker, 'ROI': roi})

In [None]:
projected_roi_df = get_projected_rois()
projected_roi_df

<h3>Get top 20 stocks based on ROI</h3>

In [61]:
projected_roi_df.sort_values(by=['ROI'], ascending=False)[0:20]

Unnamed: 0,Ticker,ROI
55,ADXS,2.033012
210,ARDX,1.971576
80,AGRX,1.698449
288,AXAS,1.351575
193,APPN,1.040243
199,APVO,0.907521
449,CAR,0.831782
412,BSGM,0.782208
450,CARA,0.709601
233,ASPN,0.708718
