In [24]:
from sklearn.svm import SVR
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import data_processing as pf
import altair as alt
from sklearn.metrics import mean_squared_error, r2_score

In [22]:
alt.renderers.enable('default')
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [31]:
googl = yf.Ticker('GOOGL')
amzn = yf.Ticker('AMZN')
aapl = yf.Ticker('AAPL')
fb = yf.Ticker('FB')
msft = yf.Ticker('MSFT')
tsla = yf.Ticker('TSLA')

In [32]:
aapl_df = aapl.history(period='max')
amzn_df = amzn.history(period='max')
fb_df = fb.history(period='max')
googl_df = googl.history(period='max')
msft_df = msft.history(period='max')
tsla_df = tsla.history(period='max')

In [33]:
stock_dfs = [aapl_df, amzn_df, fb_df, googl_df, msft_df, tsla_df]

for stock_df in stock_dfs:
    stock_df = pf.date_time_prep(stock_df) 
    stock_df = pf.rolling_aves(stock_df)
    stock_df = pf.future_close_setup(stock_df, 5)

In [7]:
def combiner(stock_dfs):
    combine_df = stock_dfs[0]
    i = 1
    while i < len(stock_dfs):
        combine_df = pd.concat([combine_df, stock_dfs[i]])
        i += 1
    
    combine_df.sort_values(by=['Date'], inplace=True)
    return combine_df

In [35]:
combine_df = combiner(stock_dfs)
X = combine_df.iloc[:,:-1]
y = combine_df.iloc[:,-1:]
X

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Day,Month,Year,...,20 Day Open Mean,20 Day High Mean,20 Day Low Mean,20 Day Close Mean,20 Day Volume Mean,20 Day Open Var,20 Day High Var,20 Day Low Var,20 Day Close Var,20 Day Volume Var
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1980-12-12,0.100922,0.101361,0.100922,0.100922,469033600,0.0,0.0,12,12,1980,...,0.100922,0.101361,0.100922,0.100922,4.690336e+08,0.000000,0.000000,0.000000,0.000000,0.000000e+00
1980-12-15,0.096096,0.096096,0.095657,0.095657,175884800,0.0,0.0,15,12,1980,...,0.098509,0.098728,0.098289,0.098289,3.224592e+08,0.000006,0.000007,0.000007,0.000007,2.148405e+16
1980-12-16,0.089075,0.089075,0.088636,0.088636,105728000,0.0,0.0,16,12,1980,...,0.095364,0.095510,0.095071,0.095071,2.502155e+08,0.000024,0.000025,0.000025,0.000025,2.476102e+16
1980-12-17,0.090830,0.091268,0.090830,0.090830,86441600,0.0,0.0,17,12,1980,...,0.094230,0.094450,0.094011,0.094011,2.092720e+08,0.000022,0.000022,0.000022,0.000022,2.359987e+16
1980-12-18,0.093463,0.093902,0.093463,0.093463,73449600,0.0,0.0,18,12,1980,...,0.094077,0.094340,0.093901,0.093901,1.821075e+08,0.000017,0.000018,0.000018,0.000018,2.183153e+16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-16,236.279999,240.050003,235.940002,237.710007,28092196,0.0,0.0,16,3,2021,...,234.551501,237.005500,232.011998,234.417500,3.036902e+07,18.366063,12.327005,21.546973,22.803114,5.100401e+13
2021-03-16,2065.989990,2112.989990,2059.290039,2083.889893,1526368,0.0,0.0,16,3,2021,...,2058.594989,2088.367505,2034.637006,2059.938013,1.631613e+06,644.460419,487.093496,905.473832,1046.909568,1.794415e+11
2021-03-16,3104.969971,3128.909912,3075.860107,3091.860107,2538764,0.0,0.0,16,3,2021,...,3123.381482,3155.164990,3074.216003,3112.219482,3.631118e+06,7725.218989,7981.856493,10189.342291,9892.646807,8.415076e+11
2021-03-16,276.084991,282.130005,274.920013,279.279999,22437665,0.0,0.0,16,3,2021,...,264.551749,269.034497,260.545000,264.491499,2.008895e+07,28.640484,25.094165,35.050287,46.558880,2.339193e+13


In [18]:
def svr(combine_df, split_time, stock_dfs):
    X = combine_df.iloc[:,:-1]
    y = combine_df.iloc[:,-1:]
    #X = stock_df['Days From IPO'].values.reshape(-1, 1)
    #y = stock_df['Close'].values.reshape(-1, 1)
    
    # Does train/Test Split on chosen time
    # Change the -50 to a differnt value to change split point
    split_mark = int(len(combine_df)-(split_time*len(stock_dfs)))
    X_train = X.head(split_mark)
    X_test = X.tail(len(combine_df) - split_mark)
    y_train = y.head(split_mark)
    y_test = y.tail(len(combine_df) - split_mark)

    svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1) 
    svr_rbf.fit(X_train,y_train)
    svr_close_pred = svr_rbf.predict(X_test)
    svr_train_pred = svr_rbf.predict(X_train)


    columns = ['Apple Price', 'Apple Prediction', 
               'Amazon Price', 'Amazon Prediction',
               'Facebook Price', 'Facebook Prediction',
               'Google Price', 'Google Prediction',
               'Microsoft Price', 'Microsoft Prediction',
               'Tesla Price', 'Tesla Prediction']

    single_split_mark = int(len(stock_dfs[0])-split_time)
    results_df = pd.DataFrame(columns=columns, index=stock_dfs[0].tail(len(stock_dfs[0])-single_split_mark).index)

    i = 0
    for stock_df in stock_dfs:
        single_split_mark = int(len(stock_df)-split_time)
        results_df[columns[i]] = stock_df.iloc[:,-1:].tail(len(stock_df) - single_split_mark)
        results_df[columns[i+1]] = svr_rbf.predict(stock_df.iloc[:,:-1].tail(len(stock_df) - single_split_mark))
        i += 2

    results_df.reset_index(inplace=True)
    results_df = results_df.melt('Date', var_name='Company', value_name='Price')
    # Plots Results
    line_plot = alt.Chart(results_df).mark_line().encode(
        x = 'Date',
        y = 'Price',
        color = 'Company'
    )
    
    
    
    train_score = r2_score(y_train, svr_train_pred)
    print("Training R2 Score: " + str(train_score))
    print()

    
    model_score = r2_score(y_test.dropna(), svr_close_pred[:len(y_test.dropna())])
    print()
    print("R2 Score: " + str(model_score))
    
    return line_plot





In [26]:
aapl_df = aapl.history(period='max')
amzn_df = amzn.history(period='max')
fb_df = fb.history(period='max')
googl_df = googl.history(period='max')
msft_df = msft.history(period='max')
tsla_df = tsla.history(period='max')

stock_dfs = [aapl_df, amzn_df, fb_df, googl_df, msft_df, tsla_df]

for stock_df in stock_dfs:
    stock_df = pf.date_time_prep(stock_df) 
    stock_df = pf.rolling_aves(stock_df)
    stock_df = pf.future_close_setup(stock_df, 5)
  
combine_df = combiner(stock_dfs)
#svr(combine_df, 365, stock_dfs)


  return f(*args, **kwargs)
Training R2 Score: 0.9356250879517715


R2 Score: -0.5269703179860754
