In [6]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


# Relative paths to the two CSV inputs; they are resolved against the
# kernel's current working directory.
TRAIN_PATH = 'train_with_features_v2.csv'
TEST_PATH = 'test_with_features_v2.csv'

# Read both files into DataFrames bound to `train` and `test`.
train, test = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH))


Prune raw features

In [7]:
# Print the column index before pruning so every available column name is
# visible in the cell output.
print(train.columns)

# Raw columns removed from both frames. Kept in one list so the train and
# test frames are always pruned identically.
RAW_COLUMNS = ['PreClosePrice', 'Open', 'High', 'Low', 'Volume', 'Turnover', 'Close']

# `train` / `test` are rebound in place, so without errors='ignore' a second
# execution of this cell would raise KeyError (the columns are already gone).
train = train.drop(columns=RAW_COLUMNS, errors='ignore')
test = test.drop(columns=RAW_COLUMNS, errors='ignore')


Index(['Timestamp', 'PreClosePrice', 'Open', 'High', 'Low', 'Volume',
       'Turnover', 'Close', 'Hist_Ret_Close_3min', 'Hist_Ret_Close_5min',
       'Hist_Ret_Close_10min', 'Hist_Ret_Close_15min', 'Log_Return_Close_1min',
       'Avg Price', '4mean Price', 'Log_Return_Avg_1min', 'Hist_Ret_Avg_3min',
       'Hist_Ret_Avg_5min', 'Hist_Ret_Avg_10min', 'Hist_Ret_Avg_15min',
       'Target_Close_1min', 'Target_Close_5min_Mean',
       'Target_Close_10min_Mean', 'Target_Close_15min_Mean', 'Target_Avg_1min',
       'Target_Avg_5min_Mean', 'Target_Avg_10min_Mean',
       'Target_Avg_15min_Mean', 'Vol_5min_Close', 'Vol_10min_Close',
       'Vol_15min_Close', 'Sharpe_Log_ret_1min/Vol_5min_Close',
       'Sharpe_Log_ret_1min/Vol_10min_Close',
       'Sharpe_Log_ret_1min/Vol_15min_Close'],
      dtype='object')


Train Model

In [8]:
def trainLR_Close(train, test, target, features=None):
    """Fit a LinearRegression on `train` and print its performance on `test`.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing every column named in `features` plus `target`.
    target : str
        Name of the column to predict, e.g. 'Target_Close_1min'.
    features : list of str, optional
        Columns used as regressors. When omitted, the close-based default
        set defined below is used. Pass a shorter list to experiment with a
        subset instead of editing the function body.

    Returns
    -------
    None
        Results are printed: the coefficient array (in `features` order),
        then the MSE and R^2 of the predictions on `test`.
    """
    if features is None:
        features = [
            'Hist_Ret_Close_3min', 'Hist_Ret_Close_5min',
            'Hist_Ret_Close_10min', 'Hist_Ret_Close_15min',
            'Log_Return_Close_1min',
            'Avg Price', '4mean Price',
            'Vol_5min_Close', 'Vol_10min_Close', 'Vol_15min_Close',
            'Sharpe_Log_ret_1min/Vol_5min_Close',
            'Sharpe_Log_ret_1min/Vol_10min_Close',
            'Sharpe_Log_ret_1min/Vol_15min_Close',
        ]
    elif isinstance(features, str):
        # A bare string would select a 1-D Series; the estimator is fitted on
        # a 2-D frame, so wrap a single column name in a list.
        features = [features]
    else:
        # Normalise tuples / other iterables: a tuple key would be looked up
        # as a single column label rather than as a list of columns.
        features = list(features)

    model = LinearRegression()

    # The same `features` list drives both fit and predict so the two can
    # never get out of sync.
    model.fit(train[features], train[target])

    # Calculate prediction from model
    predictions = model.predict(test[features])

    # Results
    print(model.coef_)
    # NOTE: "%5f" means minimum field width 5 with the default 6 decimals
    # (it is not "%.5f"). Kept as-is so the printed output is unchanged.
    print("MSE: %5f"%mean_squared_error(test[target],predictions))
    print("R^2: %5f"%r2_score(test[target],predictions))


In [9]:
def trainLR_Avg(train, test, target, features=None):
    """Fit a LinearRegression on `train` and print its performance on `test`.

    Counterpart of the close-based trainer that uses the `*_Avg_*` return
    features by default.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing every column named in `features` plus `target`.
    target : str
        Name of the column to predict, e.g. 'Target_Avg_1min'.
    features : list of str, optional
        Columns used as regressors. When omitted, the default set defined
        below is used. Pass a shorter list to experiment with a subset
        instead of editing the function body.

    Returns
    -------
    None
        Results are printed: the coefficient array (in `features` order),
        then the MSE and R^2 of the predictions on `test`.
    """
    if features is None:
        # The volatility and Sharpe-style columns are the `*_Close` ones:
        # those are the only variants this default set references.
        features = [
            'Hist_Ret_Avg_3min', 'Hist_Ret_Avg_5min',
            'Hist_Ret_Avg_10min', 'Hist_Ret_Avg_15min',
            'Log_Return_Avg_1min',
            'Avg Price', '4mean Price',
            'Vol_5min_Close', 'Vol_10min_Close', 'Vol_15min_Close',
            'Sharpe_Log_ret_1min/Vol_5min_Close',
            'Sharpe_Log_ret_1min/Vol_10min_Close',
            'Sharpe_Log_ret_1min/Vol_15min_Close',
        ]
    elif isinstance(features, str):
        # A bare string would select a 1-D Series; the estimator is fitted on
        # a 2-D frame, so wrap a single column name in a list.
        features = [features]
    else:
        # Normalise tuples / other iterables: a tuple key would be looked up
        # as a single column label rather than as a list of columns.
        features = list(features)

    model = LinearRegression()

    # The same `features` list drives both fit and predict so the two can
    # never get out of sync.
    model.fit(train[features], train[target])

    # Calculate prediction from model
    predictions = model.predict(test[features])

    # Results
    print(model.coef_)
    # NOTE: "%5f" means minimum field width 5 with the default 6 decimals
    # (it is not "%.5f"). Kept as-is so the printed output is unchanged.
    print("MSE: %5f"%mean_squared_error(test[target],predictions))
    print("R^2: %5f"%r2_score(test[target],predictions))


In [10]:
# Fit and report the 1-minute target for each trainer, close-based first.
# The column index printed earlier also lists 5/10/15-minute mean targets;
# those names can be passed as the target in the same way.
for fit_and_report, target_column in (
    (trainLR_Close, 'Target_Close_1min'),
    (trainLR_Avg, 'Target_Avg_1min'),
):
    fit_and_report(train, test, target_column)

[ 4.20302636e-02 -3.08540394e-02  1.42047758e-02  1.39530683e-03
 -6.64468947e-02 -1.08480222e-03  1.08258310e-03 -1.12598031e-04
  1.78896619e-04  5.19071948e-05  3.07420467e-02 -6.78726491e-02
  6.82702042e-02]
MSE: 0.000005
R^2: -0.006977
[ 2.60207357e-02 -1.92553857e-02  9.65848640e-03  4.57279312e-03
  2.52498458e-01 -6.05211237e-03  6.05113110e-03  2.80008701e-05
  1.72186512e-04  7.12528153e-06  1.32293548e-01 -1.92521193e-02
  1.85291738e-01]
MSE: 0.000003
R^2: 0.207182
