# Daily Performance - dailyPerf_2022-01-01_2022-04-14

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import mean_squared_error as MSE
import os

In [12]:
file = "dailyPerf_2022-01-01_2022-04-14.csv"
df = pd.read_csv(file)
df.head()

Unnamed: 0,Date,Symbol,Exchange,garch,svr,mlp,LSTM,prev_Close,prediction,volatility,ActualDate,ActualClose,ActualPercent,ActualStd,ActualTrend
0,2022-01-03,AAL,NASDAQ,7.80073,2.68296,2.67993,19.2347,18.75,1.0,2.68144,2022-01-04,19.02,1.44,,0.0
1,2022-01-03,AAPL,NASDAQ,2.24893,1.38374,1.34301,161.639,182.01,0.0,1.36338,2022-01-04,179.7,1.269161,,0.0
2,2022-01-03,AMAT,NASDAQ,4.97567,1.58561,1.55923,141.783,159.93,0.0,1.57242,2022-01-04,158.36,0.981679,,0.0
3,2022-01-03,AMC,NYSE,30.5176,3.79159,4.79857,37.5044,26.52,1.0,4.29508,2022-01-04,25.49,3.883861,,-1.0
4,2022-01-03,AMD,NASDAQ,8.68112,3.00354,2.91554,138.209,150.24,-1.0,2.95954,2022-01-04,144.42,3.873802,,-1.0


In [13]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split, GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [14]:
X = df[["garch", "svr", "mlp"]]
y = df["ActualPercent"]

In [15]:
# Setup the pipeline steps: steps
steps = [('scaler', StandardScaler()),
         ('elasticnet', ElasticNet())]

# Create the pipeline: pipeline 
pipeline = Pipeline(steps)

# Specify the hyperparameter space
parameters = {'elasticnet__l1_ratio':np.linspace(0,1,30)}

# Create train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Create the GridSearchCV object: gm_cv
gm_cv = GridSearchCV(pipeline, parameters)

# Fit to the training set
gm_cv.fit(X_train, y_train)

# Compute and print the metrics
r2 = gm_cv.score(X_test, y_test)
y_pred = gm_cv.predict(X_test)
mse = MSE(y_pred, y_test)
print("Tuned ElasticNet Alpha: {}".format(gm_cv.best_params_))
print("Tuned ElasticNet R squared: {}".format(r2))
print("Tuned ElasticNet MSE: {}".format(mse))

Tuned ElasticNet Alpha: {'elasticnet__l1_ratio': 0.13793103448275862}
Tuned ElasticNet R squared: 0.08153587733133816
Tuned ElasticNet MSE: 8.271451498604332


In [16]:
df["ElasticNet_vol"] = gm_cv.predict(X)

In [17]:
def get_price_movement(change):
    if change > 0:
        return 1
    elif change < 0:
        return -1
    else:
        return 0

def get_above_threshold(volatility, threshold):
    if volatility > threshold:
        return True
    else:
        return False

def get_prediction(df):
    if df["new_price_movement"] == 1 and df["new_above_threshold"]:
        return 1 
    elif df["new_price_movement"] == -1 and df["new_above_threshold"]:
        return -1 
    else:
        return 0

In [8]:
threshold = 2
df["actual_above_threshold"] = df.ActualPercent.apply(lambda x: get_above_threshold(x, threshold))
df["en_above_threshold"] = df["ElasticNet_vol"].apply(lambda x: get_above_threshold(x, threshold))

In [9]:
accuracy_score(df.actual_above_threshold, df.en_above_threshold, normalize=True)

0.6758608573436402

In [None]:
from sklearn.metrics import accuracy_score

## Performance

In [50]:
MSE(df["ElasticNet_vol"], df.ActualPercent)

5.71199751693596

In [51]:
from sklearn.model_selection import cross_val_score
cvscores_3 = cross_val_score(gm_cv, X, y,cv=3)
print(np.mean(cvscores_3))

0.0774054222346882
