# Results Analysis 

In [15]:
# Utilities: parallel helpers, data handling and progress bars
from joblib import Parallel, delayed
from pandarallel import pandarallel
# Initialise pandarallel with its own progress bar switched off.
pandarallel.initialize(progress_bar=False)
from multiprocessing import Pool, cpu_count
import warnings 
# NOTE(review): installs a blanket "ignore" filter, so warnings raised after
# this point (deprecations included) are hidden.
warnings.filterwarnings("ignore")
import os
import pandas as pd
import numpy as np
# NOTE(review): the `from tqdm import tqdm` line right below rebinds the name
# `tqdm` from the module to the class, so this module import has no lasting effect.
import tqdm
from tqdm import tqdm
# Register tqdm with pandas; bars are labelled "progress bar".
tqdm.pandas(desc="progress bar")
import gc
# Set before torch / tensorflow are imported further down.
# Presumably read by the HuggingFace tokenizers library -- TODO confirm.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

# Time packages
import time
import datetime

# Packages needed for pre-processing
from sklearn.preprocessing import MinMaxScaler

# Deep learning frameworks
import torch
import tensorflow as tf

# Visualization tools
import matplotlib.pyplot as plt
import seaborn as sns 
# NOTE(review): duplicate of the pyplot import two lines above.
import matplotlib.pyplot as plt 

# Applying settings to visualization tools.
#
# The seaborn style is applied first on purpose: sns.set_style() writes its
# own "font.family" entry into matplotlib's rcParams, so calling it after the
# font assignment silently puts figures back on the default sans-serif font.
sns.set_style("white")

# Font family and text sizes shared by every figure in the notebook.
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 13
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14

# Colour cycle: four blue shades, dark to light. Set exactly once, because
# every sns.set_palette() call replaces the palette that was active before it.
sns.set_palette(["#001A41", "#1C4B6E", "#377B9C", "#52ABCA"])

# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported project modules are picked up without restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [48]:
from utils import search_folder

# Thesis root: the working directory with the notebook's own sub-directory
# removed from it.
current_dir = os.getcwd()
thesis_folder_path = current_dir.replace("/laptop code/thesis_code", "")

# Look up each data / output folder under the thesis root, in a fixed order.
# NOTE(review): search_folder is a project helper; presumably it returns the
# path of the named folder -- confirm against utils.
(
    stock_data_path,
    modelling_data_path,
    google_trending_path,
    results_folder,
    topic_modelling,
) = (
    search_folder(thesis_folder_path, folder_name)
    for folder_name in (
        "stock_prices",
        "modelling_data",
        "google_search",
        "results_output",
        "topic_modelling",
    )
)

--- 

## Moderna Results 

In [67]:
# Load the Moderna trial summary; the first CSV column becomes the index.
moderna_results_df = pd.read_csv(
    f"{results_folder}/moderna_results_summary.csv",
    index_col=[0],
)

In [68]:
# Five Moderna trials with the smallest `mae`.
(
    moderna_results_df
    .sort_values("mae")
    .head(5)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_750,moderna,close,all,rolled_eik_finbert_senti_sw_12,xgboost,permutations,0.512141,0.524887,0.510227,0.518436,4.745,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.810814, 13.090164, 13.920991, 13.994031, 1..."
trial_193,moderna,close,all,rolled_finbert_senti_sw_21,xgboost,permutations,0.514286,0.529412,0.5125,0.521739,4.759,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.770415, 13.429209, 13.731967, 14.078178, 1..."
trial_818,moderna,close,all,rolled_vader_eik_twi_senti_12,xgboost,permutations,0.522422,0.527149,0.520455,0.524775,4.764,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.870809, 13.132, 13.747347, 14.473365, 13.9..."
trial_280,moderna,close,all,rolled_vader_senti_tw_131,xgboost,permutations,0.505519,0.5181,0.503409,0.511732,4.768,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.7335205, 13.329398, 13.603527, 14.193874, ..."
trial_284,moderna,close,all,rolled_vader_senti_tw_171,xgboost,permutations,0.518931,0.527149,0.517045,0.523008,4.771,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.737441, 13.24948, 13.624239, 14.178336, 14..."


In [70]:
# Five Moderna trials with the highest accuracy; f1_score breaks ties
# (both sorted in descending order).
(
    moderna_results_df
    .sort_values(["accuracy", "f1_score"], ascending=False)
    .head(5)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_700,moderna,close,all,rolled_vader_senti_tw_13,xgboost,permutations,0.530067,0.538462,0.528409,0.534231,4.852,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.853098, 13.170958, 13.747823, 14.411941, 1..."
trial_723,moderna,close,all,rolled_finbert_senti_tw_19,xgboost,permutations,0.528261,0.549774,0.527273,0.538803,4.838,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.865635, 13.2151575, 13.94238, 13.944755, 1..."
trial_706,moderna,close,all,rolled_vader_senti_tw_19,xgboost,permutations,0.529279,0.531674,0.527273,0.530474,4.826,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[13.121658, 13.320258, 13.868723, 14.098442, 1..."
trial_28,moderna,close,all,rolled_textblob_tw_sw_271,xgboost,permutations,0.527594,0.540724,0.526136,0.534078,4.849,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.753787, 13.407628, 13.691781, 14.28641, 14..."
trial_458,moderna,close,all,rolled_textblob_eik_twi_senti_11,xgboost,permutations,0.527716,0.538462,0.526136,0.533035,4.843,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.889562, 13.381269, 13.85841, 13.979132, 14..."


In [47]:
# Moderna rows whose comment is exactly "baseline score".
moderna_results_df.loc[moderna_results_df["comments"].eq("baseline score")]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,moderna,close,all,,xgboost,baseline score,0.512141,0.524887,0.510227,0.518436,4.875,"[Timestamp('2019-08-16 00:00:00+0000', tz='UTC...","[13.4, 14.15, 14.29, 14.16, 13.88, 14.97, 14.5...","[12.894571, 13.140184, 13.619936, 14.064367, 1..."


--- 

## Apple Results 

In [75]:
# Load the Apple trial summary (first CSV column is the index) and leave out
# the rows produced by the "lasso regression" model.
apple_results_df = pd.read_csv(
    f"{results_folder}/apple_results_summary.csv",
    index_col=[0],
).loc[lambda results: results["model"] != "lasso regression"]

In [76]:
# Three Apple trials with the smallest `mae`.
(
    apple_results_df
    .sort_values("mae")
    .head(3)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_1325,apple,ft_target,all,rolled_textblob_eik_twi_senti_251,apple,permutations,0.511364,0.498615,0.482784,0.504909,1.71,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.983399113654016, 40.35526381410628, 39.988..."
trial_1331,apple,ft_target,all,rolled_textblob_eik_twi_senti_311,apple,permutations,0.512712,0.50277,0.484249,0.507692,1.711,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.983399113654016, 40.35526381410628, 39.988..."
trial_988,apple,ft_target,all,rolled_textblob_senti_sw_301,apple,permutations,0.516266,0.50554,0.487912,0.510847,1.711,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[41.10064588842462, 40.434483761611, 40.017317..."


In [77]:
# First three Apple rows whose comment mentions "baseline".
# na=False counts a missing comment as "no match"; without it a single NaN in
# the column makes the mask non-boolean and the row selection raises ValueError.
apple_results_df[apple_results_df["comments"].str.contains("baseline", na=False)].head(3)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_30,apple,close,all,,elasticnet regression,baseline score,0.520476,0.545706,0.493773,0.532792,1.825,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.68730937317744, 40.44527570679432, 40.1213..."
trial_32,apple,close,all,,random_forest regression,baseline score,0.517375,0.556787,0.490842,0.536358,1.804,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.035075000000006, 40.05032499999997, 39.841..."
trial_1,apple,close,all,rolled_textblob_tw_sw_1,xgboost,baseline score,0.514085,0.50554,0.485714,0.509777,1.991,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.228775, 39.92641, 39.834, 39.20895, 39.696..."


In [78]:
# Five Apple trials with the highest accuracy; f1_score breaks ties
# (both sorted in descending order).
(
    apple_results_df
    .sort_values(["accuracy", "f1_score"], ascending=False)
    .head(5)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_276,apple,close,all,rolled_finbert_tw_sw_351,xgboost,permutations,0.537827,0.541551,0.511355,0.539683,1.972,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.21833, 39.923134, 39.87051, 39.172104, 39...."
trial_25,apple,close,all,rolled_eik_finbert_senti_sw_131,xgboost regression,permutations,0.537396,0.537396,0.510623,0.537396,1.941,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.237324, 39.953026, 39.847683, 39.237587, 3..."
trial_358,apple,close,all,rolled_finbert_senti_sw_31,xgboost,permutations,0.53453,0.536011,0.507692,0.53527,1.991,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.22936, 39.92876, 39.84852, 39.21196, 39.90..."
trial_163,apple,close,all,rolled_eik_vader_senti_sw_281,xgboost,permutations,0.535311,0.524931,0.507692,0.53007,1.965,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.02322, 39.982407, 39.855785, 39.962322, 39..."
trial_1671,apple,ft_target,all,rolled_textblob_tw_sw_1,support vector,permutations,0.531695,0.569252,0.50696,0.549833,11.769,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[36.79825683612862, 36.40659537054695, 36.0614..."


In [60]:
# Top three rows, by accuracy then f1_score (descending), among trials whose
# `model` label contains "apple".
# NOTE(review): "apple" as a model label looks like the company name stored in
# the model column -- confirm which estimator these rows belong to.
(
    apple_results_df
    .loc[apple_results_df["model"].str.contains("apple")]
    .sort_values(["accuracy", "f1_score"], ascending=False)
    .head(3)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_1670,apple,ft_target,all,news_count,apple,permutations,0.526167,0.515235,0.498168,0.520644,1.731,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.66956418760967, 40.01873616930948, 39.7503..."
trial_1145,apple,ft_target,all,rolled_vader_senti_tw_351,apple,permutations,0.523272,0.51385,0.495238,0.518519,1.735,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[41.0207614487654, 40.373385514854704, 39.9752..."
trial_1231,apple,ft_target,all,rolled_eik_finbert_senti_sw_71,apple,permutations,0.523605,0.506925,0.495238,0.51513,1.757,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.537413228358965, 39.9413294323787, 39.5913..."


In [86]:
# Number of Apple trials recorded for each model label.
apple_results_df["model"].value_counts()

apple                       827
support vector              827
xgboost                     820
xgboost regression            8
elasticnet regression         1
random_forest regression      1
ridge regression              1
Name: model, dtype: int64

In [10]:
# xgboost rows commented "baseline score", skipping the first match.
# NOTE(review): the skip is positional, so it depends on the row order of the
# CSV -- confirm which row is meant to be excluded.
apple_results_df.loc[
    apple_results_df["comments"].eq("baseline score")
    & apple_results_df["model"].eq("xgboost")
].iloc[1:]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,apple,close,all,,xgboost,baseline score,0.503516,0.495845,0.474725,0.499651,1.98,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[39.9125, 39.57, 39.97, 39.6675, 39.6825, 39.0...","[40.23484, 39.92794, 39.847237, 39.259945, 39...."


--- 

## Tesla Results 

In [79]:
# Load the Tesla trial summary; the first CSV column becomes the index.
tesla_results_df = pd.read_csv(
    f"{results_folder}/tesla_results_summary.csv",
    index_col=[0],
)

In [80]:
# Three Tesla trials with the smallest `mae`.
(
    tesla_results_df
    .sort_values("mae")
    .head(3)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_772,tesla,ft_target,all,rolled_vader_sw_171,ridge,permutations,0.541076,0.538028,0.522344,0.539548,3.526,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.257950241503572, 24.45800419788908, 25.398..."
trial_773,tesla,ft_target,all,rolled_vader_sw_181,ridge,permutations,0.542897,0.543662,0.524542,0.543279,3.532,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.279596272948222, 24.48380129857389, 25.425..."
trial_784,tesla,ft_target,all,rolled_vader_sw_291,ridge,permutations,0.545706,0.55493,0.528205,0.550279,3.533,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.279596272948222, 24.48380129857389, 25.425..."


In [81]:
# Five Tesla trials with the highest accuracy; f1_score breaks ties
# (both sorted in descending order).
(
    tesla_results_df
    .sort_values(["accuracy", "f1_score"], ascending=False)
    .head(5)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_1149,tesla,ft_target,all,rolled_vader_eik_twi_senti_151,ridge,permutations,0.553547,0.560563,0.536264,0.557033,3.567,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.1615797709587, 24.36121100426894, 25.30741..."
trial_990,tesla,ft_target,all,rolled_eik_finbert_senti_sw_71,ridge,permutations,0.553696,0.559155,0.536264,0.556412,3.607,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.271246939840925, 24.479102131452372, 25.37..."
trial_804,tesla,ft_target,all,rolled_finbert_senti_sw_111,ridge,permutations,0.553521,0.553521,0.535531,0.553521,3.57,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.2623232654506, 24.51598485559243, 25.51545..."
trial_998,tesla,ft_target,all,rolled_eik_finbert_senti_sw_151,ridge,permutations,0.551867,0.561972,0.534799,0.556874,3.622,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.301561332587973, 24.502781535288346, 25.42..."
trial_1015,tesla,ft_target,all,rolled_eik_finbert_senti_sw_321,ridge,permutations,0.552156,0.559155,0.534799,0.555633,3.593,"[Timestamp('2017-09-12 00:00:00'), Timestamp('...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[24.224962809799937, 24.416583134165833, 25.34..."


In [82]:
# Top three Tesla xgboost trials by accuracy, then f1_score (both descending).
(
    tesla_results_df
    .loc[tesla_results_df["model"].eq("xgboost")]
    .sort_values(["accuracy", "f1_score"], ascending=False)
    .head(3)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_523,tesla,close,all,rolled_finbert_eik_twi_senti_281,xgboost,permutations,0.54418,0.546479,0.526007,0.545327,3.698,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."
trial_433,tesla,close,all,rolled_eik_vader_senti_sw_141,xgboost,permutations,0.541029,0.547887,0.523077,0.544437,3.706,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.708046, 23.927109, 24.366182, 25.039324, 2..."
trial_37,tesla,close,all,rolled_textblob_tw_sw_361,xgboost,permutations,0.541259,0.54507,0.523077,0.543158,3.676,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."


In [83]:
# Three Tesla xgboost trials with the smallest `mae`.
(
    tesla_results_df
    .loc[tesla_results_df["model"].eq("xgboost")]
    .sort_values("mae")
    .head(3)
)

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_254,tesla,close,all,rolled_textblob_senti_tw_251,xgboost,permutations,0.527504,0.526761,0.508425,0.527132,3.63,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."
trial_219,tesla,close,all,rolled_finbert_senti_sw_281,xgboost,permutations,0.539749,0.54507,0.521612,0.542397,3.632,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.655859, 24.018785, 24.660797, 24.985577, 2..."
trial_74,tesla,close,all,rolled_vader_tw_sw_351,xgboost,permutations,0.524476,0.528169,0.505495,0.526316,3.637,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.98509, 24.151989, 24.709486, 24.993444, 25..."


In [35]:
# Tesla rows whose comment is exactly "close baseline score".
tesla_results_df.loc[tesla_results_df["comments"].eq("close baseline score")]

Unnamed: 0,company,target,variables,sentiment,model,comments,precision,recall,accuracy,f1_score,mae,datetime,y_test,y_pred
trial_0,tesla,close,all,,xgboost,close baseline score,0.519553,0.523944,0.500366,0.521739,3.733,"[Timestamp('2017-09-12 00:00:00+0000', tz='UTC...","[24.415309, 25.175975, 25.320641, 25.666641, 2...","[23.67142, 23.955599, 24.378054, 25.058353, 25..."


In [85]:
# Relative change going from 30 to 29.74 (roughly -0.87 %).
# NOTE(review): the origin of the two figures is not shown in this notebook -- confirm.
29.74 / 30 - 1

-0.008666666666666711