In [1]:
import numpy as np
import pandas as pd
from constants import SHARED_RANDOM_STATE
from db_helper_functions import (
    get_stock_news_with_finbert_scores_from_db,
    get_stock_news_with_finbert_tone_scores_from_db,
    get_stock_news_with_finbert_whole_article_scores_from_db,
    get_stock_news_with_finbert_tone_whole_article_scores_from_db,
    get_stock_news_with_bertopic_sentiment_scores_from_db,
)
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ParameterGrid
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from supervised_learning_data_preprocessing_functions import (
    gen_df_for_supervised_learning,
)
from grid_model_search_functions import iterative_grid_cv_model_testing

ticker = "AAPL"

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
finbert_summary_sentiment_df = gen_df_for_supervised_learning(
    ticker=ticker,
    sentiment_df_retrieval_function=get_stock_news_with_finbert_scores_from_db,
)
finbert_tone_summary_sentiment_df = gen_df_for_supervised_learning(
    ticker=ticker,
    sentiment_df_retrieval_function=get_stock_news_with_finbert_tone_scores_from_db,
)
finbert_whole_article_sentiment_df = gen_df_for_supervised_learning(
    ticker=ticker,
    sentiment_df_retrieval_function=get_stock_news_with_finbert_whole_article_scores_from_db,
)
finbert_tone_article_sentiment_df = gen_df_for_supervised_learning(
    ticker=ticker,
    sentiment_df_retrieval_function=get_stock_news_with_finbert_tone_whole_article_scores_from_db,
)
bert_topic_article_sentiment_df = gen_df_for_supervised_learning(
    ticker=ticker,
    sentiment_df_retrieval_function=get_stock_news_with_bertopic_sentiment_scores_from_db,
)

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')


In [3]:
dataframes_to_test = [
    (finbert_summary_sentiment_df, "finbert_summary_sentiment_df"),
    (finbert_tone_summary_sentiment_df, "finbert_tone_summary_sentiment_df"),
    (finbert_whole_article_sentiment_df, "finbert_whole_article_sentiment_df"),
    (finbert_tone_article_sentiment_df, "finbert_tone_whole_article_sentiment_df"),
    (bert_topic_article_sentiment_df, "bert_topic_article_sentiment_df"),
]
cv_train_size = [10, 20, 60, 252, 504]
cv_test_size = [5, 10, 60, 120]
lag_time = [0, 5, 30]
features_to_use = [
    "open",
    "prev_high",
    "prev_low",
    "prev_close",
    # "prev_volume",
    # "dividends",
    # "stock_splits",
    "positive",
    "negative",
    "neutral",
    # "day_of_month",
    # "day_of_week",
    # "quarter",
    # "month",
    # "year",
]
feature_to_predict = ["close"]
scoring_method = ["neg_mean_absolute_error"]

param_grid = {
    "data_frame": dataframes_to_test,
    "cv_train_size": cv_train_size,
    "cv_test_size": cv_test_size,
    "lag_time": lag_time,
    "feature_to_predict": feature_to_predict,
    "scoring_method": scoring_method,
}
data_settings_grid_list = list(ParameterGrid(param_grid))

In [4]:
knn_results = iterative_grid_cv_model_testing(
    model=KNeighborsRegressor(),
    model_parameters={"n_neighbors": [2, 3, 4, 5]},
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [5]:
print(len(knn_results))
knn_results.sort_values("mean_test_score", ascending=False).head()

780


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
3,0.001203,0.000115,0.000642,4.6e-05,5,{'model__n_neighbors': 5},-1.338548,-2.084142,-3.657228,-1.7927,...,-2.202304,-2.488909,0.656071,1,bert_topic_article_sentiment_df,504,10,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
3,0.001319,0.000105,0.000764,8.7e-05,5,{'model__n_neighbors': 5},-1.23949,-2.368841,-3.470506,-1.986331,...,-2.202304,-2.49174,0.61806,1,bert_topic_article_sentiment_df,504,10,5,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
3,0.001232,0.000149,0.000657,5e-05,5,{'model__n_neighbors': 5},-1.645086,-1.818716,-3.402988,-1.733904,...,-2.167902,-2.503914,0.633171,1,bert_topic_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
2,0.001301,0.000115,0.0007,5.7e-05,4,{'model__n_neighbors': 4},-1.807427,-1.763613,-3.583929,-1.766926,...,-2.045014,-2.515937,0.646259,2,bert_topic_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
2,0.00129,0.000101,0.00071,3.4e-05,4,{'model__n_neighbors': 4},-1.505693,-1.89789,-3.252043,-2.141416,...,-2.141812,-2.525172,0.566902,2,bert_topic_article_sentiment_df,504,10,5,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."


In [6]:
knn_results.to_csv("./knn_regression_results.csv")

In [7]:
gbr_results = iterative_grid_cv_model_testing(
    model=GradientBoostingRegressor(),
    model_parameters={
        "random_state": [SHARED_RANDOM_STATE],
        "learning_rate": [0.01, 0.1, 1],
        "n_estimators": [50, 100, 300],
        "max_depth": [None, 2, 5],
        "max_leaf_nodes": [None, 5, 10],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [8]:
print(len(gbr_results))
gbr_results.sort_values("mean_test_score", ascending=False).head()

15795


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__learning_rate,param_model__max_depth,param_model__max_leaf_nodes,param_model__n_estimators,param_model__random_state,params,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
43,0.07191,0.007204,0.001456,0.001719,0.1,2.0,10.0,100,1337,"{'model__learning_rate': 0.1, 'model__max_dept...",...,-2.133765,-2.393128,0.682748,1,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
40,0.081742,0.011546,0.001167,0.000883,0.1,2.0,5.0,100,1337,"{'model__learning_rate': 0.1, 'model__max_dept...",...,-2.133765,-2.393128,0.682748,1,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
37,0.083922,0.022608,0.00125,0.000973,0.1,2.0,,100,1337,"{'model__learning_rate': 0.1, 'model__max_dept...",...,-2.133765,-2.393128,0.682748,1,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
49,0.11826,0.003319,0.000939,0.00041,0.1,5.0,5.0,100,1337,"{'model__learning_rate': 0.1, 'model__max_dept...",...,-1.913932,-2.393195,0.673491,1,finbert_tone_summary_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
31,0.116521,0.008993,0.000836,1.8e-05,0.1,,5.0,100,1337,"{'model__learning_rate': 0.1, 'model__max_dept...",...,-1.913932,-2.393195,0.673491,1,finbert_tone_summary_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."


In [9]:
gbr_results.to_csv("./model_results/gb_regression_results.csv")

In [10]:
rf_results = iterative_grid_cv_model_testing(
    model=RandomForestRegressor(),
    model_parameters={
        "random_state": [SHARED_RANDOM_STATE],
        "n_estimators": [10, 20, 100],
        "max_depth": [None, 2, 5],
        "max_leaf_nodes": [None, 5, 10],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [11]:
rf_results.to_csv("./model_results/rf_regression_results.csv")

In [12]:
rf_results.sort_values("mean_test_score", ascending=False).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__max_depth,param_model__max_leaf_nodes,param_model__n_estimators,param_model__random_state,params,split0_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
19,0.024787,0.003962,0.000932,7e-05,5,,20,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",-1.187039,...,-2.334191,-2.409361,0.625797,1,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
20,0.10761,0.012756,0.001858,9.8e-05,5,,100,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",-1.225781,...,-2.223094,-2.423425,0.647325,2,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
18,0.015005,0.00352,0.001012,0.00062,5,,10,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",-1.146101,...,-2.412747,-2.455331,0.646053,3,finbert_tone_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
20,0.108837,0.011336,0.002553,0.001961,5,,100,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",-1.262754,...,-2.161542,-2.457311,0.664213,1,finbert_whole_article_sentiment_df,504,10,30,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
20,0.114626,0.012426,0.001896,1.8e-05,5,,100,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",-1.238547,...,-2.14143,-2.467329,0.72744,1,finbert_tone_whole_article_sentiment_df,504,10,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."


In [13]:
lin_reg_results = iterative_grid_cv_model_testing(
    model=LinearRegression(),
    model_parameters={},
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [14]:
print(len(lin_reg_results))
lin_reg_results.sort_values("mean_test_score", ascending=False).head()

195


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
0,0.001257,9.3e-05,0.000489,1.4e-05,{},-2.072167,-1.431565,-1.373396,-1.223925,-1.302183,...,-2.470496,-1.869304,0.476783,1,finbert_tone_summary_sentiment_df,504,60,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001237,0.000167,0.000476,1.8e-05,{},-2.072842,-1.410496,-1.365303,-1.226776,-1.307513,...,-2.450358,-1.870032,0.478265,1,finbert_tone_whole_article_sentiment_df,504,60,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001351,9.6e-05,0.000513,4e-05,{},-2.068993,-1.431502,-1.370532,-1.23121,-1.304757,...,-2.459852,-1.870385,0.477316,1,bert_topic_article_sentiment_df,504,60,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.00122,0.000139,0.000479,2.8e-05,{},-2.089763,-1.43249,-1.373291,-1.206936,-1.302556,...,-2.469165,-1.870965,0.480575,1,finbert_tone_summary_sentiment_df,504,60,5,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001276,9.1e-05,0.000485,2e-05,{},-2.059573,-1.411713,-1.378692,-1.257808,-1.306895,...,-2.485027,-1.871207,0.474339,1,finbert_summary_sentiment_df,504,60,0,neg_mean_absolute_error,"open,prev_high,prev_low,prev_close,positive,ne..."


In [15]:
lin_reg_results.to_csv("./model_results/linear_regression_results.csv")

In [16]:
for df, name in dataframes_to_test:
    df["closed_higher"] = np.where(df.close > df.open, 1, 0)


feature_to_predict = ["closed_higher"]
scoring_method = ["accuracy"]

param_grid = {
    "data_frame": dataframes_to_test,
    "cv_train_size": cv_train_size,
    "cv_test_size": cv_test_size,
    "lag_time": lag_time,
    "feature_to_predict": feature_to_predict,
    "scoring_method": scoring_method,
}
data_settings_grid_list = list(ParameterGrid(param_grid))

In [17]:
lr_clf_results = iterative_grid_cv_model_testing(
    model=LogisticRegression(),
    model_parameters={
        "random_state": [SHARED_RANDOM_STATE],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [18]:
lr_clf_results.sort_values("mean_test_score", ascending=False).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
0,0.001765,0.000139,0.000589,2.7e-05,1337,{'model__random_state': 1337},0.6,0.6,1.0,0.8,...,0.4,0.58,0.26,1,finbert_tone_whole_article_sentiment_df,20,5,0,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001806,0.000151,0.000598,3.2e-05,1337,{'model__random_state': 1337},0.6,0.4,0.4,0.6,...,0.4,0.56,0.195959,1,finbert_tone_summary_sentiment_df,60,5,30,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.00173,0.000122,0.000594,2.2e-05,1337,{'model__random_state': 1337},0.6,0.4,0.8,0.6,...,0.2,0.56,0.174356,1,finbert_whole_article_sentiment_df,20,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001835,0.000145,0.000609,3.2e-05,1337,{'model__random_state': 1337},0.6,0.4,0.4,0.6,...,0.4,0.56,0.174356,1,finbert_whole_article_sentiment_df,60,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.002121,0.000127,0.000618,2.2e-05,1337,{'model__random_state': 1337},0.6,0.6,0.4,0.6,...,0.8,0.56,0.12,1,finbert_tone_summary_sentiment_df,504,5,0,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."


In [19]:
lr_clf_results.to_csv("./model_results/logistic_regression_classifier_results.csv")

In [20]:
gbr_clf_results = iterative_grid_cv_model_testing(
    model=GradientBoostingClassifier(),
    model_parameters={
        "random_state": [SHARED_RANDOM_STATE],
        "learning_rate": [0.1],
        "n_estimators": [100, 300],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [21]:
gbr_clf_results.sort_values("mean_test_score", ascending=False).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__learning_rate,param_model__n_estimators,param_model__random_state,params,split0_test_score,split1_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
1,0.07032,0.007543,0.0009,3.9e-05,0.1,300,1337,"{'model__learning_rate': 0.1, 'model__n_estima...",0.6,0.6,...,0.8,0.66,0.156205,1,finbert_tone_summary_sentiment_df,20,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.024156,0.001097,0.000835,1.8e-05,0.1,100,1337,"{'model__learning_rate': 0.1, 'model__n_estima...",0.6,0.6,...,0.6,0.64,0.149666,2,finbert_tone_summary_sentiment_df,20,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.058736,0.001551,0.000868,1.4e-05,0.1,100,1337,"{'model__learning_rate': 0.1, 'model__n_estima...",0.6,0.9,...,0.3,0.62,0.166132,1,finbert_tone_summary_sentiment_df,252,10,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.029989,0.000446,0.000841,1.6e-05,0.1,100,1337,"{'model__learning_rate': 0.1, 'model__n_estima...",0.8,0.8,...,0.5,0.62,0.14,1,finbert_tone_summary_sentiment_df,60,10,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
1,0.096024,0.008382,0.000926,2.7e-05,0.1,300,1337,"{'model__learning_rate': 0.1, 'model__n_estima...",0.7,0.8,...,0.5,0.62,0.107703,2,finbert_tone_summary_sentiment_df,60,10,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."


In [22]:
gbr_clf_results.to_csv("./model_results/gb_classifier_results.csv")

In [23]:
rf_clf_results = iterative_grid_cv_model_testing(
    model=RandomForestClassifier(),
    model_parameters={
        "random_state": [SHARED_RANDOM_STATE],
        "n_estimators": [10, 20, 100],
        "max_depth": [None, 2, 5],
        "max_leaf_nodes": [None, 5, 10],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [24]:
rf_clf_results.sort_values("mean_test_score", ascending=False).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__max_depth,param_model__max_leaf_nodes,param_model__n_estimators,param_model__random_state,params,split0_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
2,0.060222,0.005729,0.002252,0.000717,,,100,1337,"{'model__max_depth': None, 'model__max_leaf_no...",0.4,...,0.6,0.72,0.16,1,bert_topic_article_sentiment_df,252,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
1,0.013298,0.001035,0.001153,0.000285,,,20,1337,"{'model__max_depth': None, 'model__max_leaf_no...",0.6,...,0.4,0.63,0.148661,1,finbert_tone_whole_article_sentiment_df,252,10,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
7,0.011043,0.003415,0.000991,6e-05,,10.0,20,1337,"{'model__max_depth': None, 'model__max_leaf_no...",0.6,...,0.7,0.62,0.18868,1,finbert_whole_article_sentiment_df,60,10,0,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
6,0.006155,0.001905,0.001005,0.000538,,10.0,10,1337,"{'model__max_depth': None, 'model__max_leaf_no...",0.6,...,0.8,0.62,0.227156,1,finbert_tone_summary_sentiment_df,20,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
24,0.006896,0.002258,0.00085,4.3e-05,5.0,10.0,10,1337,"{'model__max_depth': 5, 'model__max_leaf_nodes...",0.6,...,0.8,0.62,0.227156,1,finbert_tone_summary_sentiment_df,20,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."


In [25]:
rf_clf_results.to_csv("./model_results/rf_classifier_results.csv")

In [26]:
knn_clf_results = iterative_grid_cv_model_testing(
    model=KNeighborsClassifier(),
    model_parameters={
        "n_neighbors": [2, 3, 4, 5],
    },
    data_settings_grid_list=data_settings_grid_list,
    features_to_use=features_to_use,
)

In [28]:
knn_clf_results.sort_values("mean_test_score", ascending=False).head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
0,0.001205,0.000127,0.001045,9e-05,2,{'model__n_neighbors': 2},0.8,0.8,0.6,0.6,...,0.4,0.68,0.132665,1,bert_topic_article_sentiment_df,60,5,0,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
2,0.001305,9.7e-05,0.001109,6.6e-05,4,{'model__n_neighbors': 4},0.6,0.6,0.2,1.0,...,0.8,0.66,0.269072,1,finbert_summary_sentiment_df,504,5,30,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
3,0.001275,0.000142,0.001027,9.3e-05,5,{'model__n_neighbors': 5},0.6,0.8,0.6,0.8,...,0.8,0.66,0.22,1,finbert_tone_whole_article_sentiment_df,504,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
3,0.001274,9.7e-05,0.001033,6.7e-05,5,{'model__n_neighbors': 5},0.6,0.8,0.6,0.8,...,0.8,0.66,0.22,1,finbert_tone_whole_article_sentiment_df,504,5,0,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
0,0.001294,0.000106,0.001063,6.6e-05,2,{'model__n_neighbors': 2},0.8,0.6,0.6,0.8,...,0.4,0.66,0.253772,1,finbert_tone_whole_article_sentiment_df,252,5,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."


In [29]:
knn_clf_results = pd.read_csv("./model_results/knn_classifier_results.csv")

In [30]:
knn_clf_results[knn_clf_results.cv_test_size > 15].sort_values(
    "mean_test_score", ascending=False
).head()

Unnamed: 0.1,Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,...,split9_test_score,mean_test_score,std_test_score,rank_test_score,data_frame,cv_train_size,cv_test_size,lag_time,scoring_method,features_to_use
1816,0,0.001328,9.4e-05,0.001282,7.4e-05,2,{'model__n_neighbors': 2},0.55,0.55,0.75,...,0.65,0.61,0.091652,1,finbert_summary_sentiment_df,20,20,60,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
2041,1,0.001287,7.2e-05,0.001291,0.000119,3,{'model__n_neighbors': 3},0.55,0.55,0.65,...,0.5,0.595,0.082006,1,finbert_whole_article_sentiment_df,252,20,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
2155,3,0.001317,0.000121,0.001269,8.2e-05,5,{'model__n_neighbors': 5},0.6,0.65,0.7,...,0.45,0.59,0.139284,1,finbert_whole_article_sentiment_df,504,20,20,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
2053,1,0.001233,8.1e-05,0.001237,4.7e-05,3,{'model__n_neighbors': 3},0.65,0.55,0.7,...,0.45,0.59,0.109087,1,finbert_whole_article_sentiment_df,252,20,20,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
2141,1,0.001369,0.000127,0.001309,6.2e-05,3,{'model__n_neighbors': 3},0.55,0.55,0.75,...,0.45,0.585,0.122577,1,finbert_whole_article_sentiment_df,504,20,5,accuracy,"open,prev_high,prev_low,prev_close,positive,ne..."
