In [8]:
import pandas as pd
import numpy as np
%matplotlib inline

from sklearn.impute import SimpleImputer
import sys
import os
import warnings

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from statsmodels.graphics.tsaplots import plot_acf

import sklearn
import skforecast
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, MinMaxScaler
from sklearn.feature_selection import RFECV
from sklearn.ensemble import  HistGradientBoostingRegressor, RandomForestRegressor
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection import bayesian_search_forecaster
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries
from skforecast.model_selection_multiseries import select_features_multiseries
from skforecast.plot import set_dark_theme
from skforecast.preprocessing import series_long_to_dict
from skforecast.preprocessing import exog_long_to_dict
from datetime import timedelta
from lightgbm import LGBMRegressor
import utils as u
from sklearn.metrics import classification_report

# Show only the first occurrence of each matching warning.
warnings.filterwarnings('once')

# ANSI escape prefix (bold + 256-colour palette index 208) put in front of every banner line.
color = '\033[1m\033[38;5;208m'
for label, module in (
    ("skforecast", skforecast),
    ("scikit-learn", sklearn),
    ("pandas", pd),
    ("numpy", np),
):
    # One line per library, in the same order and wording as before.
    print(f"{color}Version {label}: {module.__version__}")

[1m[38;5;208mVersion skforecast: 0.13.0
[1m[38;5;208mVersion scikit-learn: 1.5.2
[1m[38;5;208mVersion pandas: 2.2.3
[1m[38;5;208mVersion numpy: 1.26.4


## Loading category data (gaming, AI, meme, RWA)

In [9]:
# Read the four processed per-category CSV files; the order of the paths
# matches the order of the names on the left-hand side.
gaming, ai, meme, rwa = map(
    pd.read_csv,
    (
        '../data/processed/gaming.csv',
        '../data/processed/ai.csv',
        '../data/processed/meme.csv',
        '../data/processed/rwa.csv',
    ),
)

# Cell output: the (rows, columns) shape of every frame.
gaming.shape, ai.shape, meme.shape, rwa.shape

((125654, 10), (113423, 10), (33039, 10), (52339, 10))

## Preprocessing data

In [10]:
# Forecast horizon in days; handed to the preprocessing, training and prediction helpers in the cells below.
days_to_predict = 7

In [11]:
class category_data:
    """Container for everything the notebook tracks about one token category.

    The constructor stores the seven objects it receives (in this notebook
    they come from ``u.preprocess``) and creates the result attributes with a
    value of ``None`` so that later cells can fill them in.
    """

    def __init__(self, train_data, test_data, series_dict, exog_dict, future_exog_dict, series_scaler, exog_scaler):
        # Inputs supplied by the caller, stored unchanged.
        self.train_data, self.test_data = train_data, test_data
        self.series_dict, self.exog_dict = series_dict, exog_dict
        self.future_exog_dict = future_exog_dict
        self.series_scaler, self.exog_scaler = series_scaler, exog_scaler

        # Result slots: empty until the corresponding notebook cell assigns them.
        self.forecaster = self.predictions = None
        self.og_train = self.og_test = self.og_pred = None
        self.error_df = self.close_df = None

In [12]:
def generate_data_object(data: pd.DataFrame, days: int):
    """Preprocess one category's frame and wrap the results in a ``category_data``.

    Parameters
    ----------
    data : pd.DataFrame
        Data for a single category; passed unchanged to ``u.preprocess``.
    days : int
        Forecast horizon, forwarded to ``u.preprocess`` as ``days_to_predict``.

    Returns
    -------
    category_data
        Object holding the seven values returned by ``u.preprocess``; its
        result attributes (forecaster, predictions, ...) are still ``None``.
    """
    # Forward the caller's horizon. The previous version hard-coded 7 here,
    # so the `days` argument was silently ignored.
    (
        train_data,
        test_data,
        series_dict,
        exog_dict,
        future_exog_dict,
        series_scaler,
        exog_scaler,
    ) = u.preprocess(data, days_to_predict=days)

    return category_data(
        train_data,
        test_data,
        series_dict,
        exog_dict,
        future_exog_dict,
        series_scaler,
        exog_scaler,
    )

In [13]:
# Build one data object per category, in the order gaming, AI, meme, RWA.
gaming_obj, ai_obj, meme_obj, rwa_obj = (
    generate_data_object(frame, days_to_predict)
    for frame in (gaming, ai, meme, rwa)
)

In [14]:
# Train a forecaster for each category and keep it on that category's object.
for category_obj in (gaming_obj, ai_obj, meme_obj, rwa_obj):
    category_obj.forecaster = u.train_best_forecaster(
        category_obj.series_dict,
        category_obj.exog_dict,
        category_obj.future_exog_dict,
        category_obj.test_data,
        future_days=days_to_predict,
    )

Training model with parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 5, 'min_child_samples': 10, 'n_estimators': 100, 'num_leaves': 31, 'subsample': 0.8}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3885
[LightGBM] [Info] Number of data points in the train set: 80319, number of used features: 16
[LightGBM] [Info] Start training from score 0.000875
Training model with parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 5, 'min_child_samples': 10, 'n_estimators': 100, 'num_leaves': 31, 'subsample': 1.0}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003534 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3885
[LightGBM] [Info] Number of data points in the train set: 80319, number of used features: 16
[LightGBM] 

In [15]:
# Produce the forecast for each category with that category's trained forecaster.
for category_obj in (gaming_obj, ai_obj, meme_obj, rwa_obj):
    category_obj.predictions = u.predict_X_days(
        days_to_predict=days_to_predict,
        forecaster=category_obj.forecaster,
        future_exog_dict=category_obj.future_exog_dict,
    )

In [16]:
# Run the train, test and prediction frames of every category through
# u.inverse_scaling with that category's series scaler, storing the three
# results as og_train / og_test / og_pred.
for category_obj in (gaming_obj, ai_obj, meme_obj, rwa_obj):
    category_obj.og_train, category_obj.og_test, category_obj.og_pred = u.inverse_scaling(
        train_df=category_obj.train_data,
        test_df=category_obj.test_data,
        pred_df=category_obj.predictions,
        series_scaler=category_obj.series_scaler,
    )

In [17]:
# Build the last-close comparison table for each category from its
# inverse-scaled train, test and prediction frames.
for category_obj in (gaming_obj, ai_obj, meme_obj, rwa_obj):
    category_obj.close_df = u.get_last_close_info(
        category_obj.og_train,
        category_obj.og_test,
        category_obj.og_pred,
    )

In [22]:
# Display the table returned by u.get_last_close_info for the gaming category.
gaming_obj.close_df

Unnamed: 0,Token ID,last_close,last_test_close,last_pred_close,real_difference,pred_difference,test_pred_difference,real_went_up,pred_went_up
0,06963e10-2042-41d3-9583-acb4135460dc,0.00339864,0.003429296,0.021359,3.065594e-05,0.01796,-0.01793,1.0,1.0
1,0844705f-10ed-42ca-b483-914b7d59e351,0.283641,0.3097892,0.250775,0.02614822,-0.032866,0.059014,1.0,0.0
2,10e910bd-6f50-4085-bd78-fbbcde408d28,0.06786522,0.07049241,0.082503,0.002627189,0.014638,-0.012011,1.0,1.0
3,1ae14641-7365-4be1-9bb9-c86ab07715b3,0.9301679,1.032336,0.932301,0.1021681,0.002133,0.100035,1.0,1.0
4,2150225f-b2f2-4b59-bc68-001aa8ae4666,0.03202424,0.03732745,0.04467,0.005303213,0.012645,-0.007342,1.0,1.0
5,246715d6-065f-475e-8026-3e0a201c7e33,0.0004626805,0.0005688365,0.021359,0.000106156,0.020896,-0.02079,1.0,1.0
6,24e85250-c2a7-4158-8e7d-244818085463,0.04032475,0.04317771,0.050616,0.002852958,0.010292,-0.007439,1.0,1.0
7,30d41b9d-f8eb-4821-8a6e-9016bfd35023,0.001822311,0.001918915,0.01818,9.660406e-05,0.016358,-0.016262,1.0,1.0
8,39268841-e3d8-40b3-8c30-6d6d5bb19bf4,0.1452945,0.1582443,0.155561,0.01294984,0.010267,0.002683,1.0,1.0
9,3a048f18-7aab-4abf-87bd-9f70572c5a9e,0.003363081,0.003330903,0.021359,-3.217725e-05,0.017996,-0.018028,0.0,1.0


In [18]:
# Print one classification report per category, comparing the real
# direction flag with the predicted one from the close table.
for title, category_obj in (
    ("Gaming", gaming_obj),
    ("AI", ai_obj),
    ("Meme", meme_obj),
    ("RWA", rwa_obj),
):
    print(title)
    print(
        classification_report(
            category_obj.close_df['real_went_up'],
            category_obj.close_df['pred_went_up'],
        )
    )

Gaming
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         8
         1.0       0.84      0.82      0.83        50

    accuracy                           0.71        58
   macro avg       0.42      0.41      0.41        58
weighted avg       0.72      0.71      0.71        58

AI
              precision    recall  f1-score   support

         0.0       0.14      0.10      0.12        10
         1.0       0.62      0.71      0.67        21

    accuracy                           0.52        31
   macro avg       0.38      0.41      0.39        31
weighted avg       0.47      0.52      0.49        31

Meme
              precision    recall  f1-score   support

         0.0       0.47      1.00      0.64         8
         1.0       1.00      0.10      0.18        10

    accuracy                           0.50        18
   macro avg       0.74      0.55      0.41        18
weighted avg       0.76      0.50      0.39        18

RWA


In [19]:
# Compute the error table for each category from its inverse-scaled frames.
for category_obj in (gaming_obj, ai_obj, meme_obj, rwa_obj):
    category_obj.error_df = u.compute_errors(
        train_data=category_obj.og_train,
        predictions_x_days=category_obj.og_pred,
        test_data=category_obj.og_test,
    )

In [20]:
# Display the error table returned by u.compute_errors for the AI category.
ai_obj.error_df

Unnamed: 0,Token ID,MSE,MAE,MAPE,MAPE (%)
0,050cd820-2c82-4222-892c-e6f2859625c7,5e-05,0.006986,1422.103931,1422.10%
1,0bb97d30-b5bb-45d6-b105-e49436cace0e,0.000114,0.009475,2.891446,2.89%
2,0d1428e8-2b88-4314-9b1e-aa664b34a5fe,6.9e-05,0.008073,122.931492,122.93%
3,172a1e9a-f56c-4386-85e1-093d801e4e93,3e-05,0.005208,13.957665,13.96%
4,18b987bb-2597-4288-a28e-08d7dbce2ee2,3.4e-05,0.005714,323.75751,323.76%
5,196199f6-1c8f-4be4-83dd-733866d6d558,5.6e-05,0.007371,25458.060422,25458.06%
6,1e389c02-0a09-4b57-9c50-3a74bca12e4f,0.173044,0.366308,6.760458,6.76%
7,1f34fd87-5598-40e7-ac78-7efd6bd28bdd,0.000229,0.014774,19.643994,19.64%
8,22820bda-48eb-45bd-b189-af3253ab57c0,1.5e-05,0.003565,18.170606,18.17%
9,3ee65bec-c8c8-482d-ba5c-5c594cc86f2f,7.5e-05,0.008213,65.918921,65.92%


In [None]:
# Plot the gaming category: train, test and predicted frames, with last_data_points=120.
u.plot_predictions(
    train_data=gaming_obj.og_train,
    test_data=gaming_obj.og_test,
    predictions_x_days=gaming_obj.og_pred,
    last_data_points=120,
)

In [None]:
# Plot the AI category: train, test and predicted frames, with last_data_points=60.
u.plot_predictions(
    train_data=ai_obj.og_train,
    test_data=ai_obj.og_test,
    predictions_x_days=ai_obj.og_pred,
    last_data_points=60,
)

In [None]:
# Plot the RWA category: train, test and predicted frames, with last_data_points=60.
u.plot_predictions(
    train_data=rwa_obj.og_train,
    test_data=rwa_obj.og_test,
    predictions_x_days=rwa_obj.og_pred,
    last_data_points=60,
)

In [None]:
# Plot the meme category: train, test and predicted frames, with last_data_points=60.
u.plot_predictions(
    train_data=meme_obj.og_train,
    test_data=meme_obj.og_test,
    predictions_x_days=meme_obj.og_pred,
    last_data_points=60,
)