# Modules used in this project

In [None]:
# standard libraries
import os
import time
from datetime import datetime
import itertools
from itertools import chain, combinations
import warnings
warnings.filterwarnings('ignore')

# data analysis and manipulation
import pandas as pd
from pandas.plotting import scatter_matrix
import numpy as np

# data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# machine learning and model selection
from sklearn.model_selection import train_test_split, TimeSeriesSplit, ParameterGrid
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import xgboost as xgb
from xgboost import XGBRegressor

# statistical tools and analysis
from scipy.stats import pearsonr, shapiro
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# APIs and requests
import requests
import json

import time

import functions

# Getting the data

In [None]:
# Load criptos_final.csv, using its 'date_hour' column as the row index.
# No parse_dates argument is given, so the index is not converted to datetimes here.
criptos = pd.read_csv(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\input_data\criptos_final.csv",
    delimiter=",",
    index_col="date_hour",
)

In [None]:
# Load criptos_diff_final.csv, using its 'date_hour' column as the row index.
# This is the frame every modelling cell below works on.
# No parse_dates argument is given, so the index is not converted to datetimes here.
criptos_diff = pd.read_csv(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\input_data\criptos_diff_final.csv",
    delimiter=",",
    index_col="date_hour",
)

## ML models

## Model validation

In [None]:
# Search space for a single decision-tree regressor.
# Layout: {display name: {"model": estimator, "params": {hyperparameter: candidates}}}.
# The candidate lists multiply out to 4 * 3 * 3 * 4 = 144 combinations
# ("splitter" and "max_features" each have a single value).
decision_tree = {
    "Decision Tree": {
        "model": DecisionTreeRegressor(random_state=42),
        "params": {
            "splitter": ["best"],
            "max_depth": [3, 5, 10, 50],
            "min_samples_split": [2, 5, 50],
            "min_samples_leaf": [1, 2, 10],
            "max_features": ["sqrt"],
            "max_leaf_nodes": [10, 20, 50, 100],
        },
    },
}

In [None]:
# Compact search space for a random-forest regressor (2 * 2 * 2 * 2 = 16 combinations).
# Same layout as the other grids: display name -> {"model", "params"}.
rf = {
    "Random Forest": {
        "model": RandomForestRegressor(random_state=42),
        "params": {
            "n_estimators": [100, 300],
            "max_depth": [5, 30],
            "min_samples_split": [2, 20],
            "min_samples_leaf": [2, 6],
            # Bootstrap resampling and out-of-bag scoring are both held off.
            "bootstrap": [False],
            "oob_score": [False],
        },
    },
}

In [None]:
# Wider random-forest search space: 4 * 5 * 5 * 4 * 2 * 1 * 2 = 1600 combinations.
# NOTE(review): it reuses the display name "Random Forest" that `rf` also uses,
# so results from the two grids carry the same label unless the helper
# distinguishes them some other way.
rf_2 = {
    "Random Forest": {
        "model": RandomForestRegressor(random_state=42),
        "params": {
            # More ensemble sizes than in `rf`.
            "n_estimators": [100, 200, 300, 500],
            # Wider range of maximum depths.
            "max_depth": [5, 10, 20, 30, 40],
            # More candidates for the minimum split size.
            "min_samples_split": [2, 5, 10, 15, 20],
            # More candidates for the minimum leaf size.
            "min_samples_leaf": [1, 2, 4, 6],
            # Try the forest both with and without bootstrap resampling.
            "bootstrap": [True, False],
            # Held at False for every combination: scikit-learn only supports
            # out-of-bag scoring when bootstrap=True, and this grid also
            # contains bootstrap=False.
            "oob_score": [False],
            # Added to vary how many features each split considers.
            "max_features": ["sqrt", "log2"],
        },
    },
}


In [None]:
# Search space for an XGBoost regressor: 2**8 * 3 = 768 combinations.
# NOTE(review): assigning to `xgb` rebinds the alias created by
# `import xgboost as xgb` in the import cell, so the module is no longer
# reachable under that name once this cell has run. The name is kept because
# later cells pass `xgb` as the grid.
xgb = {
    "XGBoost": {
        "model": XGBRegressor(random_state=42),
        "params": {
            "n_estimators": [100, 500],
            "max_depth": [3, 15],
            "learning_rate": [0.001, 0.01, 0.1],
            "subsample": [0.7, 1.0],
            "colsample_bytree": [0.7, 1.0],
            "gamma": [0.1, 0.3],
            "reg_alpha": [0.1, 1],
            "reg_lambda": [0.1, 1],
            "min_child_weight": [1, 5],
        },
    },
}

## 1 lag models

### Decision Tree

In [None]:
# Run the project-local one-lag helper on criptos_diff with the decision-tree grid.
# The helper lives in the `functions` module, which is not visible here.
one_lag_results_tree = functions.optimize_and_evaluate_one_lag(
    criptos_diff,
    decision_tree,
)

In [None]:
# Write the one-lag decision-tree results to an Excel workbook.
one_lag_results_tree.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_tree_ml.xlsx"
)

### Random Forest

In [None]:
# Run the project-local one-lag helper on criptos_diff with the compact random-forest grid.
one_lag_results_rf = functions.optimize_and_evaluate_one_lag(
    criptos_diff,
    rf,
)

In [None]:
# Write the one-lag random-forest results to an Excel workbook.
one_lag_results_rf.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_rf_ml.xlsx"
)

### XGBoost

In [None]:
# Run the project-local one-lag helper on criptos_diff with the XGBoost grid.
# `xgb` here is the grid dict defined above, not the xgboost module.
one_lag_results_xgb = functions.optimize_and_evaluate_one_lag(
    criptos_diff,
    xgb,
)

In [None]:
# Write the one-lag XGBoost results to an Excel workbook.
one_lag_results_xgb.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_xgb_ml.xlsx"
)

## Adding exogenous variables

In [None]:
# Split the frame: every column except 'Bitcoin' becomes a feature,
# and the 'Bitcoin' column itself is the target.
X = criptos_diff.drop(columns="Bitcoin")
y = criptos_diff["Bitcoin"]

In [None]:
# Target shifted down by one row (i.e. the previous row's value at each label).
# The shift leaves a NaN in the first position, which is dropped.
# Despite the name, this is a one-step lag rather than a difference.
y_diff = y.shift(1).dropna()

In [None]:
# Promote the lagged Series to a one-column DataFrame so it can be merged into X.
# The column keeps the Series name.
y_diff = y_diff.to_frame()

In [None]:
# Attach the lagged target to the features by index label.
# This is an inner join (the pd.merge default, spelled out here), so rows with
# no lagged value are dropped from X.
X = pd.merge(
    left=X,
    right=y_diff,
    how="inner",
    left_index=True,
    right_index=True,
)

In [None]:
# Keep the target row-aligned with the merged feature matrix by selecting the
# labels that survived the inner merge, rather than assuming exactly one
# leading row was lost. With a NaN-free 'Bitcoin' column and unique index
# labels this gives the same result as y.iloc[1:]. Unlike the positional
# slice, it stays aligned if the lag step dropped additional rows, and it does
# not trim another row when the cell is re-run.
y = y.loc[X.index]

#### Decision tree

In [None]:
# Tune and evaluate the decision-tree grid on the exogenous feature set.
# NOTE(review): this is the only optimize_and_evaluate call in the visible cells
# that passes train_size explicitly; the others rely on the helper's default,
# which is not visible here. Confirm they are meant to use the same split.
ml_results_tree = functions.optimize_and_evaluate(
    X,
    y,
    decision_tree,
    train_size=0.8,
)

#### Random forest

In [None]:
# Tune and evaluate the compact random-forest grid on the exogenous feature set.
# train_size is left at the helper's default.
ml_results_rf = functions.optimize_and_evaluate(
    X,
    y,
    rf,
)

In [None]:
# Tune and evaluate the wider random-forest grid (rf_2) on the exogenous feature set.
# NOTE(review): no visible cell writes ml_results_rf_2 to disk. Confirm whether
# an Excel export is missing.
ml_results_rf_2 = functions.optimize_and_evaluate(
    X,
    y,
    rf_2,
)

#### XGBoost

In [None]:
# Tune and evaluate the XGBoost grid on the exogenous feature set.
# `xgb` here is the grid dict defined above, not the xgboost module.
ml_results_xgb = functions.optimize_and_evaluate(
    X,
    y,
    xgb,
)

#### Saving to excel

In [None]:
# Write the exogenous-feature decision-tree results to an Excel workbook.
ml_results_tree.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_tree_ml_exog.xlsx"
)

In [None]:
# Write the exogenous-feature random-forest results (from the compact `rf` grid)
# to an Excel workbook.
ml_results_rf.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_rf_ml_exog.xlsx"
)

In [None]:
# Write the exogenous-feature XGBoost results to an Excel workbook.
ml_results_xgb.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\results_xgb_ml_exog.xlsx"
)