# Modules used in this project

In [None]:
# standard libraries
import os
import time
from datetime import datetime
import itertools
from itertools import chain, combinations
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides numerical warnings (e.g. np.log on
# non-positive values) and any warnings raised by the modelling libraries
# used below - confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

# data analysis and manipulation
import pandas as pd
from pandas.plotting import scatter_matrix
import numpy as np

# data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# machine learning and model selection
from sklearn.model_selection import train_test_split, TimeSeriesSplit, ParameterGrid
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import xgboost as xgb
from xgboost import XGBRegressor

# statistical tools and analysis
from scipy.stats import pearsonr, shapiro
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# APIs and requests
import requests
import json

import time

import functions

# Getting the data

In [None]:
# Load criptos_final.csv, using its `date_hour` column as the row index.
# This frame is the input to the log transform further below.
# NOTE(review): no `parse_dates` is passed, so the index keeps whatever dtype
# read_csv infers - confirm downstream code does not need a DatetimeIndex.
criptos = pd.read_csv(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\input_data\criptos_final.csv",
    sep=",",
    index_col="date_hour",
)

In [None]:
# Load criptos_diff_final.csv, using its `date_hour` column as the row index.
# This frame feeds the AR fits that are not based on logs.
# NOTE(review): presumably the differenced counterpart of criptos_final.csv
# (judging by the file name) - confirm how it was produced.
criptos_diff = pd.read_csv(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\input_data\criptos_diff_final.csv",
    sep=",",
    index_col="date_hour",
)

## AR Models

In [None]:
# Settings shared by every model-fitting call in this notebook.
# STARTING_POINT is passed as `starting_point_percent`
# (presumably the fraction of the sample where evaluation starts - confirm
# in functions.fit_ar_model); LAGS is passed as `lags_list`.
STARTING_POINT = 0.8
LAGS = [1]

In [None]:
# Fit the AR model(s) on criptos_diff. The helper returns two objects,
# bound here as the performance table and the residuals table.
performance, residuals = functions.fit_ar_model(
    criptos_diff,
    starting_point_percent=STARTING_POINT,
    lags_list=LAGS,
)

In [None]:
# Export the AR performance table to an Excel workbook.
performance.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\ar_performance_table.xlsx"
)

In [None]:
# Export the AR residuals to an Excel workbook. This runs before the
# "Residual analysis" section trims `residuals` to its first five columns,
# so the file contains every column returned by the fit.
residuals.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\residuals\residuals_ar_simple.xlsx"
)

## Adding exogenous variables

In [None]:
# Fit the AR model(s) with exogenous variables on criptos_diff, with
# 'Bitcoin' as the target column. Same evaluation settings as the plain fit.
performance_exog, residuals_exog = functions.fit_ar_model_exog(
    df=criptos_diff,
    target_column='Bitcoin',
    starting_point_percent=STARTING_POINT,
    lags_list=LAGS,
)

In [None]:
# Export the performance table of the exogenous-variable fit to Excel.
performance_exog.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\ar_exog_performance_table.xlsx"
)

## Using logs

In [None]:
# Element-wise natural log of every column in `criptos`.
# NOTE(review): zeros become -inf and negative values become NaN, and the
# corresponding numpy warnings are silenced by the global warnings filter -
# assumes all values are strictly positive; confirm against the data.
criptos_log = np.log(criptos)

In [None]:
# First difference of the log series: log(x_t) - log(x_{t-1}).
criptos_log_diff = criptos_log.diff()
# np.log turns a zero into -inf (silently, because warnings are suppressed at
# the top of the notebook), which shows up here as +/-inf. dropna() only
# removes NaN, so map infinities to NaN first; otherwise non-finite rows would
# reach the model fits. The first row is always NaN after differencing and is
# dropped as before.
criptos_log_diff = criptos_log_diff.replace([np.inf, -np.inf], np.nan).dropna()

### Simple AR(1) models

In [None]:
# Fit the AR model(s) on the log-differenced data, with the same settings
# as the fit on criptos_diff.
performance_log, residuals_log = functions.fit_ar_model(
    criptos_log_diff,
    starting_point_percent=STARTING_POINT,
    lags_list=LAGS,
)

In [None]:
# Export the performance table of the log-difference AR fit to Excel.
performance_log.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\ar_log_performance_table.xlsx"
)

### AR(1) models with exogenous variables

In [None]:
# Fit the exogenous-variable AR model(s) on the log-differenced data, with
# 'Bitcoin' as the target column.
performance_exog_log, residuals_exog_log = functions.fit_ar_model_exog(
    df=criptos_log_diff,
    target_column='Bitcoin',
    starting_point_percent=STARTING_POINT,
    lags_list=LAGS,
)

In [None]:
# Export the performance table of the log-difference exogenous fit to Excel.
performance_exog_log.to_excel(
    r"C:\Users\Caio\Documents\Documentos\IC - Cripto\output_data\ar_exog_log_performance_table.xlsx"
)

## Residual analysis

In [None]:
# Keep only the first five columns of the plain-AR residuals for the analysis
# below (all rows are kept). Rebinding the name discards the other columns
# in memory; the Excel export above was written before this trim.
residuals = residuals.iloc[:,:5]

In [None]:
# Keep only the first four columns of the exogenous-fit residuals (all rows).
residuals_exog =  residuals_exog.iloc[:, :4]

In [None]:
# Keep only the first four columns of the log exogenous-fit residuals (all rows).
residuals_exog_log = residuals_exog_log.iloc[:, :4]

### Scatter plot

In [None]:
# Pairwise scatter-plot grid of the (trimmed) exogenous-fit residuals.
sns.pairplot(residuals_exog)
plt.tight_layout()  # tidy spacing of the current figure

In [None]:
# Pairwise scatter-plot grid of the log-difference AR residuals.
# NOTE(review): unlike `residuals`, `residuals_exog` and `residuals_exog_log`,
# `residuals_log` is never trimmed with .iloc before plotting - confirm
# whether it should also be limited to its first columns.
sns.pairplot(residuals_log)
plt.tight_layout()  # tidy spacing of the current figure

In [None]:
# Pairwise scatter-plot grid of the (trimmed) log exogenous-fit residuals.
sns.pairplot(residuals_exog_log)
plt.tight_layout()  # tidy spacing of the current figure

### Residuals Evolution in Time

In [None]:
# Plot the trimmed plain-AR residuals with the project helper
# (presumably one series per column over time, per the section heading -
# see functions.plot_residuals).
functions.plot_residuals(residuals)

In [None]:
# Plot the trimmed exogenous-fit residuals with the project helper.
functions.plot_residuals(residuals_exog)

In [None]:
# Plot the log-difference AR residuals with the project helper.
# NOTE(review): `residuals_log` is passed untrimmed, whereas the other three
# residual tables are cut to their first columns in the "Residual analysis"
# cells - confirm this is intentional.
functions.plot_residuals(residuals_log)

In [None]:
# Plot the trimmed log exogenous-fit residuals with the project helper.
functions.plot_residuals(residuals_exog_log)