<a href="https://colab.research.google.com/github/MariaGabrielaAlvesZuppardo/SeriesTemporaisCin/blob/main/Lista_de_Exercicio_ARIMA_Not_Seasonal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance



In [2]:
!pip install pmdarima statsmodels scikit-learn



In [3]:
#!pip install --upgrade scikit-learn pmdarima

In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
from pandas import DataFrame
from datetime import datetime
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from math import sqrt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Load Data

In [None]:
# Download daily prices for the ticker and reduce them to one observation
# per calendar month (the last available close of each month).
start_date = datetime(2007, 3, 18)
end_date = datetime(2025, 3, 18)
ticker = 'BTC-USD'

# auto_adjust is passed explicitly: yfinance changed its default to True and
# emits a notice when the argument is omitted. Pinning it keeps the meaning of
# the 'Close' column independent of the installed yfinance version.
df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)['Close']

# Depending on the yfinance version, selecting 'Close' for a single ticker can
# yield either a Series or a one-column DataFrame; normalise to a DataFrame so
# the column rename below always applies.
if isinstance(df, pd.Series):
    df = df.to_frame(name=ticker)

df = df.rename(columns={ticker: 'y'})
# 'ME' = month-end frequency; .last() keeps the final observation in each month
df = df.resample('ME').last()


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

In [None]:
# Quick visual check of the monthly series before any modelling
df.plot()

## Train / Test

In [None]:
# Chronological split: the first 70% of the rows form the training set and the
# remaining rows are held out for evaluation (no shuffling).
size = int(len(df) * 0.7)
train = df.iloc[:size]
test = df.iloc[size:]
h = len(test)  # number of held-out observations
print(train.shape, test.shape, sep='\n')
train.head()

# Analysis

In [None]:
def adf_test(series,title=''):
    """
    Run the Augmented Dickey-Fuller test and print a formatted report.

    Parameters
    ----------
    series : object exposing ``.dropna()`` (e.g. a pandas Series or DataFrame)
        Data to test; missing values are dropped before the test is run.
    title : str, optional
        Label appended to the report header.

    Returns
    -------
    None
        The report and the verdict (at the 5% level) are printed, not returned.
    """
    print(f'Augmented Dickey-Fuller Test: {title}')
    result = adfuller(series.dropna(), autolag='AIC')

    # The first four entries of the result are the headline figures; the fifth
    # is a mapping from significance level to critical value.
    report = dict(zip(
        ['ADF test statistic', 'p-value', '# lags used', '# observations'],
        result[0:4],
    ))
    report.update(
        (f'critical value ({level})', threshold)
        for level, threshold in result[4].items()
    )
    print(pd.Series(report).to_string())

    # The null hypothesis of the test is the presence of a unit root
    if result[1] <= 0.05:
        verdict = ("Reject the null hypothesis",
                   "Data has no unit root and is stationary")
    else:
        verdict = ("Fail to reject the null hypothesis",
                   "Data has a unit root and is non-stationary")
    print(*verdict, sep='\n')

def ljungbox_test(serie, lags=20, alpha=0.05):
    """
    Apply the Ljung-Box test and print whether autocorrelation is detected.

    Parameters
    ----------
    serie : array-like
        Data to test (for example model residuals or a differenced series).
    lags : int, optional
        Lag at which the test is reported. Defaults to 20, the value that was
        previously hard-coded.
    alpha : float, optional
        Significance level used for the decision. Defaults to 0.05.

    Returns
    -------
    None
        The p-value, the significance level and the verdict are printed.
    """
    from statsmodels.stats.diagnostic import acorr_ljungbox

    # A one-element list makes the result contain a single row, for that lag
    ljung_box_results = acorr_ljungbox(serie, return_df=True, lags=[lags])

    p_value = ljung_box_results['lb_pvalue'].iloc[0]
    print('\nLJung-Box Test\nH0:has no autocorrelation\n')
    print(f'p-valor = {round(p_value,2)}\nalpha = {alpha}')
    # Rejecting H0 (p-value below alpha) means autocorrelation is present
    if p_value < alpha:
        print('Data has autocorrelation')
    else:
        print('Data has no autocorrelation')


In [None]:
# ADF stationarity check on the undifferenced training data
adf_test(train)

## Definindo 'd'

### 1ª diff

In [None]:
# First difference of the training series. The first row has no predecessor,
# so its difference is NaN and the row is dropped. `train` is left untouched.
diff1 = train.assign(y=train['y'].diff(1)).dropna()
diff1.head()

In [None]:
# 2x2 grid: each series on the left, its autocorrelation function on the right.
fig, axes = plt.subplots(2, 2)

# Top row: the training series as-is. No log transform is applied to `train`,
# so the title must not claim one.
axes[0, 0].plot(train)
axes[0, 0].set_title('Original Series')
plot_acf(train, ax=axes[0, 1], lags=20)

# Bottom row: the first-differenced series
axes[1, 0].plot(diff1)
axes[1, 0].set_title('1st Order Differencing')
plot_acf(diff1, ax=axes[1, 1], lags=20)

# Rotate the x tick labels on the left-hand (time series) panels only
for ax in axes[:, 0]:
    for label in ax.get_xticklabels():
        label.set_rotation(45)

plt.tight_layout()
plt.show()

In [None]:
# Repeat the ADF stationarity check on the first-differenced training data
adf_test(diff1)

* A série já parece bastante estacionária visualmente.

* A ACF mostra poucos lags significativos (quase todos dentro da banda de confiança).

* Isso indica que a 1ª diferenciação pode ser suficiente.

### 2ª diff

In [None]:
# Second difference: difference the already-differenced series once more and
# drop the leading row, whose value is undefined. `diff1` is left untouched.
diff2 = diff1.assign(y=diff1['y'].diff(1)).dropna()
diff2.head()

In [None]:
# Second-differenced series (left) and its autocorrelation function (right).
fig, axes = plt.subplots(1, 2)
axes[0].plot(diff2)
axes[0].set_title('2nd Order Differencing')
# No manual y-limits on the ACF panel: autocorrelations lie in [-1, 1], so a
# (0, 5) range would cut off every negative lag, and negative lags are what
# reveal over-differencing.
plot_acf(diff2, ax=axes[1], lags=20)

# Rotate the x tick labels on the left-hand (time series) panel only
for label in axes[0].get_xticklabels():
    label.set_rotation(45)
plt.tight_layout()
plt.show()

* Após a 2ª diferenciação, a série passou a oscilar ainda mais.
* Mais lags se tornam significativos negativamente, o que pode indicar superdiferenciação.

### Conclusão

* A 1ª diferenciação é suficiente

In [None]:
# Differencing order: the analysis above concludes one difference is sufficient
d = 1

## Definindo 'p'

In [None]:
# First-differenced series (left) and its partial autocorrelation function
# (right), used to choose the AR order p.
fig, axes = plt.subplots(1, 2)
axes[0].plot(diff1)
axes[0].set_title('1st Differencing')
# No manual y-limits on the PACF panel: a (0, 5) range would exclude all
# negative partial autocorrelations, which matter as much as positive ones
# when judging which lags are significant.
plot_pacf(diff1, ax=axes[1], lags=20)

# Rotate the x tick labels on the left-hand (time series) panel only
for label in axes[0].get_xticklabels():
    label.set_rotation(45)
plt.tight_layout()
plt.show()

In [None]:
# AR order p; presumably read off the PACF plot in the preceding cell — TODO confirm
p = 5

## Definindo 'q'

In [None]:
# First-differenced series (left) and its autocorrelation function (right),
# used to choose the MA order q.
fig, axes = plt.subplots(1, 2)
axes[0].plot(diff1)
axes[0].set_title('1st Differencing')
# No manual y-limits on the ACF panel: a (0, 1.2) range would hide every
# negative autocorrelation, so significant negative lags could be missed.
plot_acf(diff1, ax=axes[1], lags=20)

# Rotate the x tick labels on the left-hand (time series) panel only
for label in axes[0].get_xticklabels():
    label.set_rotation(45)
plt.tight_layout()

plt.show()

In [None]:
# Ljung-Box check for remaining autocorrelation in the first-differenced series
ljungbox_test(diff1)

In [None]:
# MA order q; presumably read off the ACF plot above — TODO confirm
q = 7

# Modelo ARIMA

## ARIMA(5,1,7)

In [None]:
# Specify an ARIMA model with order (p=5, d=1, q=7) on the training set and
# estimate its parameters.
# NOTE(review): the order is hard-coded here rather than built from the p, d, q
# variables set in earlier cells, so editing those variables does not change
# this fit.
arima_517 = ARIMA(train, order=(5,1,7))
fitted_517 = arima_517.fit()

In [None]:
# Print the estimation summary of the fitted ARIMA(5,1,7) model
print(fitted_517.summary())
# Collect the model residuals in a DataFrame
resid_517 = DataFrame(fitted_517.resid)
# Descriptive statistics of the residuals
# NOTE(review): with d=1 the first residual may simply reflect the initial
# level of the series — confirm before interpreting mean/std/max.
print(resid_517.describe())

In [None]:
# Residual diagnostic plots for the ARIMA(5,1,7) fit
fig = fitted_517.plot_diagnostics(figsize=(14,10))
plt.show()

In [None]:
# Forecast as many steps ahead as there are held-out observations
predictions_517 = predictions = fitted_517.forecast(len(test))
# Overlay train, test and forecast on a single chart
train['y'].plot(legend=True, label='TRAIN')
test['y'].plot(legend=True, label='TEST')
# Key the forecast by the test dates ('ds') before plotting it
pd.DataFrame({'ds':test.index, 'y':predictions_517}).set_index('ds')['y'].plot(legend=True, label='PREDICTIONS', figsize=(12, 6))


# Modelo AR

## AR(5)

In [None]:
# AR(5) expressed as ARIMA order (5,0,0): no differencing and no MA terms, so
# the model is fitted on the undifferenced training series.
ar5 = ARIMA(train, order=(5,0,0))
fitted_ar5 = ar5.fit()

In [None]:
# Print the estimation summary of the fitted AR(5) model
print(fitted_ar5.summary())
# Collect the model residuals in a DataFrame
resid_ar5 = DataFrame(fitted_ar5.resid)
# Descriptive statistics of the residuals
print(resid_ar5.describe())
# Residual diagnostic plots. The forecast is produced in the plotting cell
# that follows, so it is not computed a second time here.
fig = fitted_ar5.plot_diagnostics(figsize=(14,10))
plt.show()

In [None]:
# Forecast as many steps ahead as there are held-out observations
predictions_ar5 = predictions = fitted_ar5.forecast(len(test))
# Overlay train, test and forecast on a single chart
train['y'].plot(legend=True, label='TRAIN')
test['y'].plot(legend=True, label='TEST')
# Key the forecast by the test dates ('ds') before plotting it
pd.DataFrame({'ds':test.index, 'y':predictions_ar5}).set_index('ds')['y'].plot(legend=True, label='PREDICTIONS', figsize=(12, 6))


# Modelo MA

## MA(7)

In [None]:
# MA(7) expressed as ARIMA order (0,0,7): no AR terms and no differencing, so
# the model is fitted on the undifferenced training series.
ma7 = ARIMA(train, order=(0,0,7))
fitted_ma7 = ma7.fit()


In [None]:
# Print the estimation summary of the fitted MA(7) model
print(fitted_ma7.summary())
# Collect the model residuals in a DataFrame
resid_ma7 = DataFrame(fitted_ma7.resid)
# Descriptive statistics of the residuals
print(resid_ma7.describe())
# Residual diagnostic plots. The forecast is produced in the plotting cell
# that follows, so it is not computed a second time here.
fig = fitted_ma7.plot_diagnostics(figsize=(14,10))
plt.show()

In [None]:
# Forecast as many steps ahead as there are held-out observations
predictions_ma7 = predictions = fitted_ma7.forecast(len(test))
# Overlay train, test and forecast on a single chart
train['y'].plot(legend=True, label='TRAIN')
test['y'].plot(legend=True, label='TEST')
# Key the forecast by the test dates ('ds') before plotting it
pd.DataFrame({'ds':test.index, 'y':predictions_ma7}).set_index('ds')['y'].plot(legend=True, label='PREDICTIONS', figsize=(12, 6))


# Conclusion

In [None]:
def metrics(test, predictions):
  """
  Print forecast-accuracy metrics for a set of predictions.

  Parameters
  ----------
  test : mapping with a 'y' entry (e.g. a DataFrame with a 'y' column)
      Observed values.
  predictions : array-like
      Forecasts, in the same order as ``test['y']`` and of the same length.

  Returns
  -------
  None
      MSE, RMSE, MAE, R², MAPE and SMAPE are printed.

  Raises
  ------
  ValueError
      If the number of predictions differs from the number of observations.
  """
  # Work on plain arrays so every metric compares values position by position.
  # Mixing pandas objects here would align on the index for MAPE/SMAPE while
  # the scikit-learn metrics ignore it, giving inconsistent (or NaN) results
  # whenever the forecast index differs from the test index.
  y_true = np.asarray(test['y'], dtype=float).ravel()
  y_pred = np.asarray(predictions, dtype=float).ravel()
  if y_true.shape != y_pred.shape:
    raise ValueError(
        f"test has {y_true.size} observations but predictions has {y_pred.size}")

  mse = mean_squared_error(y_true, y_pred)
  rmse = np.sqrt(mse)
  mae = mean_absolute_error(y_true, y_pred)
  r2 = r2_score(y_true, y_pred)

  abs_err = np.abs(y_true - y_pred)

  # MAPE (Mean Absolute Percentage Error). Observations equal to zero have no
  # defined percentage error and are left out instead of producing inf.
  nonzero = y_true != 0
  if nonzero.any():
    mape = np.mean(abs_err[nonzero] / np.abs(y_true[nonzero])) * 100
  else:
    mape = np.nan

  # SMAPE (Symmetric Mean Absolute Percentage Error). When both the actual and
  # the predicted value are zero the term is counted as zero error.
  denom = np.abs(y_true) + np.abs(y_pred)
  ratio = np.divide(2 * abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
  smape = np.mean(ratio) * 100

  # Report the metrics
  print(f"MSE: {mse}")
  print(f"RMSE: {rmse}")
  print(f"MAE: {mae}")
  print(f"R²: {r2}")
  print(f"MAPE: {mape}")
  print(f"SMAPE: {smape}")


In [None]:
# Print the hold-out metrics for each of the three fitted models, in the order
# they were estimated, so they can be compared side by side.
print('\nARIMA(5,1,7)')
metrics(test, predictions_517)

print('\nAR(5)')
metrics(test, predictions_ar5)

print('\nMA(7)')
metrics(test, predictions_ma7)