# Bibliotecas

In [None]:
import yfinance as yf
import pandas as pd

In [None]:
# símbolos das empresas e intervalo de datas
symbols = ['MSFT', 'GOOGL', 'TSLA', 'AMZN', 'AAPL', 'META', 'NVDA']

start_date = '2015-01-01'
end_date = '2023-11-22'


data = yf.download(symbols, start=start_date, end=end_date)

[*********************100%%**********************]  7 of 7 completed


In [None]:
data_adj_close = data.iloc[:, data.columns.get_level_values(0)=='Adj Close']
data_adj_close.columns = data_adj_close.columns.droplevel()

data_adj_close.head()

Ticker,AAPL,AMZN,GOOGL,META,MSFT,NVDA,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02,24.466448,15.426,26.477501,78.449997,40.452732,4.832805,14.620667
2015-01-05,23.777187,15.1095,25.973,77.190002,40.080734,4.75118,14.006
2015-01-06,23.779428,14.7645,25.332001,76.150002,39.492466,4.607129,14.085333
2015-01-07,24.112869,14.921,25.2575,76.150002,39.994236,4.595127,14.063333
2015-01-08,25.039343,15.023,25.345501,78.18,41.170784,4.767984,14.041333


# Teste Estacionaridade



Dois testes para garantir que a série foi caracterizada corretamente
- Teste de Dickey Fuller Aumentado (ADF)

Interpretações:

ADF < valor crítico e p-valor < nível de significância (padrão 0,05): rejeita-se a hipótese nula de que a série possui raiz unitária e considera-se que a série é estacionária

ADF > valor crítico e p-valor > nível de significância (padrão 0,05): não se rejeita a hipótese nula de que a série possui raiz unitária e considera-se que a série não é estacionária.

ADF ~ valor crítico e p-valor ~ nível de significância (padrão 0,05): a série pode ser considerada marginalmente estacionária ou a decisão pode ser inconclusiva. Serão necessários outros testes;

- Teste de Phillips-Perron

O teste de Phillips-Perron se diferencia principalmente na forma como lida com a autocorrelação e a heterocedasticidade (variabilidade desigual) nos termos de erro da série temporal.

In [None]:
!pip install arch



In [None]:
from statsmodels.tsa.stattools import adfuller
from arch.unitroot import PhillipsPerron

def estacionaridade(dataframe):

  results_adfuller = {}
  results_pperron = {}

  for column in dataframe:
    results_adfuller[column] = adfuller(dataframe.loc[:,column])
    results_pperron[column] = PhillipsPerron(dataframe.loc[:,column])

  for index, result, result2 in zip(results_adfuller.keys(), results_adfuller.values(), results_pperron.values()):
    if result[1] < 0.05 and result2.pvalue < 0.05:
      print(f'{index} Adj Close -> Estacionária')
    else:
      print(f'{index} Adj Close -> Não Estacionária')

In [None]:
estacionaridade(data_adj_close)

AAPL Adj Close -> Não Estacionária
AMZN Adj Close -> Não Estacionária
GOOGL Adj Close -> Não Estacionária
META Adj Close -> Não Estacionária
MSFT Adj Close -> Não Estacionária
NVDA Adj Close -> Não Estacionária
TSLA Adj Close -> Não Estacionária


In [None]:
# Operação de Diff para tornar as séries estacionárias
data_adj_close_diff = data_adj_close.diff().dropna()

In [None]:
estacionaridade(data_adj_close_diff)

AAPL Adj Close -> Estacionária
AMZN Adj Close -> Estacionária
GOOGL Adj Close -> Estacionária
META Adj Close -> Estacionária
MSFT Adj Close -> Estacionária
NVDA Adj Close -> Estacionária
TSLA Adj Close -> Estacionária


# Critérios de Avaliação do Modelo

In [None]:
!pip install yfinance pmdarima



In [None]:
!pip install -q sktime

In [None]:
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.theta import ThetaForecaster
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots
from prophet import Prophet

In [None]:
import numpy as np

def calcula_smape(actual, predicted) -> float:

    # Convert actual and predicted to numpy
    # array data type if not already
    if not all([isinstance(actual, np.ndarray),
                isinstance(predicted, np.ndarray)]):
        actual, predicted = np.array(actual), np.array(predicted)

    return round(
        np.mean(
            np.abs(predicted - actual) /
            ((np.abs(predicted) + np.abs(actual))/2)
        )*100, 2
    )

In [None]:
mse_theta = []
mae_theta = []
smape_theta = []

mse_prophet = []
mae_prophet = []
smape_prophet = []

metricas = pd.DataFrame(columns=['Empresa', 'MAE', 'MSE','SMAPE', 'MAE', 'MSE', 'SMAPE','MAE','MSE','SMAPE'])

## Normalização

### Para Theta

In [None]:
data_adjd_close_mult = data_adj_close_diff.copy()
data_adjd_close_mult.reset_index(inplace=True)
#data_adjd_close_mult.set_index('Date', inplace=True)

def zero_to_one(value):
  return 0.1 if value == 0 else value

empresas = ['AAPL', 'AMZN', 'GOOGL', 'META', 'MSFT', 'NVDA', 'TSLA']

for empresa in empresas:
      min_value = data_adjd_close_mult[empresa].min()
      max_value = data_adjd_close_mult[empresa].max()
      data_adjd_close_mult[empresa] = (data_adjd_close_mult[empresa] - min_value) / (max_value - min_value)
      data_adjd_close_mult[empresa] = data_adjd_close_mult[empresa].apply(zero_to_one)

In [None]:
data_adjd_close_mult

Ticker,Date,AAPL,AMZN,GOOGL,META,MSFT,NVDA,TSLA
0,2015-01-05,0.432769,0.511353,0.538950,0.694681,0.524176,0.233443,0.487169
1,2015-01-06,0.463843,0.510624,0.533155,0.696501,0.519081,0.232801,0.494541
2,2015-01-07,0.478727,0.523447,0.557204,0.705104,0.544762,0.234160,0.493464
3,2015-01-08,0.505376,0.522054,0.564102,0.721896,0.560660,0.236062,0.493464
4,2015-01-09,0.464949,0.514933,0.547228,0.701464,0.524787,0.234481,0.490894
...,...,...,...,...,...,...,...,...
2232,2023-11-15,0.489357,0.452965,0.602819,0.675325,0.536475,0.155249,0.551374
2233,2023-11-16,0.540137,0.509985,0.658431,0.717346,0.686081,0.295206,0.395447
2234,2023-11-17,0.462844,0.579534,0.491595,0.712135,0.384040,0.215554,0.501239
2235,2023-11-20,0.542833,0.543737,0.600272,0.745885,0.711762,0.348617,0.507506


### Para Prophet

In [None]:
data_prophet = data_adj_close_diff.copy()
data_prophet.reset_index(inplace=True)

for empresa in empresas:
      min_value = data_prophet[empresa].min()
      max_value = data_prophet[empresa].max()
      data_prophet[empresa] = (data_prophet[empresa] - min_value) / (max_value - min_value)
      data_prophet[empresa] = data_prophet[empresa].apply(zero_to_one)

data_prophet

Ticker,Date,AAPL,AMZN,GOOGL,META,MSFT,NVDA,TSLA
0,2015-01-05,0.432769,0.511353,0.538950,0.694681,0.524176,0.233443,0.487169
1,2015-01-06,0.463843,0.510624,0.533155,0.696501,0.519081,0.232801,0.494541
2,2015-01-07,0.478727,0.523447,0.557204,0.705104,0.544762,0.234160,0.493464
3,2015-01-08,0.505376,0.522054,0.564102,0.721896,0.560660,0.236062,0.493464
4,2015-01-09,0.464949,0.514933,0.547228,0.701464,0.524787,0.234481,0.490894
...,...,...,...,...,...,...,...,...
2232,2023-11-15,0.489357,0.452965,0.602819,0.675325,0.536475,0.155249,0.551374
2233,2023-11-16,0.540137,0.509985,0.658431,0.717346,0.686081,0.295206,0.395447
2234,2023-11-17,0.462844,0.579534,0.491595,0.712135,0.384040,0.215554,0.501239
2235,2023-11-20,0.542833,0.543737,0.600272,0.745885,0.711762,0.348617,0.507506


In [None]:
data_prophet['ds'] = data_prophet['Date']
data_prophet = data_prophet.drop(columns=['Date'])

data_prophet['y'] = data_prophet['AAPL']
data_prophet['y_AMZN'] = data_prophet['AMZN']
data_prophet['y_GOOGL'] = data_prophet['GOOGL']
data_prophet['y_META'] = data_prophet['META']
data_prophet['y_MSFT'] = data_prophet['MSFT']
data_prophet['y_NVDA'] = data_prophet['NVDA']
data_prophet['y_TSLA'] = data_prophet['TSLA']

###Para Arima

In [None]:
data_arima = data_adj_close_diff.copy()
data_arima.reset_index(inplace=True)

for empresa in empresas:
      min_value = data_arima[empresa].min()
      max_value = data_arima[empresa].max()
      data_arima[empresa] = (data_arima[empresa] - min_value) / (max_value - min_value)
      data_arima[empresa] = data_arima[empresa].apply(zero_to_one)

data_arima

Ticker,Date,AAPL,AMZN,GOOGL,META,MSFT,NVDA,TSLA
0,2015-01-05,0.432769,0.511353,0.538950,0.694681,0.524176,0.233443,0.487169
1,2015-01-06,0.463843,0.510624,0.533155,0.696501,0.519081,0.232801,0.494541
2,2015-01-07,0.478727,0.523447,0.557204,0.705104,0.544762,0.234160,0.493464
3,2015-01-08,0.505376,0.522054,0.564102,0.721896,0.560660,0.236062,0.493464
4,2015-01-09,0.464949,0.514933,0.547228,0.701464,0.524787,0.234481,0.490894
...,...,...,...,...,...,...,...,...
2232,2023-11-15,0.489357,0.452965,0.602819,0.675325,0.536475,0.155249,0.551374
2233,2023-11-16,0.540137,0.509985,0.658431,0.717346,0.686081,0.295206,0.395447
2234,2023-11-17,0.462844,0.579534,0.491595,0.712135,0.384040,0.215554,0.501239
2235,2023-11-20,0.542833,0.543737,0.600272,0.745885,0.711762,0.348617,0.507506


# Predição por Ação

In [None]:
#Arima
autoarima_models = {}
symbols = ['MSFT', 'GOOGL', 'TSLA', 'AMZN', 'AAPL', 'META', 'NVDA']
for symbol in symbols:
    print(f"Modelando {symbol} com AutoARIMA...")

    # Selecionando a série temporal normalizada para a ação atual
    series = data_arima[symbol].dropna()

    # Ajustando o modelo AutoARIMA
    model = auto_arima(series, seasonal=False, stepwise=True, trace=True, error_action='ignore', suppress_warnings=True)

    autoarima_models[symbol] = model

    print(model.summary())

Modelando MSFT com AutoARIMA...
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=11.06 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=3610.017, Time=0.23 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=-3379.701, Time=0.19 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=1310.307, Time=0.45 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=inf, Time=1.46 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=inf, Time=4.23 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=-5083.582, Time=1.86 sec
 ARIMA(3,0,1)(0,0,0)[0]             : AIC=-5094.705, Time=4.42 sec
 ARIMA(3,0,0)(0,0,0)[0]             : AIC=inf, Time=3.08 sec
 ARIMA(4,0,1)(0,0,0)[0]             : AIC=inf, Time=4.94 sec
 ARIMA(3,0,2)(0,0,0)[0]             : AIC=inf, Time=9.59 sec
 ARIMA(4,0,0)(0,0,0)[0]             : AIC=inf, Time=0.62 sec
 ARIMA(4,0,2)(0,0,0)[0]             : AIC=inf, Time=4.18 sec
 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=-5134.091, Time=2.12 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   

## Modelo

### Apple



In [None]:
import plotly.graph_objects as go
from pmdarima import auto_arima

In [None]:
# Theta
dados = data_adjd_close_mult['AAPL']

# Prophet

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_AAPL = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_AAPL.add_seasonality(name='custom_daily', period=1, fourier_order=1)
model_AAPL.fit(X_train_2[['ds', 'y']])

future = model_AAPL.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_AAPL.predict(future)



DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/auto9xje.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/cho6938s.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=44259', 'data', 'file=/tmp/tmp8ggp_o_q/auto9xje.json', 'init=/tmp/tmp8ggp_o_q/cho6938s.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_model54kdv4t_/prophet_model-20240125203018.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:30:18 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:30:18 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


appl_data = data_arima['AAPL']


# divisão de treinamento e teste
train_size = int(len(appl_data) * 0.7)
train, test = appl_data[:train_size], appl_data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.014339859124507384
MAE: 0.09013316355936565
SMAPE: 20.21


In [None]:
X_pred_2

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,custom_daily,custom_daily_lower,custom_daily_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-24,0.456735,0.403164,0.543396,0.456735,0.456735,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.472055
1,2021-03-25,0.456741,0.400614,0.541464,0.456741,0.456741,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.472062
2,2021-03-26,0.456748,0.393147,0.543195,0.456748,0.456748,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.472068
3,2021-03-27,0.456754,0.399920,0.540755,0.456754,0.456754,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.472074
4,2021-03-28,0.456760,0.399716,0.544257,0.456760,0.456760,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.472081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,2023-01-20,0.460913,0.399214,0.544278,0.460635,0.461200,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.476234
668,2023-01-21,0.460920,0.403270,0.550378,0.460641,0.461207,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.476240
669,2023-01-22,0.460926,0.400724,0.547314,0.460647,0.461214,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.476246
670,2023-01-23,0.460932,0.403831,0.549448,0.460652,0.461222,0.01532,0.01532,0.01532,0.01532,0.01532,0.01532,0.0,0.0,0.0,0.476253


In [None]:
#Plotagem do modelo Theta para a Apple

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'Apple',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - Apple',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)

fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[0] = ['Apple', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[1] = ['Apple - Prophet', mae_prophet, mse_prophet, smape_prophet]

In [None]:
metricas

Unnamed: 0,Empresa,MAE,MSE,SMAPE,MAE.1,MSE.1,SMAPE.1,MAE.2,MSE.2,SMAPE.2
0,Apple,0.090052,0.014368,20.18,0.090024,0.014369,20.17,0.090133,0.01434,20.21


### Amazon

In [None]:
data_prophet = data_prophet.rename(columns={"y":"y_AAPL","y_AMZN":"y"})
dados = data_adjd_close_mult['AMZN']

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_AMZN = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_AMZN.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_AMZN.fit(X_train_2[['ds', 'y']])

future = model_AMZN.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_AMZN.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/hn24m2wa.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/3dl8yqw6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6507', 'data', 'file=/tmp/tmp8ggp_o_q/hn24m2wa.json', 'init=/tmp/tmp8ggp_o_q/3dl8yqw6.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_modeljhce1sy0/prophet_model-20240125203041.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:30:41 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:30:42 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


amzn_data = data_arima['AMZN']


# divisão de treinamento e teste
train_size = int(len(amzn_data) * 0.7)
train, test = amzn_data[:train_size], amzn_data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.006330149360815937
MAE: 0.058008358067581096
SMAPE: 11.41


In [None]:
#Plotagem do modelo Theta para a AMZN

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'AMZN',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - AMZN',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[1] = ['Amazon', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[3] = ['Amazon- Prophet', mae_prophet, mse_prophet, smape_prophet]

###GOOGL

In [None]:
data_prophet = data_prophet.rename(columns={"y":"y_AMZN","y_GOOGL":"y"})
dados = data_adjd_close_mult['GOOGL']

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_GOOGL = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_GOOGL.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_GOOGL.fit(X_train_2[['ds', 'y']])

future = model_GOOGL.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_GOOGL.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/53xisab3.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/n331thya.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=25981', 'data', 'file=/tmp/tmp8ggp_o_q/53xisab3.json', 'init=/tmp/tmp8ggp_o_q/n331thya.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_model5c4d_rlu/prophet_model-20240125203103.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:31:03 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:31:03 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


data = data_arima['GOOGL']


# divisão de treinamento e teste
train_size = int(len(data) * 0.7)
train, test = data[:train_size], data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.009716769257878136
MAE: 0.0730978703958565
SMAPE: 13.32


In [None]:
#Plotagem do modelo Theta para a Google

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'Google',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - Google',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[2] = ['Google', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[5] = ['Google - Prophet', mae_prophet, mse_prophet, smape_prophet]

###META

In [None]:
data_prophet = data_prophet.rename(columns={"y":"y_GOOGL","y_META":"y"})
dados = data_adjd_close_mult['META']

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_META = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_META.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_META.fit(X_train_2[['ds', 'y']])

future = model_META.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_META.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/cnl0lyzk.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/bqjxbshv.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=45230', 'data', 'file=/tmp/tmp8ggp_o_q/cnl0lyzk.json', 'init=/tmp/tmp8ggp_o_q/bqjxbshv.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_modelfivk54f0/prophet_model-20240125203143.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:31:43 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:31:43 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


data = data_arima['META']


# divisão de treinamento e teste
train_size = int(len(data) * 0.7)
train, test = data[:train_size], data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.0030254424969691886
MAE: 0.037150679925606465
SMAPE: 5.37


In [None]:
#Plotagem do modelo Theta para a Google

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'META',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - META',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[3] = ['Meta', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[7] = ['Meta - Prophet', mae_prophet, mse_prophet, smape_prophet]

###MSFT

In [None]:
dados = data_adjd_close_mult['MSFT']
data_prophet = data_prophet.rename(columns={"y":"y_META","y_MSFT":"y"})

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_MSFT = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_MSFT.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_MSFT.fit(X_train_2[['ds', 'y']])

future = model_MSFT.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_MSFT.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/why2we8a.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/l4dua6zo.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48477', 'data', 'file=/tmp/tmp8ggp_o_q/why2we8a.json', 'init=/tmp/tmp8ggp_o_q/l4dua6zo.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_modelk4rj1uhm/prophet_model-20240125203158.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:31:58 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:31:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


data = data_arima['MSFT']


# divisão de treinamento e teste
train_size = int(len(data) * 0.7)
train, test = data[:train_size], data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.013536560406313765
MAE: 0.08819661388109974
SMAPE: 16.91


In [None]:
#Plotagem do modelo Theta para a Google

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'MSFT',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - MSFT',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[4] = ['Microsoft', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[9] = ['Microsoft - Prophet', mae_prophet, mse_prophet, smape_prophet]

###NVDA

In [None]:
dados = data_adjd_close_mult['NVDA']
data_prophet = data_prophet.rename(columns={"y":"y_MSFT","y_NVDA":"y"})

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_NVDA = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_NVDA.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_NVDA.fit(X_train_2[['ds', 'y']])

future = model_NVDA.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_NVDA.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/og5b7ciy.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/i78ola06.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=19070', 'data', 'file=/tmp/tmp8ggp_o_q/og5b7ciy.json', 'init=/tmp/tmp8ggp_o_q/i78ola06.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_modeldndybm83/prophet_model-20240125203230.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:32:30 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:32:30 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


data = data_arima['NVDA']


# divisão de treinamento e teste
train_size = int(len(data) * 0.7)
train, test = data[:train_size], data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.00748323172267768
MAE: 0.06185440680020747
SMAPE: 27.19


In [None]:
#Plotagem do modelo Theta para a Google

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'NVDA',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - NVDA',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[6] = ['Nvidia', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[11] = ['Nvidia - Prophet', mae_prophet, mse_prophet, smape_prophet]

###TSLA

In [None]:
dados = data_adjd_close_mult['TSLA']
data_prophet = data_prophet.rename(columns={"y":"y_NVDA","y_TSLA":"y"})

In [None]:
#Divisão conjunto treino/teste
X_train_1, X_test_1 = temporal_train_test_split(dados, test_size=0.30)
X_train_2, X_test_2 = temporal_train_test_split(data_prophet, test_size=0.30)

# Theta
fh_1 = np.arange(1, len(X_test_1) + 1)
forecaster = ThetaForecaster(sp=7) # sazonalidade (nesse caso, 1 semana)

forecaster.fit(X_train_1)
X_pred_1 = forecaster.predict(fh_1)

# Prophet
model_TSLA = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model_TSLA.add_seasonality(name='custom_daily', period=1, fourier_order=1)  # Adicione sazonalidade diária se necessário
model_TSLA.fit(X_train_2[['ds', 'y']])

future = model_TSLA.make_future_dataframe(periods=len(X_test_2['y']), freq='D', include_history=False)
X_pred_2 = model_TSLA.predict(future)

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/ooxjscxi.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8ggp_o_q/p96__im_.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=96796', 'data', 'file=/tmp/tmp8ggp_o_q/ooxjscxi.json', 'init=/tmp/tmp8ggp_o_q/p96__im_.json', 'output', 'file=/tmp/tmp8ggp_o_q/prophet_modeljwqehsfy/prophet_model-20240125203303.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:33:03 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:33:03 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from plotly.subplots import make_subplots

#Arima ainda para apple


data = data_arima['TSLA']


# divisão de treinamento e teste
train_size = int(len(data) * 0.7)
train, test = data[:train_size], data[train_size:]

# ajuste do modelo AutoARIMA
model = auto_arima(train, seasonal=False, stepwise=True, error_action='ignore', suppress_warnings=True)
predictions = model.predict(n_periods=len(test))

# métricas de desempenho
mse_arima = mean_squared_error(test, predictions)
mae_arima = mean_absolute_error(test, predictions)
smape_arima = calcula_smape(test, predictions)

print(f"Desempenho do Modelo para 'MSFT':")
print(f"MSE: {mse_arima}")
print(f"MAE: {mae_arima}")
print(f"SMAPE: {smape_arima}")


Desempenho do Modelo para 'MSFT':
MSE: 0.00974302564071728
MAE: 0.07153632481970074
SMAPE: 15.01


In [None]:
#Plotagem do modelo Theta para a Google

fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_test_1,
    name = 'TSLA',
    marker=dict(color='#4682B4')
),col = 1, row = 1)
fig.add_trace(go.Scatter(
    x=data_adjd_close_mult['Date'],
    y=X_pred_1,
    name = 'Previsto - TSLA',
    marker=dict(color='#FFFF00'))
    ,col = 1, row = 1)


fig.update_layout(
    template = 'plotly_dark',
    title=dict(
        text = 'Modelo Theta',
        x = 0.5,
        font = dict(
            family = 'Arial Black',
            size = 25,
            color = 'White'
        )
    ),
    height=500, width=900,
    font=dict(
        family="Arial",
        size=16,
        color="White"
    )
)

In [None]:
mse = mean_squared_error(X_test_1, X_pred_1)
mae = mean_absolute_error(X_test_1, X_pred_1)
smape = calcula_smape(X_test_1, X_pred_1)

mse_prophet = mean_squared_error(X_test_2['y'], X_pred_2['yhat'])
mae_prophet = mean_absolute_error(X_test_2['y'], X_pred_2['yhat'])
smape_prophet = calcula_smape(X_test_2['y'], X_pred_2['yhat'])

metricas.loc[6] = ['Tesla', mae, mse, smape, mae_prophet, mse_prophet, smape_prophet,mae_arima,mse_arima,smape_arima]
#metricas.loc[13] = ['Tesla - Prophet', mae_prophet, mse_prophet, smape_prophet]

#Métricas dos Modelos

* MAE

É a média do valor absoluto dos erros. O erro é a diferença entre os valores observados e os valores previstos.

Um MAE mais baixo indica um melhor ajuste do modelo aos dados. Como usa o valor absoluto, é menos sensível a outliers do que outras métricas.

* MSE

É a média dos quadrados dos erros. Assim como o MAE, o erro é a diferença entre os valores observados e previstos, mas neste caso, os erros são elevados ao quadrado.

Um MSE menor indica um melhor ajuste. Diferente do MAE, o MSE é mais sensível a outliers, pois os erros maiores são amplificados ao serem elevados ao quadrado.

* SMAPE

É uma medida de precisão baseada em porcentagens, que normaliza os erros absolutos.
O SMAPE varia de 0% a 100%, com valores mais baixos indicando maior precisão. É útil quando se quer comparar a precisão entre diferentes conjuntos de dados com diferentes escalas. O "simétrico" no nome vem do fato de tratar igualmente os desvios relativos, quer o valor previsto esteja acima ou abaixo do valor observado.

In [None]:
metricas

Unnamed: 0,Empresa,MAE,MSE,SMAPE,MAE.1,MSE.1,SMAPE.1,MAE.2,MSE.2,SMAPE.2
0,Apple,0.090052,0.014368,20.18,0.090024,0.014369,20.17,0.090133,0.01434,20.21
1,Amazon,0.058203,0.006372,11.45,0.058062,0.006343,11.42,0.058008,0.00633,11.41
2,Google,0.073138,0.009706,13.33,0.073145,0.009734,13.33,0.073098,0.009717,13.32
3,Meta,0.037223,0.003035,5.38,0.037216,0.003032,5.38,0.037151,0.003025,5.37
4,Microsoft,0.088338,0.013639,16.93,0.088246,0.013537,16.92,0.088197,0.013537,16.91
6,Tesla,0.071056,0.009738,14.9,0.071522,0.009847,14.98,0.071536,0.009743,15.01


In [None]:
metricas.columns = ['Empresa', 'MAE_Theta', 'MSE_Theta', 'SMAPE_Theta', 'MAE_Prophet', 'MSE_Prophet', 'SMAPE_Prophet','MAE_ARIMA','MSE_ARIMA','SMAPE_ARIMA']

# Dividir o DataFrame em três, um para cada modelo
df_theta = metricas[['Empresa', 'MAE_Theta', 'MSE_Theta', 'SMAPE_Theta']].copy()
df_theta.columns = ['Empresa', 'MAE', 'MSE', 'SMAPE']

df_prophet = metricas[['Empresa', 'MAE_Prophet', 'MSE_Prophet', 'SMAPE_Prophet']].copy()
df_prophet.columns = ['Empresa', 'MAE', 'MSE', 'SMAPE']

df_arima = metricas[['Empresa', 'MAE_ARIMA', 'MSE_ARIMA', 'SMAPE_ARIMA']].copy()
df_arima.columns = ['Empresa', 'MAE', 'MSE', 'SMAPE']

# Combinar os DataFrames para criar subcolunas
df_final = pd.concat([df_theta.set_index('Empresa'), df_prophet.set_index('Empresa'), df_arima.set_index('Empresa')],
                     axis=1,
                     keys=['Theta', 'Prophet', 'ARIMA'])
df_final

Unnamed: 0_level_0,Theta,Theta,Theta,Prophet,Prophet,Prophet,ARIMA,ARIMA,ARIMA
Unnamed: 0_level_1,MAE,MSE,SMAPE,MAE,MSE,SMAPE,MAE,MSE,SMAPE
Empresa,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Apple,0.090052,0.014368,20.18,0.090024,0.014369,20.17,0.090133,0.01434,20.21
Amazon,0.058203,0.006372,11.45,0.058062,0.006343,11.42,0.058008,0.00633,11.41
Google,0.073138,0.009706,13.33,0.073145,0.009734,13.33,0.073098,0.009717,13.32
Meta,0.037223,0.003035,5.38,0.037216,0.003032,5.38,0.037151,0.003025,5.37
Microsoft,0.088338,0.013639,16.93,0.088246,0.013537,16.92,0.088197,0.013537,16.91
Tesla,0.071056,0.009738,14.9,0.071522,0.009847,14.98,0.071536,0.009743,15.01


In [None]:
import pandas as pd

# Supondo que df_final seja seu DataFrame
# df_final = pd.DataFrame(...)

def highlight_min(s):
    """
    Destaca o menor valor em uma série com background-color.
    """
    is_min = s == s.min()
    return ['background-color: #4169e1' if v else '' for v in is_min]

styled_df = df_final.style.applymap(
    lambda x: '' if isinstance(x, (int, float)) and x > 0.1 else ''
).apply(
    highlight_min, axis=1
).format("{:.5f}").set_properties(**{
    # Propriedades adicionais aqui, se necessário
}).set_table_styles([{
    'selector': 'th',
    'props': [

        ('text-align', 'center'),
        ('font-weight', 'bold'),
        ('font-size', '20px')
    ]
}])

# Exibir styled_df
styled_df

Unnamed: 0_level_0,Theta,Theta,Theta,Prophet,Prophet,Prophet,ARIMA,ARIMA,ARIMA
Unnamed: 0_level_1,MAE,MSE,SMAPE,MAE,MSE,SMAPE,MAE,MSE,SMAPE
Empresa,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Apple,0.09005,0.01437,20.18,0.09002,0.01437,20.17,0.09013,0.01434,20.21
Amazon,0.0582,0.00637,11.45,0.05806,0.00634,11.42,0.05801,0.00633,11.41
Google,0.07314,0.00971,13.33,0.07314,0.00973,13.33,0.0731,0.00972,13.32
Meta,0.03722,0.00304,5.38,0.03722,0.00303,5.38,0.03715,0.00303,5.37
Microsoft,0.08834,0.01364,16.93,0.08825,0.01354,16.92,0.0882,0.01354,16.91
Tesla,0.07106,0.00974,14.9,0.07152,0.00985,14.98,0.07154,0.00974,15.01


##ARIMA (AutoRegressive Integrated Moving Average):

Visão Geral: ARIMA é um método amplamente usado para previsão de séries temporais. Ele combina modelos autorregressivos (AR) e de média móvel (MA) e integra a diferenciação para tornar a série temporal estacionária.

Componentes:

AR (AutoRegressivo): O modelo usa a relação entre uma observação e várias observações defasadas.

I (Integrado): Envolve a diferenciação da série temporal para torná-la estacionária, ou seja, sem tendências ou sazonalidades.

MA (Média Móvel): O modelo usa a dependência entre uma observação e um erro residual de um modelo de média móvel aplicado a observações defasadas.

Adequação: Melhor para dados de séries temporais que mostram evidências de não estacionariedade, onde um passo inicial de diferenciação pode ser usado para remover esta característica.

Complexidade: Pode ser mais complexo para entender e aplicar, especialmente na determinação da ordem correta de diferenciação e dos termos AR e MA.

##Theta

Mecanismo: O Modelo Theta faz uso de uma técnica chamada "decomposição theta", que envolve a decomposição de uma série temporal em duas ou mais linhas 'theta'. Essas linhas são basicamente séries temporais suavizadas com diferentes níveis de suavização.

Uso: É eficaz em dados de séries temporais que exibem um padrão sazonal forte. O modelo é particularmente útil para previsões de curto prazo.

Simplicidade: Uma das principais vantagens do Modelo Theta é sua simplicidade e facilidade de implementação.

Desempenho: Enquanto o modelo pode ser eficaz em muitos casos, ele pode não ser tão robusto quanto outras técnicas mais complexas em situações com padrões de dados altamente irregulares ou não lineares.

##Prophet

O Prophet foi desenvolvido pelo Facebook para previsão de séries temporais e é amplamente utilizado em aplicações comerciais.

Mecanismo: O Prophet é baseado em modelos aditivos onde as tendências não lineares são ajustadas às sazonalidades anuais, semanais e diárias, além de feriados. Usa um modelo de crescimento que pode ser linear ou logístico.

Uso: É particularmente útil para dados com padrões sazonais fortes e várias temporadas de dados históricos. O Prophet também lida bem com valores ausentes e mudanças na tendência dos dados.

Flexibilidade: O modelo é altamente configurável, permitindo ajustes finos para modelar sazonalidades e feriados. Ele também fornece uma maneira de adicionar componentes personalizados ao modelo.

Desempenho: Tende a funcionar muito bem em uma variedade de situações, especialmente em casos com complexidades, como tendências de mudança e sazonalidades múltiplas.

##Comparação

Complexidade: O Prophet é mais complexo e oferece mais flexibilidade e opções de configuração em comparação com o Modelo Theta mais simples.

Uso de Caso: O Prophet é geralmente preferido para conjuntos de dados mais complexos e situações que requerem modelagem de feriados e sazonalidades múltiplas, enquanto o Modelo Theta pode ser suficiente para previsões mais simples e de curto prazo.

Desempenho: O desempenho de cada modelo depende muito do tipo específico de série temporal sendo analisada. O Prophet muitas vezes se sai melhor em séries temporais mais complexas e irregulares.

