In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error,mean_absolute_percentage_error,r2_score
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.diagnostics import performance_metrics
from prophet.diagnostics import cross_validation

In [21]:
df = pd.read_csv('datasets/Ibovespa4anos.csv',parse_dates=['Data'])
display(df.head(20))





Unnamed: 0,Data,Último,Abertura,Máxima,Mínima,Vol.,Var%
0,2024-05-24,124.306,124.731,125.257,124.259,"9,21M","-0,34%"
1,2024-05-23,124.729,125.65,125.665,124.431,"9,99M","-0,73%"
2,2024-05-22,125.65,127.412,127.412,125.524,"12,40M","-1,38%"
3,2024-05-21,127.412,127.754,128.272,127.205,"9,14M","-0,27%"
4,2024-05-20,127.751,128.151,128.73,127.488,"9,34M","-0,31%"
5,2024-05-17,128.151,128.28,128.464,127.696,"10,26M","-0,10%"
6,2024-05-16,128.284,128.029,128.965,127.922,"9,86M","0,20%"
7,2024-05-15,128.028,128.514,128.646,127.029,"10,86M","-0,38%"
8,2024-05-14,128.515,128.155,128.965,127.962,"11,89M","0,28%"
9,2024-05-13,128.155,127.6,128.669,127.599,"8,92M","0,44%"


In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1094 entries, 0 to 1093
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Data      1094 non-null   datetime64[ns]
 1   Último    1094 non-null   float64       
 2   Abertura  1094 non-null   float64       
 3   Máxima    1094 non-null   float64       
 4   Mínima    1094 non-null   float64       
 5   Vol.      1094 non-null   object        
 6   Var%      1094 non-null   object        
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 60.0+ KB


In [23]:
df_data = df[['Data','Último']]

In [24]:
df_data.rename(columns={'Data':'ds','Último':'y'},inplace=True)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [25]:
df_data.head()

Unnamed: 0,ds,y
0,2024-05-24,124.306
1,2024-05-23,124.729
2,2024-05-22,125.65
3,2024-05-21,127.412
4,2024-05-20,127.751


In [26]:
model = Prophet(interval_width=0.95)

In [27]:
model.fit(df_data)

17:00:17 - cmdstanpy - INFO - Chain [1] start processing
17:00:17 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1f0c7bc4140>

In [28]:
future = model.make_future_dataframe(periods=365, freq='D')
future.head()

Unnamed: 0,ds
0,2020-01-02
1,2020-01-03
2,2020-01-06
3,2020-01-07
4,2020-01-08


In [29]:
forecast = model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1454,2025-05-20,152.698932,79.394461,228.417508
1455,2025-05-21,153.049915,79.3011,227.502635
1456,2025-05-22,153.432246,79.131559,226.50873
1457,2025-05-23,153.719238,79.187435,227.0661
1458,2025-05-24,153.450758,78.863851,228.322278


In [30]:
plot_plotly(model, forecast)

In [31]:
df_cv = cross_validation(model, initial='730 days', period='180 days', horizon = '30 days')
df_cv.head()


  0%|          | 0/5 [00:00<?, ?it/s]17:00:18 - cmdstanpy - INFO - Chain [1] start processing
17:00:18 - cmdstanpy - INFO - Chain [1] done processing
 20%|██        | 1/5 [00:00<00:01,  3.42it/s]17:00:18 - cmdstanpy - INFO - Chain [1] start processing
17:00:18 - cmdstanpy - INFO - Chain [1] done processing
 40%|████      | 2/5 [00:00<00:00,  3.19it/s]17:00:18 - cmdstanpy - INFO - Chain [1] start processing
17:00:18 - cmdstanpy - INFO - Chain [1] done processing
 60%|██████    | 3/5 [00:00<00:00,  3.31it/s]17:00:18 - cmdstanpy - INFO - Chain [1] start processing
17:00:19 - cmdstanpy - INFO - Chain [1] done processing
 80%|████████  | 4/5 [00:01<00:00,  3.18it/s]17:00:19 - cmdstanpy - INFO - Chain [1] start processing
17:00:19 - cmdstanpy - INFO - Chain [1] done processing
100%|██████████| 5/5 [00:01<00:00,  3.06it/s]


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper,y,cutoff
0,2022-05-06,114.065937,106.799297,121.26072,105.135,2022-05-05
1,2022-05-09,114.636859,107.388567,121.602804,103.25,2022-05-05
2,2022-05-10,115.021734,108.117862,122.549519,103.11,2022-05-05
3,2022-05-11,115.513105,108.058356,122.918533,104.397,2022-05-05
4,2022-05-12,115.587001,108.549985,123.150072,105.688,2022-05-05


In [32]:
df_p = performance_metrics(df_cv)
df_p.tail(30)

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,4 days,34.699076,5.890592,4.084694,0.036732,0.018526,0.036546,0.75
1,5 days,39.160917,6.257868,4.288442,0.039743,0.020192,0.038914,0.7
2,6 days,46.563409,6.823739,4.916628,0.046287,0.02592,0.044792,0.7
3,7 days,50.464549,7.10384,5.663978,0.052195,0.048557,0.051156,0.7
4,8 days,48.103285,6.935653,5.996444,0.054609,0.05482,0.054018,0.7
5,9 days,34.568655,5.879511,5.079438,0.04593,0.045992,0.045849,0.8
6,10 days,24.08382,4.907527,4.354403,0.03937,0.040916,0.039707,0.9
7,11 days,28.579556,5.345985,4.882759,0.043962,0.043271,0.044354,0.875
8,12 days,36.265526,6.022087,5.671223,0.050816,0.047494,0.051057,0.8
9,13 days,43.868138,6.623303,5.986857,0.053498,0.040426,0.05306,0.733333
