<a href="https://colab.research.google.com/github/IronZiiz/Data-Science_Time-Series/blob/main/HW_Forecast_temp_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Case (Temperature forecast model)
In this activity, your task is to build a good predictive model for temperatures so that it can add value to the current project.

Given the guidelines, let's proceed with our activity:

Create a visualization of the time series related to temperatures (TEMP column).

Identify the seasonality present in the data.

Perform a forecast of temperature values for one year after the end of the time series.

Analyze the trend, annual seasonality, and weekly seasonality components of the temperature series.

## Imports


In [1]:
import pandas as pd
import plotly.express as px
import calendar
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from prophet.plot import plot_plotly
from prophet.plot import plot_components_plotly

## First model

In [3]:
# Location of the pollutant/temperature CSV. The path points into a Google
# Drive folder as exposed inside Colab; no mount step is visible in this
# notebook, so Drive is assumed to be mounted already -- TODO confirm, and
# adjust this single constant when running in another environment.
DATA_PATH = '/content/drive/MyDrive/Alura/Data-science_Regressão:prevendo-séries-temporais-com-Prophe/poluentes.csv'

# Load the raw daily measurements and preview them
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,Data,PM2.5,PM10,SO2,NO2,CO,O3,TEMP
0,2020-03-01,7.0,11.0,12.0,23.0,429.0,64.0,1.0
1,2020-03-02,31.0,42.0,37.0,67.0,825.0,30.0,1.0
2,2020-03-03,77.0,121.0,61.0,81.0,1621.0,19.0,6.0
3,2020-03-04,23.0,45.0,23.0,46.0,606.0,54.0,10.0
4,2020-03-05,149.0,184.0,94.0,133.0,2358.0,68.0,6.0
...,...,...,...,...,...,...,...,...
1455,2024-02-24,22.0,33.0,17.0,59.0,575.0,51.0,5.0
1456,2024-02-25,11.0,20.0,7.0,43.0,421.0,66.0,7.0
1457,2024-02-26,28.0,41.0,10.0,65.0,721.0,49.0,7.0
1458,2024-02-27,75.0,97.0,21.0,98.0,1427.0,37.0,8.0


In [4]:
# Inspect the dtype pandas inferred for each column right after loading
# (in the recorded output the date column 'Data' is still `object`)
df.dtypes

Unnamed: 0,0
Data,object
PM2.5,float64
PM10,float64
SO2,float64
NO2,float64
CO,float64
O3,float64
TEMP,float64


In [5]:
# Parse the 'Data' column into real timestamps, then re-check the dtypes
df['Data'] = df['Data'].pipe(pd.to_datetime)
df.dtypes

Unnamed: 0,0
Data,datetime64[ns]
PM2.5,float64
PM10,float64
SO2,float64
NO2,float64
CO,float64
O3,float64
TEMP,float64


### Create a visualization of the time series related to temperatures (TEMP column).

In [6]:
# Visualize the temperature day by day over the whole series
fig = px.line(data_frame=df, x='Data', y='TEMP', title='Temperature')
fig.show()

### Identify the seasonality present in the data.


In [9]:
# Seasonality check: mean temperature per calendar month, using 2022 only
df_2022 = df.loc[df['Data'].dt.year == 2022]
df_2022_mensal = (
    df_2022
    .groupby(df_2022['Data'].dt.month)['TEMP']
    .mean()
    .reset_index()
)

# After reset_index the 'Data' column holds the month number; add the
# abbreviated month name so the chart can use a readable axis
df_2022_mensal['month'] = [calendar.month_abbr[m] for m in df_2022_mensal['Data']]
df_2022_mensal

Unnamed: 0,Data,TEMP,month
0,1,-0.741935,Jan
1,2,1.285714,Feb
2,3,8.419355,Mar
3,4,15.566667,Apr
4,5,21.451613,May
5,6,24.466667,Jun
6,7,26.387097,Jul
7,8,26.516129,Aug
8,9,20.566667,Sep
9,10,14.387097,Oct


In [8]:
# Bar chart of the monthly mean temperatures stored in df_2022_mensal
fig = px.bar(data_frame=df_2022_mensal, x='month', y='TEMP', title='Temperature for month in 2022')
fig.show()

### Perform a forecast of temperature values for one year after the end of the time series.


In [10]:
# Prophet expects the time column to be named 'ds' and the target 'y',
# so keep only date + temperature and rename them accordingly
df_prophet = df[['Data', 'TEMP']].rename(columns={'Data': 'ds', 'TEMP': 'y'})
df_prophet

Unnamed: 0,ds,y
0,2020-03-01,1.0
1,2020-03-02,1.0
2,2020-03-03,6.0
3,2020-03-04,10.0
4,2020-03-05,6.0
...,...,...
1455,2024-02-24,5.0
1456,2024-02-25,7.0
1457,2024-02-26,7.0
1458,2024-02-27,8.0


In [11]:
# Fix NumPy's global RNG state before fitting/predicting (presumably so the
# sampled uncertainty intervals are repeatable between runs -- verify)
np.random.seed(4587)

# Number of daily steps to forecast past the last observation (one year)
FORECAST_HORIZON_DAYS = 365

# Fit Prophet with its default settings on the (ds, y) frame
model = Prophet()
model.fit(df_prophet)

# `future` holds only the dates to predict for (the historical dates plus the
# forecast horizon); `forecast` holds the model output for those dates.
# Keeping them under separate names avoids rebinding one variable to two
# different kinds of frame, which made the cell harder to re-run and read.
future = model.make_future_dataframe(periods=FORECAST_HORIZON_DAYS, freq='D')
forecast = model.predict(future)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpeywufo8k/wkk70z2x.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpeywufo8k/cmh2r8iz.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=48267', 'data', 'file=/tmp/tmpeywufo8k/wkk70z2x.json', 'init=/tmp/tmpeywufo8k/cmh2r8iz.json', 'output', 'file=/tmp/tmpeywufo8k/prophet_model5loroa7v/prophet_model-20250301225319.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
22:53:19 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:53:19 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [12]:
# Interactive plot of the observed points together with the model's forecast
fig = plot_plotly(m=model, fcst=forecast)
fig.show()

In [13]:
# Display the full prediction frame: trend, additive/weekly/yearly terms,
# the point forecast `yhat` and the *_lower / *_upper bounds of each
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2020-03-01,11.413120,-1.862400,4.574915,11.413120,11.413120,-10.066279,-10.066279,-10.066279,0.027936,0.027936,0.027936,-10.094216,-10.094216,-10.094216,0.0,0.0,0.0,1.346841
1,2020-03-02,11.422210,-1.306377,5.045779,11.422210,11.422210,-9.623640,-9.623640,-9.623640,0.168891,0.168891,0.168891,-9.792531,-9.792531,-9.792531,0.0,0.0,0.0,1.798570
2,2020-03-03,11.431299,-1.534489,5.126681,11.431299,11.431299,-9.464928,-9.464928,-9.464928,0.003592,0.003592,0.003592,-9.468521,-9.468521,-9.468521,0.0,0.0,0.0,1.966371
3,2020-03-04,11.440389,-1.029228,5.424882,11.440389,11.440389,-9.223112,-9.223112,-9.223112,-0.099522,-0.099522,-0.099522,-9.123590,-9.123590,-9.123590,0.0,0.0,0.0,2.217277
4,2020-03-05,11.449479,-0.541874,5.751977,11.449479,11.449479,-8.875240,-8.875240,-8.875240,-0.115635,-0.115635,-0.115635,-8.759605,-8.759605,-8.759605,0.0,0.0,0.0,2.574238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1820,2025-02-23,15.744871,1.046882,7.415754,15.238295,16.193959,-11.431249,-11.431249,-11.431249,0.027936,0.027936,0.027936,-11.459186,-11.459186,-11.459186,0.0,0.0,0.0,4.313622
1821,2025-02-24,15.747846,1.345715,7.729601,15.239512,16.199831,-11.128474,-11.128474,-11.128474,0.168891,0.168891,0.168891,-11.297365,-11.297365,-11.297365,0.0,0.0,0.0,4.619372
1822,2025-02-25,15.750821,1.566337,7.869847,15.240729,16.205671,-11.112277,-11.112277,-11.112277,0.003592,0.003592,0.003592,-11.115869,-11.115869,-11.115869,0.0,0.0,0.0,4.638544
1823,2025-02-26,15.753796,1.502176,7.902769,15.241947,16.210209,-11.012517,-11.012517,-11.012517,-0.099522,-0.099522,-0.099522,-10.912995,-10.912995,-10.912995,0.0,0.0,0.0,4.741279


In [14]:
# Keep only the date, the point forecast and its lower/upper bounds
forecast_filtered = forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
forecast_filtered

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-03-01,1.346841,-1.862400,4.574915
1,2020-03-02,1.798570,-1.306377,5.045779
2,2020-03-03,1.966371,-1.534489,5.126681
3,2020-03-04,2.217277,-1.029228,5.424882
4,2020-03-05,2.574238,-0.541874,5.751977
...,...,...,...,...
1820,2025-02-23,4.313622,1.046882,7.415754
1821,2025-02-24,4.619372,1.345715,7.729601
1822,2025-02-25,4.638544,1.566337,7.869847
1823,2025-02-26,4.741279,1.502176,7.902769


### Analyze the trend, annual seasonality, and weekly seasonality components of the temperature series.

In [15]:
# Break the forecast down into its trend, weekly and yearly components
plot_components_plotly(m=model, fcst=forecast)


Discarding nonzero nanoseconds in conversion.



##### How Prophet's components work

$$y(t) = g(t) + s(t) + h(t) + \epsilon_t$$

y(t): real value/observed value

g(t): trend component

s(t): seasonality component

h(t): holiday component

$\epsilon_t$: error term