In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load one frame per country; the 'date' column is parsed and becomes the index.
df, df_pl = (
    pd.read_csv(csv_path, index_col='date', parse_dates=True)
    for csv_path in ('data/portugal.csv', 'data/poland.csv')
)

In [None]:
# Count the distinct values of every column once, then keep only the columns
# that actually vary (more than one distinct value).
distinct_per_column = df.nunique()
varying_columns = distinct_per_column > 1
df = df.loc[:, varying_columns]

In [None]:
df.columns

In [None]:
df.female_smokers

In [None]:
df.describe()

In [None]:
# Line chart of cumulative (total) cases over time for Portugal and Poland;
# the x-axis is the date index of each frame.

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df.index, df['total_cases'], label='Portugal', color='red')
ax.plot(df_pl.index, df_pl['total_cases'], label='Poland', color='grey')
ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Total cases')
# Ticks every million, labelled in millions.
ax.set_yticks([0, 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000])
ax.set_yticklabels(['0M', '1M', '2M', '3M', '4M', '5M', '6M', '7M'])
ax.set_title('Comparison of total cases in Portugal and Poland')
ax.tick_params(axis='x', labelrotation=45)
ax.set_ylim(-100000, 7500000)
ax.grid(True, linestyle='--')
plt.show()

In [None]:
from datetime import datetime

# Line chart of daily new cases in Portugal: date on the x-axis, number of new cases on the y-axis.

plt.figure(figsize=(10, 6))
plt.plot(df.index, df['new_cases'], color='red', alpha=0.9, label='Portugal')

plt.title('The new cases of Covid-19 in Portugal')
plt.xlabel('Date')
plt.ylabel('New cases')

# Ticks every 50k from 0 to 450k, labelled in thousands.
new_case_ticks = list(range(0, 500000, 50000))
new_case_tick_labels = ['0'] + [f'{tick // 1000}k' for tick in new_case_ticks[1:]]
plt.yticks(new_case_ticks, new_case_tick_labels)
plt.grid(True, linestyle='--')

plt.show()


In [None]:
# Figure and axes intended for animated plots comparing total cases in Portugal
# and Poland on the same date scale (boxes). Only the figure is created here;
# no animation object is built in this cell.

import matplotlib.animation as animation

fig, ax = plt.subplots(figsize=(10, 6))

In [None]:
import pandas as pd
# Daily 'new_cases' series for each country, indexed by parsed date.
df_portugal, df_poland = (
    pd.read_csv(csv_path, index_col='date', parse_dates=True)['new_cases']
    for csv_path in ('./data/portugal.csv', './data/poland.csv')
)

# Monthly totals of each series.
df_portugal_r, df_poland_r = (
    daily.resample('M').sum() for daily in (df_portugal, df_poland)
)

# Side-by-side monthly totals, one column per country.
df = pd.concat({'Portugal': df_portugal_r, 'Poland': df_poland_r}, axis=1)

In [None]:
df

In [None]:
df_poland

In [None]:
# df = pd.read_csv('../../data/portugal.csv', index_col='date', parse_dates=True)

# Replace missing values in the daily Portugal series with 0, in place.
# NOTE(review): this only mutates df_portugal; `df` is not rebuilt from it in this cell.
df_portugal.fillna(0, inplace=True)

# Aggregate `df` to monthly sums.
# NOTE(review): if `df` is still the monthly Portugal/Poland frame built in an
# earlier cell, resampling it to months again changes nothing — confirm intent.
df = df.resample('M').sum()

In [None]:
df

In [None]:
# Monthly totals for Portugal, used by the cases-vs-deaths animation frames.
# The path matches the './data/portugal.csv' location the other cells read from.
df = pd.read_csv('./data/portugal.csv', index_col='date', parse_dates=True)
df = df.fillna(0)

df = df.resample('M').sum()

# Deaths as a percentage of cases per month. Months with zero cases yield NaN
# rather than an infinite value from a division by zero.
df['death_rate'] = (df['new_deaths'] / df['new_cases'].replace(0, float('nan'))) * 100

def update(frame, ax, df):
    """Redraw one animation frame of new cases vs. deaths on ``ax``.

    Parameters
    ----------
    frame : int
        Zero-based position of the row of ``df`` to display.
    ax : matplotlib Axes
        Axes to draw on; it is cleared first.
    df : pandas.DataFrame
        Date-indexed frame with 'new_cases', 'new_deaths' and 'death_rate' columns.

    All drawing goes through ``ax`` (no pyplot global state), so the function
    draws on the axes it is given even when another figure is current.
    """
    ax.clear()

    # Rows are selected by position; the slice includes the current frame so
    # both curves end exactly at the point that is annotated below.
    shown = df.iloc[:frame + 1]
    current_date = df.index[frame]

    ax.plot(shown.index, shown['new_cases'], color='red', label='New cases')
    ax.set_ylim(-100000, 2000000)
    ax.set_yticks([0, 500000, 1000000, 1500000, 2000000])
    ax.set_yticklabels(['0M', '0.5M', '1M', '1.5M', '2M'])
    ax.set_ylabel('Cases')
    ax.grid(True, linestyle='--', color='grey', alpha=0.3)

    ax.plot(shown.index, shown['new_deaths'], color='black', label='Deaths')

    ax.legend(loc='upper left')

    # Value labels at the current point of each curve.
    case_pos = df['new_cases'].iloc[frame]
    ax.text(current_date, case_pos, f'{case_pos:.0f}',
            ha='center', va='bottom', fontsize=8, color='red')

    death_pos = df['new_deaths'].iloc[frame]
    ax.text(current_date, death_pos, f'{death_pos:.0f}',
            ha='center', va='top', fontsize=8, color='black')

    ax.fill_between(shown.index, shown['new_cases'], color='red', alpha=0.1)
    ax.fill_between(shown.index, shown['new_deaths'], color='black', alpha=0.1)

    # (0.9, 0.8) is a position in axes-fraction coordinates, so the transform
    # is required; in data coordinates the label falls outside the date range.
    death_rate = df['death_rate'].iloc[frame]
    ax.text(0.9, 0.8, f'{death_rate:.2f}% death rate', transform=ax.transAxes,
            ha='center', va='bottom', fontsize=8, color='blue',
            bbox=dict(facecolor='white', alpha=0.5))

    ax.set_title('Comparison of New Cases and Deaths due to COVID-19 in Portugal')
    ax.set_xlim(df.index[0], df.index[-1])
    ax.text(0.51, 0.9, current_date.strftime('%m.%Y'), transform=ax.transAxes,
            ha='center', fontsize=15)



In [None]:

import pandas as pd
# Reload the Portugal data and treat missing values as 0.
df = pd.read_csv('./data/portugal.csv', index_col='date', parse_dates=True).fillna(0)

# Monthly maximum of every column.
df_monthly = df.resample('M').max()

# Keep the three per-hundred vaccination columns and drop the last 7 rows.
vaccination_columns = [
    'people_vaccinated_per_hundred',
    'people_fully_vaccinated_per_hundred',
    'total_boosters_per_hundred',
]
df_vaccinations = df_monthly.loc[:, vaccination_columns].iloc[:-7]

df_vaccinations

In [None]:
df.describe()

In [None]:
import pandas as pd

# Load the data; unparseable dates become NaT, and 'date' becomes the index.
data = (
    pd.read_csv('./data/portugal.csv')
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format='%Y-%m-%d', errors='coerce'))
    .set_index('date')
)

# Daily-period view of the index, stored as a regular column.
data['data_freq'] = data.index.to_period('D')

In [None]:
data

In [None]:
# Restrict the data to the 2020-2022 window (label-based slice on the date index).
start_date, end_date = '2020-01-01', '2022-12-31'
filtered_data = data.loc[start_date:end_date]


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from neuralprophet import NeuralProphet
def compare_models(series, train_end_date, forecast_periods,
                   arima_order=(5, 1, 0), seasonal_order=(1, 1, 1, 12), freq='D'):
    """Fit ARIMA, SARIMA and NeuralProphet on ``series`` and forecast ahead.

    Parameters
    ----------
    series : pandas.Series
        Date-indexed series to model.
    train_end_date : str or timestamp
        Last index label to include in the training data.
    forecast_periods : int
        Number of steps to forecast after the training data.
    arima_order : tuple, default (5, 1, 0)
        ``order`` passed to both ARIMA and SARIMAX.
    seasonal_order : tuple, default (1, 1, 1, 12)
        ``seasonal_order`` passed to SARIMAX.
    freq : str, default 'D'
        Frequency string passed to ``NeuralProphet.fit``.

    Returns
    -------
    tuple
        ``(arima_forecast, sarima_forecast, prophet_forecast)``; the last item
        is the array of 'yhat1' values from the NeuralProphet prediction.

    The AIC and BIC of the ARIMA and SARIMA fits are printed.
    The defaults reproduce the previously hard-coded settings.
    """
    # Explicit label-based slice up to train_end_date.
    train = series.loc[:train_end_date]

    # ARIMA
    arima_fit = ARIMA(train, order=arima_order).fit()
    arima_forecast = arima_fit.forecast(steps=forecast_periods)

    # SARIMA
    sarima_fit = SARIMAX(train, order=arima_order, seasonal_order=seasonal_order).fit()
    sarima_forecast = sarima_fit.forecast(steps=forecast_periods)

    # NeuralProphet takes a frame with 'ds' (timestamps) and 'y' (values) columns.
    prophet_data = pd.DataFrame({'ds': train.index, 'y': train.values})
    prophet_model = NeuralProphet()
    prophet_model.fit(prophet_data, freq=freq)
    future = prophet_model.make_future_dataframe(prophet_data, periods=forecast_periods)
    prophet_forecast = prophet_model.predict(future)['yhat1'].values

    print(f"ARIMA AIC: {arima_fit.aic}, BIC: {arima_fit.bic}")
    print(f"SARIMA AIC: {sarima_fit.aic}, BIC: {sarima_fit.bic}")

    return arima_forecast, sarima_forecast, prophet_forecast


In [None]:
# `cases_2020_22` was used without being defined anywhere in the visible notebook;
# take it from the 2020-2022 slice created in the filtering cell.
cases_2020_22 = filtered_data['new_cases']

cases_forecast_arima_2022, cases_forecast_sarima_2022, cases_forecast_prophet_2022 = compare_models(cases_2020_22, '2022-12-31', 365)

In [None]:
cases_forecast_sarima_2022

In [None]:
# Training frame for NeuralProphet: 'ds' holds timestamps, 'y' the daily new cases.
# It is built directly from the date index. The previous version converted a Period
# column back with `data.to_timestamp('D')`, which is called on the whole DataFrame
# rather than on the 'ds' column and cannot be assigned to a single column.
prophet_data = pd.DataFrame({
    'ds': filtered_data.index,
    'y': filtered_data['new_cases'].to_numpy(),
})

In [None]:
prophet_model = NeuralProphet()

In [None]:
prophet_model.fit(prophet_data, freq='D')

In [None]:
# `forecast_periods` only existed as a parameter of compare_models; define the
# horizon here (365, the same value used for the compare_models call).
forecast_periods = 365

future = prophet_model.make_future_dataframe(prophet_data, periods=forecast_periods)
prophet_forecast = prophet_model.predict(future)['yhat1'].values

In [None]:
import pandas as pd
from neuralprophet import NeuralProphet
import matplotlib.pyplot as plt

# Load the data, parse the 'date' column to datetime and use it as the index.
data = (
    pd.read_csv('./data/portugal.csv')
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format='%Y-%m-%d', errors='coerce'))
    .set_index('date')
)

# Keep only the years 2020, 2021 and 2022.
filtered_data = data.loc['2020-01-01':'2022-12-31']

# Frame in the layout NeuralProphet expects: 'ds' (date) and 'y' (new cases).
df = (
    filtered_data['new_cases']
    .reset_index()
    .rename(columns={'date': 'ds', 'new_cases': 'y'})
)

df


In [None]:
# Initialise a NeuralProphet model with its default settings
model = NeuralProphet()

# Train the model on `df` with a daily frequency, then predict 365 periods
# beyond the end of the data
model.fit(df, freq='D')
future = model.make_future_dataframe(df, periods=365)
forecast = model.predict(future)

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))
model.plot(forecast, ax=ax)
plt.show()

In [None]:
import pandas as pd
from neuralprophet import NeuralProphet
import matplotlib.pyplot as plt

# Reload the Portugal data from disk.
data = pd.read_csv('./data/portugal.csv')

# Parse dates; values that do not match the format become NaT.
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce')

data.set_index('date', inplace=True)

# Keep only 2020-2022.
filtered_data = data.loc['2020-01-01':'2022-12-31']

# NeuralProphet input: 'ds' (date) and 'y' (new cases).
df = filtered_data.reset_index()[['date', 'new_cases']]
df.columns = ['ds', 'y']

# Model with yearly and weekly seasonality, no daily seasonality and no
# autoregression (n_lags=0).
model = NeuralProphet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    n_forecasts=1,
    n_lags=0,
    seasonality_mode='additive'
)
# `metrics` holds the value returned by fit().
metrics = model.fit(df, freq='D', epochs=100)

# Predict 365 periods beyond the end of the training data.
future = model.make_future_dataframe(df, periods=365)
forecast = model.predict(future)


In [None]:
forecast

In [None]:
forecast['yhat1'].plot()

In [None]:
# Full (unfiltered) series in NeuralProphet layout, plus a quick look at it.
data = pd.read_csv('./data/portugal.csv')
df = pd.DataFrame({
    'ds': pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce'),
    'y': data['new_cases'],
})
df.plot(x='ds', y='y')

In [None]:
from neuralprophet import set_random_seed

# Seed the random number generators before building and fitting the model.
set_random_seed(1234)

# Yearly seasonality only, with 100 changepoints placed over the first 95% of
# the series and regularised seasonality.
nm = NeuralProphet(n_changepoints=100,
                   daily_seasonality=False,
                   yearly_seasonality=True,
                   weekly_seasonality=False,
                   changepoints_range=0.95,
                   seasonality_reg=0.1)

# `nm_result` holds the value returned by fit().
nm_result = nm.fit(df, freq='D')

In [None]:
# Three stacked panels sharing the x-axis, one per column of the fit result.
plt.rcParams['figure.figsize'] = (7, 9)
f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
for metric_ax, metric_column, panel_title in (
    (ax1, 'Loss', 'SmoothL1Loss'),
    (ax2, 'MAE', 'MAE'),
    (ax3, 'RegLoss', 'RegLoss'),
):
    metric_ax.plot(nm_result[metric_column])
    metric_ax.set_title(panel_title)
    metric_ax.grid()
plt.show()

In [None]:
# Frame covering 200 future periods plus predictions for every historic row of `df`.
future = nm.make_future_dataframe(df, periods = 200,
                                  n_historic_predictions=len(df))

forecast_2 = nm.predict(future)

# Plot the forecast with the model's built-in plotting method.
nm.plot(forecast_2)

In [None]:
nm.plot_components(forecast_2)

In [None]:
nm.plot_parameters()

In [None]:
# Scatter of the rows whose value is strictly positive.
has_positive_value = df['y'] > 0
df2 = df.loc[has_positive_value]
plt.scatter(df2['ds'], df2['y'])

In [None]:
df2

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data['new_cases'].dropna(), lags=30)
plt.show()

In [None]:
# Parse the 'date' column; values that do not match the format become NaT.
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce')

# Compute the difference (in days) between consecutive dates
data['date_diff'] = data['date'].diff().dt.days

# Check the unique values of those differences
unique_diffs = data['date_diff'].dropna().unique()

print("Unikalne różnice między datami:", unique_diffs)

In [None]:
import pandas as pd
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed
import plotly.graph_objects as go
import plotly.io as pio

# Load the data
data = pd.read_csv('./data/portugal.csv')

# Prepare the NeuralProphet input frame ('ds' = date, 'y' = new cases)
df = pd.DataFrame({
    'ds': pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce'),
    'y': data['new_cases'],
})

# Keep only rows with a strictly positive value
df = df.loc[df['y'] > 0]

# Split the data into two periods
period1 = df.loc[(df['ds'] >= '2020-01-01') & (df['ds'] < '2023-01-01')]
period2 = df.loc[(df['ds'] >= '2023-01-01') & (df['ds'] <= '2025-01-01')]


def _fit_and_forecast(train_df, horizon):
    """Fit a NeuralProphet model on ``train_df`` and predict ``horizon`` periods ahead.

    Returns ``(model, fit_result, future_frame, forecast)``; negative 'yhat1'
    values in the forecast are replaced by 0.
    """
    model = NeuralProphet(n_changepoints=300,
                          daily_seasonality=True,
                          yearly_seasonality=True,
                          weekly_seasonality=False,
                          changepoints_range=0.95)
    fit_result = model.fit(train_df, freq='D')
    future_frame = model.make_future_dataframe(train_df, periods=horizon,
                                               n_historic_predictions=len(train_df))
    forecast = model.predict(future_frame)
    # Prevent negative predictions
    forecast['yhat1'] = forecast['yhat1'].apply(lambda x: max(0, x))
    return model, fit_result, future_frame, forecast


# Set the random seed
set_random_seed(1234)

# Configure and train one model per period (period1 first, then period2)
nm1, nm_result1, future1, forecast1 = _fit_and_forecast(period1, 365)
nm2, nm_result2, future2, forecast2 = _fit_and_forecast(period2, 365)

# Build the Plotly figure: actual and forecast values for each period
fig = go.Figure()
for trace_x, trace_y, trace_name in (
    (period1['ds'], period1['y'], 'Actual Period1'),
    (forecast1['ds'], forecast1['yhat1'], 'Forecast Period1'),
    (period2['ds'], period2['y'], 'Actual Period2'),
    (forecast2['ds'], forecast2['yhat1'], 'Forecast Period2'),
):
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, mode='lines', name=trace_name))

# Figure settings
fig.update_layout(title='NeuralProphet Forecasting Model - Portugal',
                  xaxis_title='Date',
                  yaxis_title='New Cases',
                  template='plotly_dark',
                  showlegend=True)

# Save the figure to an HTML file
pio.write_html(fig, file="./plots/combined_forecast.html", auto_open=True)

# Display the figure
fig.show()


In [None]:
import pandas as pd
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed
import plotly.graph_objects as go
import plotly.io as pio

data = pd.read_csv('./data/portugal.csv')

# NeuralProphet input frame ('ds' = date, 'y' = new cases), positive values only.
df = pd.DataFrame({
    'ds': pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce'),
    'y': data['new_cases'],
})
df = df.loc[df['y'] > 0]

period1 = df.loc[(df['ds'] >= '2020-01-01') & (df['ds'] < '2023-01-01')]
period2 = df.loc[(df['ds'] >= '2023-01-01') & (df['ds'] <= '2025-01-01')]


def _fit_and_forecast(train_df, horizon):
    """Fit a NeuralProphet model on ``train_df`` and predict ``horizon`` periods ahead.

    Returns ``(model, fit_result, future_frame, forecast)``; negative 'yhat1'
    values in the forecast are replaced by 0.
    """
    model = NeuralProphet(n_changepoints=300,
                          daily_seasonality=True,
                          yearly_seasonality=True,
                          weekly_seasonality=False,
                          changepoints_range=0.95)
    fit_result = model.fit(train_df, freq='D')
    future_frame = model.make_future_dataframe(train_df, periods=horizon,
                                               n_historic_predictions=len(train_df))
    forecast = model.predict(future_frame)
    forecast['yhat1'] = forecast['yhat1'].apply(lambda x: max(0, x))
    return model, fit_result, future_frame, forecast


# Forecast for the first period
set_random_seed(1234)
nm1, nm_result, future, forecast_1 = _fit_and_forecast(period1, 1500)

# Forecast for the second period
nm2, nm_result2, future2, forecast_2 = _fit_and_forecast(period2, 1000)

# Two forecasts in one plot



In [246]:
# Split each forecast at the last observed date of its training period: rows up
# to that date overlap the observations, later rows are the forward forecast.
last_observed1 = period1['ds'].max()
last_observed2 = period2['ds'].max()

overlap_forecast = forecast_1[forecast_1['ds'] <= last_observed1]
future_forecast = forecast_1[forecast_1['ds'] > last_observed1]

overlap_forecast2 = forecast_2[forecast_2['ds'] <= last_observed2]
future_forecast2 = forecast_2[forecast_2['ds'] > last_observed2]

fig = go.Figure()
fig.add_trace(go.Scatter(x=df['ds'], y=df['y'], mode='lines',
                         name='observed data', line=dict(color='black')))

# (segment, legend name, line colour, extra trace options); overlap parts are faded.
forecast_traces = (
    (overlap_forecast, 'forecast based on period 2020-2022 (overlap)', 'orange', {'opacity': 0.6}),
    (future_forecast, 'forecast based on period 2020-2022', 'orange', {}),
    (overlap_forecast2, 'forecast based on period 2023-2024 (overlap)', 'red', {'opacity': 0.6}),
    (future_forecast2, 'forecast based on period 2023-2024', 'red', {}),
)
for segment, trace_name, line_colour, extra_options in forecast_traces:
    fig.add_trace(go.Scatter(x=segment['ds'], y=segment['yhat1'], mode='lines',
                             name=trace_name, line=dict(color=line_colour), **extra_options))

legend_box = dict(
    x=0.7,
    y=0.99,
    xanchor='left',
    yanchor='top',
    bgcolor='rgba(255, 255, 255, 0.5)',
    bordercolor='rgba(0, 0, 0, 0.5)',
    borderwidth=1
)
fig.update_layout(title='Forecast of new COVID-19 cases in Portugal',
                  xaxis_title='Date',
                  yaxis_title='New Cases',
                  template='plotly_white',
                  showlegend=True,
                  hovermode='x unified',
                  legend=legend_box)

pio.write_html(fig, file="./plots/9_cases_forecast.html", auto_open=True)


In [None]:
def _period_figure(observed, overlap, future_part, line_colour, title):
    """Plotly figure with the observed series and the two parts of one forecast."""
    figure = go.Figure(data=[
        go.Scatter(x=observed['ds'], y=observed['y'], mode='lines',
                   name='observed', line=dict(color='black')),
        go.Scatter(x=overlap['ds'], y=overlap['yhat1'], mode='lines',
                   name='forecast (overlap)', line=dict(color=line_colour), opacity=0.6),
        go.Scatter(x=future_part['ds'], y=future_part['yhat1'], mode='lines',
                   name='forecast', line=dict(color=line_colour)),
    ])
    figure.update_layout(title=title,
                         xaxis_title='Date',
                         yaxis_title='New Cases',
                         template='plotly_white',
                         hovermode='x')
    return figure


# First period (2020-2022)
fig1 = _period_figure(period1, overlap_forecast, future_forecast, 'orange',
                      'Prediction based on first period: 2020-2022')
fig1.write_html("./plots/9_cases_period1_forecast.html")

# Second period (2023-2024)
fig2 = _period_figure(period2, overlap_forecast2, future_forecast2, 'red',
                      'Prediction based on second period: 2023-2024')
fig2.write_html("./plots/9_cases_period2_forecast.html")


In [None]:
# Plot nm1 with its own forecast (forecast_1); forecast_2 was produced by nm2.
# NOTE(review): the following cell writes `fig1` to this same PNG path and
# will overwrite this image — confirm which of the two is wanted.
fig_plotly = nm1.plot(forecast_1, plotting_backend='plotly')
pio.write_image(fig_plotly, 'plots/9_cases_period1_forecast.png', format='png')

In [None]:
pio.write_image(fig1, 'plots/9_cases_period1_forecast.png', format='png')

In [None]:
# fig, fig1, fig2 static in one plot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 6))

# Observed series first, then both forecasts, all on the same axes.
for frame, value_column, line_label, line_colour in (
    (df, 'y', 'Observed', 'black'),
    (forecast_1, 'yhat1', 'Forecast 2020-2022', 'orange'),
    (forecast_2, 'yhat1', 'Forecast 2023-2024', 'red'),
):
    ax.plot(frame['ds'], frame[value_column], label=line_label, color=line_colour)

ax.set_title('Forecast of new COVID-19 cases in Portugal')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
ax.grid(True, linestyle='--')

plt.show()

In [None]:
fig_stat = plt.figure(figsize=(10, 6))
stat_ax = fig_stat.add_subplot(111)

# Forecasts are drawn first so the observed series lies on top of them.
stat_ax.plot(forecast_1['ds'], forecast_1['yhat1'], label='forecast based on the period 2020-2022', color='orange')
stat_ax.plot(forecast_2['ds'], forecast_2['yhat1'], label='forecast based on the period 2023-2024', color='red')
stat_ax.plot(df['ds'], df['y'], label='observed', color='black')

stat_ax.set_title('Forecast of new COVID-19 cases in Portugal (NeuralProphet)')
stat_ax.set_xlabel('Date')
stat_ax.set_ylabel('New Cases')
stat_ax.grid(True, which='both', linestyle='--', lw=0.5)
stat_ax.legend()

# Ticks every 50k from 0 to 450k, labelled in thousands.
case_ticks = list(range(0, 500000, 50000))
stat_ax.set_yticks(case_ticks)
stat_ax.set_yticklabels(['0'] + [f'{tick // 1000}k' for tick in case_ticks[1:]])

fig_stat.tight_layout()
fig_stat.savefig('./plots/9_cases_forecast.png', dpi=300)

In [None]:
# Three stacked panels for the new-cases forecasts: all data, first period, second period.
fig, axs = plt.subplots(3, 1, figsize=(15, 15), gridspec_kw={'height_ratios': [1.5, 1, 1]})

# Each forecast segment with its line style; the overlap parts are faded.
first_overlap = (overlap_forecast, dict(label='Forecast 2020-2022 (overlap)', color='orange', alpha=0.6))
second_overlap = (overlap_forecast2, dict(label='Forecast 2023-2024 (overlap)', color='red', alpha=0.6))
first_future = (future_forecast, dict(label='Forecast 2020-2022', color='orange'))
second_future = (future_forecast2, dict(label='Forecast 2023-2024', color='red'))

# y-axis ticks: 50k steps for the large-scale panels, 1k steps for the second period.
ticks_50k = list(range(0, 500000, 50000))
ticks_1k = list(range(0, 6000, 1000))

# (observed frame, title, forecast segments, y ticks) per panel, top to bottom.
# The top title says "Cases": this figure plots new cases and is saved as the cases image.
panels = (
    (df, 'Forecast of New COVID-19 Cases in Portugal (NeuralProphet)',
     (first_overlap, second_overlap, first_future, second_future), ticks_50k),
    (period1, 'Prediction Based on First Period: 2020-2022',
     (first_overlap, first_future), ticks_50k),
    (period2, 'Prediction Based on Second Period: 2023-2024',
     (second_overlap, second_future), ticks_1k),
)

for panel_ax, (observed, panel_title, forecast_lines, y_ticks) in zip(axs, panels):
    panel_ax.plot(observed['ds'], observed['y'], label='Observed', color='black')
    for segment, line_style in forecast_lines:
        panel_ax.plot(segment['ds'], segment['yhat1'], **line_style)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Date')
    panel_ax.set_ylabel('New Cases')
    panel_ax.set_yticks(y_ticks)
    panel_ax.set_yticklabels(['0'] + [f'{tick // 1000}k' for tick in y_ticks[1:]])
    panel_ax.grid(True, which='both', linestyle='--', lw=0.5)
    panel_ax.legend()

plt.tight_layout()
plt.savefig('./plots/9_combined_forecast_cases.png', dpi=300)


In [247]:
import pandas as pd
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed
import plotly.graph_objects as go
import plotly.io as pio
import matplotlib.pyplot as plt

# Reload the Portugal data from disk.
data = pd.read_csv('./data/portugal.csv')

# NeuralProphet input frame: 'ds' = parsed date, 'y' = new deaths.
df = pd.DataFrame()
df['ds'] = pd.to_datetime(data['date'], format='%Y-%m-%d', errors='coerce')
df['y'] = data['new_deaths']

# Keep only rows with a strictly positive value.
# NOTE(review): the recorded output of this cell reports the major frequency of
# the remaining data as W-SUN, while both models below are fitted with
# freq='D' — confirm that a daily frequency is intended.
df = df[df['y'] > 0]

# Two training periods: 2020-2022, and 2023-01-01 through 2025-01-01.
period1 = df[(df['ds'] >= '2020-01-01') & (df['ds'] < '2023-01-01')]
period2 = df[(df['ds'] >= '2023-01-01') & (df['ds'] <= '2025-01-01')]

# Forecast for the first period
set_random_seed(1234)
nm1 = NeuralProphet(n_changepoints=190,
                    daily_seasonality=True,
                    yearly_seasonality=True,
                    weekly_seasonality=False,
                    changepoints_range=0.95,
                    seasonality_mode='additive',
                    trend_reg=0.3,
                     seasonality_reg=0.2 )

# Fit, then predict 1500 future periods plus every historic row of period1.
nm_result = nm1.fit(period1, freq='D')
future = nm1.make_future_dataframe(period1, periods=1500, n_historic_predictions=len(period1))
forecast_1 = nm1.predict(future)

# Replace negative predictions with 0.
forecast_1['yhat1'] = forecast_1['yhat1'].apply(lambda x: max(0, x))

# Forecast for the second period
nm2 = NeuralProphet(n_changepoints=200,
                    daily_seasonality=True,
                    yearly_seasonality=True,
                    weekly_seasonality=False,
                    changepoints_range=0.95)

# Fit, then predict 1000 future periods plus every historic row of period2.
nm_result2 = nm2.fit(period2, freq='D')
future2 = nm2.make_future_dataframe(period2, periods=1000, n_historic_predictions=len(period2))
forecast_2 = nm2.predict(future2)

# Replace negative predictions with 0.
forecast_2['yhat1'] = forecast_2['yhat1'].apply(lambda x: max(0, x))


INFO - (NP.config.__post_init__) - Note: Trend changepoint regularization is experimental.
INFO - (NP.config.__post_init__) - Note: Fourier-based seasonality regularization is experimental.

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.31% of the data.

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 210

MPS available but not used. Set `accelerator` and `devices` using `Trainer(accelerator='mps'

Finding best initial lr:   0%|          | 0/209 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]


Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.31% of the data.

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - Ma

Predicting: 10it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 98.529% of the data.

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 8
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 290

MPS available but not used. Set `accelerator` and `devices` using `Trainer(accelerator='mps', devices=1)`.




Finding best initial lr:   0%|          | 0/205 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]


Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 98.529% of the data.

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column

Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.


INFO - (NP.df_utils._infer_frequency) - M

Predicting: 9it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column


In [250]:
# Split each forecast at the last observed date of its training period: rows up
# to that date overlap the observations, later rows are the forward forecast.
last_observed1 = period1['ds'].max()
last_observed2 = period2['ds'].max()

overlap_forecast = forecast_1[forecast_1['ds'] <= last_observed1]
future_forecast = forecast_1[forecast_1['ds'] > last_observed1]

overlap_forecast2 = forecast_2[forecast_2['ds'] <= last_observed2]
future_forecast2 = forecast_2[forecast_2['ds'] > last_observed2]

fig = go.Figure()

fig.add_trace(go.Scatter(x=df['ds'], y=df['y'], mode='lines',
                         name='observed data', line=dict(color='black')))

# (segment, legend name, line colour, extra trace options); overlap parts are faded.
forecast_traces = (
    (overlap_forecast, 'forecast based on period 2020-2022 (overlap)', 'orange', {'opacity': 0.6}),
    (future_forecast, 'forecast based on period 2020-2022', 'orange', {}),
    (overlap_forecast2, 'forecast based on period 2023-2024 (overlap)', 'red', {'opacity': 0.6}),
    (future_forecast2, 'forecast based on period 2023-2024', 'red', {}),
)
for segment, trace_name, line_colour, extra_options in forecast_traces:
    fig.add_trace(go.Scatter(x=segment['ds'], y=segment['yhat1'], mode='lines',
                             name=trace_name, line=dict(color=line_colour), **extra_options))

legend_box = dict(
    x=0.7,
    y=0.99,
    xanchor='left',
    yanchor='top',
    bgcolor='rgba(255, 255, 255, 0.5)',
    bordercolor='rgba(0, 0, 0, 0.5)',
    borderwidth=1
)
fig.update_layout(title='Forecast of new COVID-19 deaths in Portugal',
                  xaxis_title='Date',
                  yaxis_title='New Deaths',
                  template='plotly_white',
                  showlegend=True,
                  hovermode='x unified',
                  legend=legend_box)

pio.write_html(fig, file="./plots/9_deaths_forecast.html", auto_open=True)


In [252]:
# First-period figure: observed deaths, the faded in-sample part of the
# forecast, and the forward forecast.
fig1 = go.Figure(data=[
    go.Scatter(x=period1['ds'], y=period1['y'], mode='lines',
               name='observed', line=dict(color='black')),
    go.Scatter(x=overlap_forecast['ds'], y=overlap_forecast['yhat1'], mode='lines',
               name='forecast (overlap)', line=dict(color='orange'), opacity=0.6),
    go.Scatter(x=future_forecast['ds'], y=future_forecast['yhat1'], mode='lines',
               name='forecast', line=dict(color='orange')),
])

period1_layout = dict(title='Prediction based on first period: 2020-2022',
                      xaxis_title='Date',
                      yaxis_title='New Deaths',
                      template='plotly_white',
                      hovermode='x')
fig1.update_layout(**period1_layout)

fig1.write_html("./plots/9_deaths_period1_forecast.html")

In [251]:
# Second-period figure: observed deaths, the faded in-sample part of the
# forecast, and the forward forecast.
fig2 = go.Figure(data=[
    go.Scatter(x=period2['ds'], y=period2['y'], mode='lines',
               name='observed', line=dict(color='black')),
    go.Scatter(x=overlap_forecast2['ds'], y=overlap_forecast2['yhat1'], mode='lines',
               name='forecast (overlap)', line=dict(color='red'), opacity=0.6),
    go.Scatter(x=future_forecast2['ds'], y=future_forecast2['yhat1'], mode='lines',
               name='forecast', line=dict(color='red')),
])

period2_layout = dict(title='Prediction based on second period: 2023-2024',
                      xaxis_title='Date',
                      yaxis_title='New Deaths',
                      template='plotly_white',
                      hovermode='x')
fig2.update_layout(**period2_layout)

fig2.write_html("./plots/9_deaths_period2_forecast.html")

In [None]:
# Static plot: three stacked panels (all data, first period, second period).

fig, axs = plt.subplots(3, 1, figsize=(15, 15), gridspec_kw={'height_ratios': [1.5, 1, 1]})

# Each forecast segment with its line style; the overlap parts are faded.
first_overlap = (overlap_forecast, dict(label='Forecast 2020-2022 (overlap)', color='orange', alpha=0.6))
second_overlap = (overlap_forecast2, dict(label='Forecast 2023-2024 (overlap)', color='red', alpha=0.6))
first_future = (future_forecast, dict(label='Forecast 2020-2022', color='orange'))
second_future = (future_forecast2, dict(label='Forecast 2023-2024', color='red'))

# (observed frame, title, forecast segments in drawing order) per panel, top to bottom.
panels = (
    (df, 'Forecast of New COVID-19 Deaths in Portugal (NeuralProphet)',
     (first_overlap, second_overlap, first_future, second_future)),
    (period1, 'Prediction Based on First Period: 2020-2022',
     (first_overlap, first_future)),
    (period2, 'Prediction Based on Second Period: 2023-2024',
     (second_overlap, second_future)),
)

for panel_ax, (observed, panel_title, forecast_lines) in zip(axs, panels):
    panel_ax.plot(observed['ds'], observed['y'], label='Observed', color='black')
    for segment, line_style in forecast_lines:
        panel_ax.plot(segment['ds'], segment['yhat1'], **line_style)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Date')
    panel_ax.set_ylabel('New Deaths')
    panel_ax.grid(True, which='both', linestyle='--', lw=0.5)
    panel_ax.legend()

plt.tight_layout()
plt.savefig('./plots/9_combined_forecast_deaths.png', dpi=300)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
new_cases,1562.0,3612.848,23109.63,0.0,0.0,0.0,0.0,405306.0
reproduction_rate,1025.0,1.031259,0.2771938,0.41,0.9,1.0,1.18,2.69
icu_patients,765.0,184.7895,188.7849,9.0,67.0,116.0,184.0,904.0
hosp_patients,774.0,1232.337,1329.66,9.0,417.25,691.5,1399.0,6869.0
new_tests,822.0,52099.03,60741.22,40.0,16969.0,37255.5,59093.0,402756.0
positive_rate,815.0,0.08478307,0.09698713,0.0066,0.0203,0.04,0.13035,0.5301
people_vaccinated,144.0,8121690.0,3078003.0,5186.0,8943441.0,9753919.5,9789436.0,9821414.0
people_fully_vaccinated,144.0,7167316.0,3084811.0,6.0,7753584.0,8850197.5,8905880.0,8909769.0
total_boosters,144.0,4394400.0,3166834.0,5.0,80036.0,6504711.5,7045710.0,7086326.0
stringency_index,1092.0,43.85999,26.09618,0.0,13.89,49.86,63.89,87.96
