In [1]:
import pandas as pd
import plotly.graph_objects as go
import torch

from darts.dataprocessing.transformers import Scaler

KeyboardInterrupt: 

In [None]:
# Load the cleaned spreadsheet from dataset/kawaldata_clean.xlsx.
# parse_dates=True asks pandas to try parsing the index as dates.
df = pd.read_excel(io='dataset/kawaldata_clean.xlsx', parse_dates=True)
# Preview the first five rows.
df.head(n=5)

## **COVID-19 Indonesia Statistics**

In [None]:
# Keep the date column plus the daily case / death / recovery columns,
# restrict to rows dated 2021-01-13 or later, and discard any row that
# has a missing value in one of the selected columns.
df_stats = (
    df[['tanggal', 'Kasus harian', 'Meninggal\n(baru)', 'Sembuh\n(baru)']]
    .loc[df['tanggal'] >= "2021-01-13"]
    .dropna()
)

print(f'Total Statistic Days {len(df_stats)}')

In [None]:
# One line trace per statistic, both plotted against the date column.
line_stat = [
    go.Scatter(x=df_stats['tanggal'], y=df_stats['Kasus harian'], name="Kasus Harian", mode='lines'),
    go.Scatter(x=df_stats['tanggal'], y=df_stats['Sembuh\n(baru)'], name="Sembuh", mode='lines'),
]

# update_layout returns the figure itself, so construction and titling chain.
fig = go.Figure(data=line_stat).update_layout(title_text="COVID-19 Case Indonesia")
fig.show()

## **Vaccination Indonesia Statistics**

In [None]:
# Date plus the two daily vaccination-dose columns, from 2021-01-13 onward.
# Unlike df_stats, rows with missing values are NOT dropped here.
df_vaccine = (
    df[['tanggal', 'Dosis pertama (harian)', 'Dosis kedua (harian)']]
    .loc[df['tanggal'] >= "2021-01-13"]
)

print(f'Total Vaccine Days {len(df_vaccine)}')

In [None]:
# First-dose and second-dose daily counts as two line traces over time.
line_stat = [
    go.Scatter(x=df_vaccine['tanggal'], y=df_vaccine['Dosis pertama (harian)'], name="Dosis Pertama", mode='lines'),
    go.Scatter(x=df_vaccine['tanggal'], y=df_vaccine['Dosis kedua (harian)'], name="Dosis Kedua", mode='lines'),
]

# update_layout returns the figure itself, so construction and titling chain.
fig = go.Figure(data=line_stat).update_layout(title_text="Vaccination Indonesia")
fig.show()

In [None]:
# Load the cleaned mobility CSV and preview its last five rows.
df_mobility = pd.read_csv(filepath_or_buffer="dataset/mobility_clean.csv")
df_mobility.tail(n=5)

In [None]:
# Number of rows in the mobility table.
df_mobility.shape[0]

In [None]:
# Single trace: retail-and-recreation change from baseline over time.
line_stat = [
    go.Scatter(
        x=df_mobility['date'],
        y=df_mobility['retail_and_recreation_percent_change_from_baseline'],
        name="Retail And Recreation",
        mode='lines',
    ),
]

# update_layout returns the figure itself, so construction and titling chain.
fig = go.Figure(data=line_stat).update_layout(title_text="Indonesia Mobility")
fig.show()

In [None]:
# Load the 2021 weights table and keep rows whose `date` lies in the
# closed interval [2021-01-13, 2021-08-16].
# NOTE(review): read_csv is called without parse_dates, so `date` is
# presumably compared as ISO-formatted strings — confirm the file format.
df_bobot = pd.read_csv('dataset/bobot_2021.csv')
df_bobot = df_bobot.loc[df_bobot['date'].between("2021-01-13", "2021-08-16")]
df_bobot.tail()

## **Time Series Feature Engineering**

- Drop the `tanggal` and `Dosis kedua (harian)` columns from df_vaccine
- Drop the `Meninggal (baru)` column from df_stats
- Drop rows that contain NaN values
- Separate the training variables from the date column

In [None]:
# Remove the columns that should not become model features.
# `columns=` already selects the axis, so no separate axis argument is needed.
# Each frame is rebound to its trimmed version, so running this cell a
# second time raises KeyError because the columns are already gone.
df_vaccine = df_vaccine.drop(columns=['tanggal', 'Dosis kedua (harian)'])
df_mobility = df_mobility.drop(columns=['date', 'Unnamed: 0', 'index'])
df_stats = df_stats.drop(columns=['Meninggal\n(baru)'])
df_bobot = df_bobot.drop(columns=['Unnamed: 0', 'date'])


In [None]:
# Glue the three feature tables side by side. Every frame gets a fresh
# 0..n-1 index first, so rows are matched purely by position, and any row
# with a missing value in the combined table is then removed.
# NOTE(review): df_stats had dropna() applied before this point while
# df_vaccine did not, so positional matching may pair values from
# different dates if rows were dropped mid-series — verify against the data.
df_timeseries = pd.concat(
    [frame.reset_index(drop=True) for frame in (df_vaccine, df_stats, df_bobot)],
    axis=1,
).dropna()


In [None]:
# Names of every column except the date column; these are the series
# components handed to the model.
variable = [column for column in df_timeseries.columns if column != 'tanggal']
variable

## **Time Series Modelling**

In [None]:
from darts import TimeSeries

# Build a multivariate TimeSeries indexed by `tanggal` and rescale it with
# the darts Scaler.
scaler = Scaler()
series = scaler.fit_transform(
    TimeSeries.from_dataframe(df_timeseries, time_col='tanggal', value_cols=variable)
)

# Validation part: everything after 2021-07-01.
val = series.split_after(pd.Timestamp("2021-07-01"))[1]
# The model is trained on the full series, so `train` also contains the
# dates covered by `val`.
train = series

In [None]:
from darts.models import NBEATSModel

model = NBEATSModel(
    # window sizes: 15 steps of history in, 7 steps of forecast out
    input_chunk_length=15,
    output_chunk_length=7,
    # network shape
    generic_architecture=True,
    num_stacks=12,
    num_blocks=6,
    num_layers=6,
    layer_widths=512,
    # training schedule
    n_epochs=200,
    batch_size=600,
)

# Report which torch device is available. The value is only printed; it is
# not passed to the model in this cell.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

model.fit(series=train, val_series=val, verbose=True)


## **Historical Prediction - Last Month** 

In [None]:
# Replay the already-fitted model over the training series from 2021-03-20
# onward. retrain=False means the model is not refitted at each step.
pred_series = model.historical_forecasts(
    series=train,
    start=pd.Timestamp("2021-03-20"),
    verbose=True,
    retrain=False,
)


In [None]:
from darts.metrics import r2_score

# Actual values restricted to the window the historical forecast covers.
ts_transformed = train.drop_before(pd.Timestamp("2021-03-20"))

# Plot actual vs. predicted for component '0' on the same axes.
ts_transformed['0'].univariate_component(0).plot(label='actual')
pred_series['0'].plot(label='predict')

# R2 of the forecast for the first component against the actual series.
print(
    'R2 Score:',
    r2_score(ts_transformed.univariate_component(0), pred_series['0']),
)


## **Future Prediction**

In [None]:
# Forecast 500 steps past the end of the training series and convert the
# result straight to a pandas DataFrame.
prediction = model.predict(n=500).pd_dataframe()

In [None]:
# Rebinds `series` from a darts TimeSeries to a pandas DataFrame. After this
# cell runs, `series` no longer has TimeSeries methods, so re-running the
# cell (or any earlier cell that expects a TimeSeries) will fail.
series = series.pd_dataframe()

In [None]:
from datetime import date

# Timestamp at which the forecast for component '0' reaches its minimum.
# Taking idxmin() on the column Series returns the Timestamp directly; the
# previous form indexed a Series labelled '0' with the integer 0, which
# relies on pandas' deprecated positional fallback for integer keys.
min_case = prediction['0'].idxmin()

# Whole days between the hard-coded reference date 2021-08-17 and that
# minimum, as a non-negative integer.
min_case = abs((date(2021, 8, 17) - min_case.date()).days)

# Daily date range of `min_case` periods starting 2021-08-20.
# The following cell rebinds `dates`.
dates = list(pd.date_range(start="2021-08-20", periods=min_case))

In [None]:
# X-axis values for the forecast traces: the timestamps the model actually
# forecast for, taken from the prediction frame's own time index. This keeps
# the axis the same length as the forecast and contiguous with the training
# data, instead of relying on a hard-coded start date and period count.
# Must run before the cell that calls prediction.reset_index(), while the
# time stamps are still the index.
dates = list(prediction.index)

In [None]:
# Move the time index into a regular column, then map the positional
# component labels '0', '1', ... back to the original column names.
prediction = prediction.reset_index().rename(
    columns={str(position): name for position, name in enumerate(variable)}
)
series = series.reset_index().rename(
    columns={str(position): name for position, name in enumerate(variable)}
)

In [None]:
line_stat = []

# For the first two variables, draw the historical (scaled) series and the
# forecast as a pair of line traces.
for idx in range(2):
    col = variable[idx]
    line_stat += [
        go.Scatter(x=df_timeseries['tanggal'], y=series[col], name=col, mode='lines'),
        go.Scatter(x=dates, y=prediction[col], name=f'Forecast {col}', mode='lines'),
    ]

# update_layout returns the figure itself, so construction and titling chain.
fig = go.Figure(data=line_stat).update_layout(
    title_text=f"Predict COVID-19 Indonesia Within {min_case} Days Periods"
)
fig.show()