In [None]:
!pip install darts

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from darts.models import NBEATSModel
from darts.dataprocessing.transformers import Scaler
from darts import TimeSeries
from darts.metrics import mape
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [None]:
# Number of leading observations used for training; it is also the index at
# which the held-out window starts.
train_for = 500
# Forecast horizon: how many steps are held out for testing and predicted.
predict_for = 14

In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset stored under
# /content/drive/MyDrive can be read by the loading cell.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd

In [None]:
def smoothen(df, period):
    """Return a copy of ``df`` smoothed with a trailing moving average.

    Every element at index ``i >= period`` is replaced by the mean of the
    ``period`` original values ending at ``i`` (indices ``i - period + 1``
    through ``i``). The first ``period`` elements are returned unchanged.
    Averages are always computed from the original input, never from values
    that were already smoothed, and the input itself is not modified.

    Args:
        df: Integer-indexable sequence of numbers (NumPy array, list, ...).
            NOTE(review): for a pandas Series ``df[j]`` is a label lookup, so
            this presumably expects a default 0..n-1 index - confirm.
        period: Window length; must be a positive integer.

    Returns:
        A new object of the same container kind as ``df``. Inputs that
        provide ``astype`` (e.g. NumPy arrays) are returned with float dtype
        so fractional averages are stored exactly.

    Raises:
        ValueError: If ``period`` is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be a positive integer")

    # Work on a float copy: assigning a fractional mean into an integer NumPy
    # array would silently truncate it (3.5 would be stored as 3).
    if hasattr(df, "astype"):
        df_ma = df.astype(float)
    else:
        df_ma = df.copy()

    for i in range(period, len(df)):
        window_total = sum(df[j] for j in range(i - period + 1, i + 1))
        df_ma[i] = window_total / period
    return df_ma

In [None]:
# Load the raw export from the mounted Drive.
df = pd.read_csv('/content/drive/MyDrive/covid-final-all.csv')
# Lower-case the state names with the vectorised string accessor; unlike a
# row-wise apply it leaves missing values as NaN instead of raising.
df['state_residence'] = df['state_residence'].str.lower()
print(df.columns)
print(df.state_residence.unique())

# Collapse all states/districts into one row per date by summing the counts.
df = df.groupby(['Date']).agg({'F_pos': 'sum', 'M_pos': 'sum', 'T_pos': 'sum', 'NIA_pos': 'sum', 'new_case': 'sum'}).reset_index()

# Date of the first forecast step: the held-out window starts at row
# `train_for`, so that row (not `train_for + 1`) is the reference date.
date = df.iloc[train_for]['Date']

# From here on `df` is the plain array of daily new-case totals.
df = df['new_case'].values

Index(['Unnamed: 0', 'state_residence', 'district_residence', 'Date', '<15',
       '15-24', '25-34', '35-44', '45-59', '60-80', '>80', '<15_pos',
       '15-24_pos', '25-34_pos', '35-44_pos', '45-59_pos', '60-80_pos',
       '>80_pos', 'M_pos', '2', 'F_pos', 'T', 'F', 'M', 'NIA', 'T_pos',
       'NIA_pos', 'O', 'O_pos', 'N', 'G', ',', 'D', 'new_case',
       'negative_case', 'Failed_test', 'Total_test',
       'negative_egene_screening', 'positive_egene_screening',
       'Failed_test_egene_screening', '<20_ct_value_screening',
       '20-30_ct_value_screening', '31-35_ct_value_screening',
       '>35_ct_value_screening', '<20_posct_value_screening',
       '20-30_posct_value_screening', '31-35_posct_value_screening',
       '>35_posct_value_screening', 'negative_rdrp_confirmatory',
       'positive_rdrp_confirmatory', 'Failed_test_rdrp_confirmatory',
       '<20_ct_value_rdrp', '20-30_ct_value_rdrp', '31-35_ct_value_rdrp',
       '>35_ct_value_rdrp', '<20_posct_value_rdrp', '20-30_po

In [None]:
# df = smoothen(df, 5)

In [None]:
# Wrap the array of daily new-case totals in a darts TimeSeries; only values
# are passed, no timestamps.
series = TimeSeries.from_values(df)

In [None]:
# Look-back length (in time steps) handed to the model as its input chunk.
window_size = 1

# Chronological hold-out split: the first `train_for` points are used for
# training and the next `predict_for` points are kept for evaluation.
train = series[:train_for]
test = series[train_for:train_for + predict_for]

In [None]:
# Early stopping driven by the training loss: training ends once the loss
# stops decreasing by at least `min_delta` within the `patience` budget.
my_stopper = EarlyStopping(
    monitor="train_loss",
    mode='min',
    min_delta=1e-9,
    patience=7,
)

# Extra keyword arguments passed through to the model's trainer.
pl_trainer_kwargs = {"callbacks": [my_stopper]}

# Generic-architecture N-BEATS model that reads `window_size` past steps and
# emits one step per forward pass.
nbeats = NBEATSModel(
    input_chunk_length=window_size,
    output_chunk_length=1,
    generic_architecture=True,
    pl_trainer_kwargs=pl_trainer_kwargs,
)

In [None]:
# Fit the scaler on the training slice only and scale that slice; the same
# fitted scaler is reused later to map forecasts back to original units.
train_scaler = Scaler()
scaled_train = train_scaler.fit_transform(train)

In [None]:
# Train on the scaled training series for at most 100 epochs; the
# early-stopping callback attached to the model may end training sooner.
nbeats.fit(
    scaled_train,
    epochs=100
)

In [None]:
# Forecast `predict_for` steps; the output is still in scaled units.
scaled_pred_nbeats = nbeats.predict(n=predict_for)

# Undo the scaling with the scaler that was fitted on the training slice.
pred_nbeats = train_scaler.inverse_transform(scaled_pred_nbeats)

In [None]:
# Mean absolute percentage error of the forecast against the held-out window.
# `mape` is already imported with the other darts imports at the top of the
# notebook, so it is not re-imported here.
mape_nbeats = mape(test, pred_nbeats)

print(mape_nbeats)

In [None]:
# Extract the forecast and the held-out observations as 2-D NumPy arrays
# (time steps x components). TimeSeries.values() is used rather than
# pd_dataframe().values because the install cell does not pin a darts version
# and pd_dataframe() is deprecated in recent releases.
predictions = pred_nbeats.values()
actual = test.values()

In [None]:
import matplotlib.pyplot as plt

def append(a, b):
  """Return a new list holding every item of ``a`` followed by every item of ``b``.

  Both arguments may be any iterables; neither is modified.
  """
  return [*a, *b]

In [None]:
window = 100  # number of trailing training observations drawn as context

# Context shared by both curves: the last `window` training observations.
context = df[train_for - window:train_for]

plt.figure()
plt.title("Predictions vs Actual results(N-BEATS)")
# `predictions` and `actual` are 2-D (n, 1) arrays; flatten them so every
# plotted sequence is a flat list of scalars rather than a ragged mix of
# scalars and length-1 rows.
plt.plot(append(context, predictions.reshape(-1)), label="Predictions")
plt.plot(append(context, actual.reshape(-1)), label="Actual")
plt.xlabel(f"Time axis (Predictions for {predict_for} days from {date})")
plt.ylabel("Number of covid cases")
# Vertical marker at the boundary between the context and the forecast window.
plt.axvline(window, color='red', label="predictions on right of this line")
plt.legend()
plt.show()

In [None]:
# Persist the trained model; the file name embeds the reference date string.
# NOTE(review): `date` is inserted verbatim - confirm it never contains path
# separators such as "/".
nbeats.save("model_" + date + ".pt")

In [None]:
# Observed values of the held-out window, flattened to 1-D for printing.
print(actual.reshape(-1))

In [None]:
# Forecast values in original units, flattened to 1-D for printing.
print(predictions.reshape(-1))