In [None]:
!pip install darts

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting darts
  Downloading darts-0.24.0-py3-none-any.whl (693 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m693.9/693.9 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting tbats>=1.1.0
  Downloading tbats-1.1.2-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning>=1.5.0
  Downloading pytorch_lightning-2.0.1.post0-py3-none-any.whl (718 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.6/718.6 kB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
Collecting pyod>=0.9.5
  Downloading pyod-1.0.9.tar.gz (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.0/150.0 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardX>=2.1
  Downloading t

In [None]:
from darts.models import NBEATSModel
from darts.dataprocessing.transformers import Scaler
from darts import TimeSeries
from darts.metrics import mape
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

In [None]:
# Number of leading observations used to fit each model (series are sliced as [:train_for]).
train_for = 500
# Forecast horizon: number of steps requested from each model and compared against the actuals.
predict_for = 14

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive; later cells read the CSV from, and save models to, paths under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def smoothen(df, period):
    """Return a copy of ``df`` smoothed with a trailing moving average.

    For every index ``i`` with ``period <= i < len(df)`` the result holds the
    mean of the ``period`` values ``df[i - period + 1] .. df[i]``.  The first
    ``period`` entries are copied through unchanged.  The input itself is
    never modified.

    Args:
        df: Integer-indexable sequence of numbers that provides ``copy()``
            (for example a list or a NumPy array).
        period: Window length, in samples.

    Returns:
        A container of the same kind as ``df``.  Integer-typed containers are
        returned as float so the averaged values keep their fractional part.
    """
    df_ma = df.copy()

    # Assigning a mean into an integer array silently truncates it (2.5 -> 2),
    # so widen integer containers to float before writing the averages.
    dtype = getattr(df_ma, "dtype", None)
    if dtype is not None and getattr(dtype, "kind", "") in ("i", "u"):
        df_ma = df_ma.astype(float)

    for i in range(period, len(df)):
        # Sum is taken over the ORIGINAL values, never the already-smoothed ones.
        window_sum = 0
        for j in range(i - period + 1, i + 1):
            window_sum += df[j]
        df_ma[i] = window_sum / period
    return df_ma

In [None]:
# Load the raw dataset from the mounted Drive.
df = pd.read_csv('/content/drive/MyDrive/covid-final-all.csv')
# Lower-case the state names with the vectorised string accessor: unlike a row-wise
# apply it does not raise on missing values (they stay NaN) and avoids a Python-level loop.
df['state_residence'] = df['state_residence'].str.lower()
print(df.columns)
print(df.state_residence.unique())
df.head()

In [None]:
# Collapse the rows into a single row per Date by summing every count column.
# NOTE(review): groupby orders rows by the Date values as read from the CSV; if they are
# strings in a non-ISO format the order may not be chronological - confirm.
df=df.groupby(['Date']).agg({'F_pos': 'sum', 'M_pos': 'sum', 'T_pos':'sum', 'NIA_pos':'sum', 'new_case': 'sum'}).reset_index()
df.head()

# Date of the first forecast step. Training uses rows [:train_for], so the forecast starts
# at row `train_for` (the previous `train_for + 1` pointed at the second forecast day).
date = df.iloc[train_for]['Date']

In [None]:
class Model:
  """An N-BEATS forecaster bundled with the scaler fitted on its training series.

  The constructor converts the raw values into a darts ``TimeSeries``, fits a
  ``Scaler`` on it and builds (but does not train) an ``NBEATSModel``.  Call
  ``fit()`` to train and ``predict()`` to obtain forecasts on the original scale.

  Attributes:
    df: The raw values passed to the constructor.
    epochs: Maximum number of training epochs handed to ``NBEATSModel.fit``.
    window_size: ``input_chunk_length`` of the network.
    series: ``TimeSeries`` built from ``df``.
    train_scaler: ``Scaler`` fitted on ``series``.
    scaled_train: ``series`` after scaling; this is what the network is trained on.
    nbeats: The underlying ``NBEATSModel``.
  """

  def __init__(self, df, epochs = 100, window_size = 50, random_state = None):
    """Prepare the scaled training series and the untrained network.

    Args:
      df: Values of the training series, in a form accepted by ``TimeSeries.from_values``.
      epochs: Maximum number of training epochs.
      window_size: Number of past steps the network receives as input.
      random_state: Optional seed forwarded to ``NBEATSModel`` for reproducible
        runs; when ``None`` it is not passed, so the library default applies.
    """
    self.df = df
    self.epochs = epochs
    self.window_size = window_size
    self.series = TimeSeries.from_values(df)
    self.train_scaler = Scaler()
    self.scaled_train = self.train_scaler.fit_transform(self.series)

    # Stop once the training loss has not improved by at least min_delta for
    # `patience` consecutive checks; fit() supplies no validation series, so the
    # training loss is the metric monitored here.
    my_stopper = EarlyStopping(
            monitor="train_loss",
            patience=5,
            min_delta=0.000000005,
            mode='min',
        )

    pl_trainer_kwargs={"callbacks": [my_stopper]}

    # Only forward the seed when one was requested so the default construction
    # path is exactly what it was before the parameter existed.
    seed_kwargs = {} if random_state is None else {"random_state": random_state}

    self.nbeats = NBEATSModel(
      input_chunk_length=self.window_size,
      output_chunk_length=1,
      generic_architecture=True,
      pl_trainer_kwargs=pl_trainer_kwargs,
      dropout=0.15,
      **seed_kwargs
    )

  def fit(self):
    """Train the network on the scaled training series for at most ``self.epochs`` epochs."""
    self.nbeats.fit(
      self.scaled_train,
      epochs=self.epochs)

  def predict(self, predict_for):
    """Forecast ``predict_for`` steps and return them on the original scale.

    Args:
      predict_for: Number of steps to forecast (passed as ``n`` to ``NBEATSModel.predict``).

    Returns:
      The forecast after ``train_scaler.inverse_transform``.
    """
    scaled_pred = self.nbeats.predict(n=predict_for)
    pred = self.train_scaler.inverse_transform(scaled_pred)

    return pred

In [None]:
# Pull each of the four *_pos columns out of the aggregated frame as a plain array.
df_f, df_m, df_t, df_nia = (
    df[column].values for column in ('F_pos', 'M_pos', 'T_pos', 'NIA_pos')
)

# Element-wise sum of the four series: this is the quantity the forecast is judged against.
df_total = df_f + df_m + df_t + df_nia

# n = 70  # the larger n is, the smoother curve will be
# b = [1.0 / n] * n
# a = 1
# df_total = lfilter(b, a, df_total)
# df_total = smoothen(df_total, 10)

In [None]:
# Quick look at the combined series before modelling; labelled so the figure stands on its own.
plt.figure()
plt.title("Total covid cases (F_pos + M_pos + T_pos + NIA_pos)")
plt.plot(df_total)
plt.xlabel("Row index (one row per date)")
plt.ylabel("Number of covid cases")
plt.show()

In [None]:
# One forecaster per series, each built from only the first `train_for` observations.
model_f, model_m, model_t, model_nia = (
    Model(values[:train_for]) for values in (df_f, df_m, df_t, df_nia)
)

# Train them one after another, in the same order they were created.
for _model in (model_f, model_m, model_t, model_nia):
    _model.fit()

In [None]:
# Save every trained network to Drive; each file name is the path stem followed by `date` and ".pt".
for _path_stem, _trained in (
    ("/content/drive/MyDrive/saved_models/model_f", model_f),
    ("/content/drive/MyDrive/saved_models/model_m", model_m),
    ("/content/drive/MyDrive/saved_models/model_t", model_t),
    ("/content/drive/MyDrive/saved_models/model_nia", model_nia),
):
    _trained.nbeats.save(_path_stem + date + ".pt")

In [None]:
def mape_(p, a):
  """Mean absolute percentage error of ``p`` measured against ``a``.

  Computes ``100 * mean(|p[i] - a[i]| / |a[i]|)`` over the indices of ``p``.

  Args:
    p: Indexable sequence of predicted values; its length sets how many terms are averaged.
    a: Indexable sequence of reference (actual) values, at least as long as ``p``.

  Returns:
    The error as a percentage.  Each term is divided by ``a[i]``, so a zero in
    ``a`` is not handled specially (plain Python numbers raise ``ZeroDivisionError``).
  """
  m = 0.0
  for i in range(len(p)):
    # Normalise by the magnitude of the reference so a negative value cannot
    # turn its term negative and cancel out other errors.
    m += abs(p[i] - a[i]) / abs(a[i])
  return (m / len(p)) * 100

In [None]:
# The combined forecast is the sum of the four individual forecasts.
predictions = model_f.predict(predict_for) + model_m.predict(predict_for) + model_t.predict(predict_for) + model_nia.predict(predict_for)

# Observed totals for the same `predict_for` steps that immediately follow the training window.
actual = TimeSeries.from_values(df_total[train_for:train_for+predict_for])

# From here on both names hold plain NumPy arrays rather than TimeSeries objects.
predictions = predictions.pd_dataframe().values
actual = actual.pd_dataframe().values

# mape_(p, a) divides by its second argument, so the observed values must be passed second;
# the previous call had the two swapped and normalised the error by the forecast instead.
MAPE = mape_(predictions, actual)
print("MAPE:", MAPE)

In [None]:
# Show the error value from the evaluation cell again, without the "MAPE:" label.
print(MAPE)

In [None]:
import matplotlib.pyplot as plt

def append(a, b):
  """Return a new list with every item of ``a`` followed by every item of ``b``.

  Both arguments only need to be iterable, and neither one is modified.
  """
  return [*a, *b]

In [None]:
window = 100  # number of training observations drawn to the left of the forecast
# Shared left-hand part of both curves: the last `window` points of the training range.
history = df_total[train_for - window:train_for]

plt.figure()
# The forecaster used in this notebook is darts' NBEATSModel.
plt.title("Predictions vs Actual results (N-BEATS)")
# `predictions` / `actual` come from DataFrame.values and are 2-D; flatten them so the joined
# list holds only scalars (a mix of scalars and 1-element rows is a ragged sequence, which
# recent NumPy versions refuse to convert when matplotlib builds the plot data).
plt.plot(append(history, predictions.reshape(-1)), label="Predictions")
plt.plot(append(history, actual.reshape(-1)), label="Actual")
# Horizon is taken from `predict_for` rather than a hard-coded 14, and the parenthesis is closed.
plt.xlabel("Time axis (Predictions for " + str(predict_for) + " days from " + str(date) + ")")
plt.ylabel("Number of covid cases")
# The vertical line sits at the boundary between the history and the forecast.
plt.axvline(window, color = 'red', label = "predictions on right of this line")
plt.legend()
plt.show()

In [None]:
# Forecast values flattened to one dimension for a compact printout.
print(predictions.ravel())

In [None]:
# Observed values flattened to one dimension, for side-by-side reading with the forecast.
print(actual.ravel())