<a href="https://colab.research.google.com/github/Siddharth-Singh-2004/TransformersForForecasting/blob/main/TransformersToForecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Define device, this can be 'cpu' or 'cuda' for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [27]:
data_folder = '/content/drive/MyDrive'

In [4]:
df=pd.read_csv('/content/drive/MyDrive/revenue.csv')

In [5]:
df['Location'] = "Bangalore"

In [6]:
df['Month']=pd.to_datetime(df['Month'])

  df['Month']=pd.to_datetime(df['Month'])


In [7]:
df['Revenue'] = df['Revenue'].str.replace(',', '').astype(float)

In [8]:
df['Index'] = range(1, len(df) + 1)

In [9]:
df.rename(columns={'Month':'Date'}, inplace=True)

In [10]:
df.head()

Unnamed: 0,Date,Revenue,Location,Index
0,2013-12-01,346200.0,Bangalore,1
1,2014-01-01,579522.0,Bangalore,2
2,2014-02-01,64850.0,Bangalore,3
3,2014-03-01,1907743.0,Bangalore,4
4,2014-04-01,45500.0,Bangalore,5


In [11]:
df.tail()

Unnamed: 0,Date,Revenue,Location,Index
122,2024-02-01,8058888.0,Bangalore,123
123,2024-03-01,8058249.0,Bangalore,124
124,2024-04-01,8823294.0,Bangalore,125
125,2024-05-01,7636227.0,Bangalore,126
126,2024-06-01,8543991.0,Bangalore,127


In [12]:
categorical_covariates = ['Index']

categorical_covariates_num_embeddings = []
for col in categorical_covariates:
  df[col] = df[col].astype('category').cat.codes
  categorical_covariates_num_embeddings.append(df[col].nunique())

categorical_static = ['Location']

categorical_static_num_embeddings = []
for col in categorical_static:
  df[col] = df[col].astype('category').cat.codes
  categorical_static_num_embeddings.append(df[col].nunique())

numeric_covariates = ['Revenue']

target_idx = np.where(np.array(numeric_covariates) == 'Revenue')[0][0]

In [13]:
target_idx

0

In [14]:
df.head()

Unnamed: 0,Date,Revenue,Location,Index
0,2013-12-01,346200.0,0,0
1,2014-01-01,579522.0,0,1
2,2014-02-01,64850.0,0,2
3,2014-03-01,1907743.0,0,3
4,2014-04-01,45500.0,0,4


In [15]:
df.tail()

Unnamed: 0,Date,Revenue,Location,Index
122,2024-02-01,8058888.0,0,122
123,2024-03-01,8058249.0,0,123
124,2024-04-01,8823294.0,0,124
125,2024-05-01,7636227.0,0,125
126,2024-06-01,8543991.0,0,126


In [16]:

def dataframe_to_tensor(df, numeric_covariates, categorical_covariates, categorical_static, target_idx):
  numeric_cov_arr = np.array(df[numeric_covariates].values.tolist())
  category_cov_arr = np.array(df[categorical_covariates].values.tolist())
  static_cov_arr = np.array(df[categorical_static].values.tolist())

  x_numeric = torch.tensor(numeric_cov_arr, dtype=torch.float32).transpose(2, 1)
  x_numeric = torch.log(x_numeric + 1e-5)
  x_category = torch.tensor(category_cov_arr, dtype=torch.long).transpose(2, 1)
  x_static = torch.tensor(static_cov_arr, dtype=torch.long)
  y = torch.tensor(numeric_cov_arr[:, target_idx], dtype=torch.float32)

  return x_numeric, x_category, x_static, y


window_size = 16
forecast_length = 16
num_val = 2

val_max_date = '2024-06-01'
train_max_date = str((pd.to_datetime(val_max_date) - pd.Timedelta(days=(window_size*num_val+forecast_length)*30)).date())

train_final = df[df['Date']<=train_max_date]
val_final = df[(df['Date']>train_max_date) & (df['Date']<=val_max_date)]

train_series = train_final.groupby(['Location']).agg(list).reset_index()
val_series = val_final.groupby(['Location']).agg(list).reset_index()

x_numeric_train_tensor, x_category_train_tensor, x_static_train_tensor, y_train_tensor = dataframe_to_tensor(train_series, numeric_covariates, categorical_covariates, categorical_static, target_idx)
x_numeric_val_tensor, x_category_val_tensor, x_static_val_tensor, y_val_tensor = dataframe_to_tensor(val_series, numeric_covariates, categorical_covariates, categorical_static, target_idx)


In [17]:
df.shape[0]

127

In [33]:

def divide_shuffle(df, div_num):
  if div_num == 0:  # Handle the case of an empty DataFrame
    return df
  space = df.shape[0]//div_num
  division = np.arange(0, df.shape[0], space)
  return pd.concat([df.iloc[division[i]:division[i]+space, :].sample(frac=1) for i in range(len(division))])

def create_time_blocks(time_length, window_size, forecast_length):
  start_idx = np.random.randint(0, window_size - 1)
  end_idx = time_length - window_size - forecast_length - 1
  time_indices = np.arange(start_idx, end_idx+1, window_size)[:-1]
  time_indices = np.append(time_indices, end_idx)
  return time_indices


In [19]:

def data_loader(x_numeric_tensor, x_category_tensor, x_static_tensor, y_tensor, batch_size, time_shuffle):

  num_series = x_numeric_tensor.shape[0]
  time_length = x_numeric_tensor.shape[1]
  index_pd = pd.DataFrame({'serie_idx':range(num_series)})
  index_pd['time_idx'] = [create_time_blocks(time_length, window_size, forecast_length) for n in range(index_pd.shape[0])]
  if time_shuffle:
    index_pd = index_pd.explode('time_idx')
    index_pd = index_pd.sample(frac=1)
  else:
    index_pd = index_pd.explode('time_idx').sort_values('time_idx')
    index_pd = divide_shuffle(index_pd,5)
  indices = np.array(index_pd).astype(int)

  for batch_idx in np.arange(0, indices.shapte[0], batch_size):
    cur_indices = indices[batch_idx:batch_idx+batch_size]

    x_numeric = torch.stack([x_numeric_tensor[n[0], n[1]:n[1]+window_size, :] for n in cur_indices])
    x_category = torch.stack([x_category_tensor[n[0], n[1]:n[1]+window_size, :] for n in cur_indices])
    x_static = torch.stack([x_static_tensor[n[0],:] for n in cur_indices])
    y = torch.stack([y_tensor[n[0], n[1]+window_size:n[1]+window_size+forecast_length] for n in cur_indices])

    yield x_numeric.to(device), x_category.to(device), x_static.to(device), y.to(device)

def val_loader(x_numeric_tensor, x_category_tensor, x_static_tensor, y_tensor, batch_size, num_val):

  num_time_series = x_numeric_tensor.shape[0]

  for i in range(num_val):

    for batch_idx in np.arange(0, num_time_series, batch_size):

      x_numeric = x_numeric_tensor[batch_idx:batch_idx+batch_size, window_size*i:window_size(i+1), :]
      x_category = x_category_tensor[batch_idx:batch_idx+batch_size, window_size*i:window_size(i+1), :]
      x_static = x_static_tensor[batch_idx:batch_idx+batch_size]
      y_val = y_tensor[batch_idx:batch_idx+batch_size, window_size*(i+1):window_size*(i+1)+forecast_length]

      yield x_numeric.to(device), x_category.to(device), x_static.to(device), y.to(device)



In [20]:
from torch import nn

In [25]:

class transformer_block(nn.Module):

  def __init__(self, embed_size, num_heads):
    super(transformer_block, self).__init__()

    self.attention = nn.MultiheadAttention(embed_size, num_heads, batch_first=True)
    self.fc = nn.Sequential(nn.Linear(embed_size, embed_size*4),
                            nn.LeakyReLU(),
                            nn.Linear(embed_size*4, embed_size))
    self.dropout = nn.Dropout(drop_prob)
    self.ln1 = nn.LayerNorm(embed_size, eps=1e-6)
    self.ln2 = nn.LayerNorm(embed_size, eps=1e-6)

  def forward(self, x):

    attn_out, _ = self.attention(x, x, x, need_weights=False)
    x = x + self.dropout(attn_out)
    x = self.ln1(x)

    fc_out = self.fc(x)
    x = x + self.dropout(fc_out)
    x = self.ln2(x)

    return x


class transformer_forecaster(nn.Module):

  def __init__(self, embed_size, num_heads, num_blocks):
    super(transformer_forecaster, self).__init__()

    num_len = len(numeric_covariates)
    self.embedding_cov = nn.ModuleList([nn.Embedding(n, embed_size-num_len) for n in categorical_covariates_num_embeddings])
    self.embedding_static = nn.ModuleList([nn.Embedding(n, embed_size-num_len) for n in categorical_static_num_embeddings])

    self.blocks = nn.ModuleList([transformer_block(embed_size, num_heads) for n in range(num_blocks)])

    self.forecast_head = nn.Sequential(nn.Linear(embed_size, embed_size*2),
                                       nn.LeakyReLU(),
                                       nn.Dropout(drop_prob),
                                       nn.Linear(embed_size*2, embed_size*4),
                                       nn.LeakyReLU(),
                                       nn.Linear(embed_size*4, forecast_length),
                                       nn.ReLU())

  def forward(self, x_numeric, x_category, x_static):

    tmp_list = []
    for i, embed_layer in enumerate(self.embedding_static):
      tmp_list.append(embed_layer(x_static[:, i]))
    categorical_static_embeddings = torch.stack(tmp_list).mean(dim=0).unsqueeze(1)

    tmp_list = []
    for i, embed_layer in enumerate(self.embedding_cov):
      tmp_list.append(embed_layer(x_category[:, :, i]))
    categorical_covariates_embeddings = torch.stack(tmp_list).mean(dim=0)

    T = categorical_covariates_embeddings.shape[1]

    embed_out = (categorical_covariates_embeddings + categorical_static_embeddings.repeat(1, T, 1))/2
    x = torch.concat((x_numeric, embed_out), dim=-1)

    for block in self.blocks:
      x = block(x)

    x = x.mean(dim=1)
    x = self.forecast_head(x)

    return x


In [22]:

class RMSELoss(nn.Module):

  def __init__(self):
    super().__init__()
    self.mse = nn.MSELoss()

  def forward(self, pred, actual):
    return torch.sqrt(self.mse(torch.log(pred + 1), torch.log(actual + 1)))



In [26]:

num_epoch = 1000
min_val_loss = 999

num_blocks = 1
embed_size = 500
num_heads = 50
batch_size = 128
learning_rate = 3e-4
time_shuffle = False
drop_prob = 0.1

model = transformer_forecaster(embed_size, num_heads, num_blocks).to(device)
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)


In [34]:
for epoch in range(num_epoch):

  batch_loader = data_loader(x_numeric_train_tensor, x_category_train_tensor, x_static_train_tensor, y_train_tensor, batch_size, time_shuffle)
  train_loss = 0
  counter = 0

  model.train()
  for x_numeric, x_category, x_static, y in batch_loader:

    optimizer.zero_grad()
    preds = model(x_numeric, x_category, x_static)
    loss = criterion(preds, y)
    train_loss += loss.item()
    counter += 1
    loss.backward()
    optimizer.step()

  train_loss = train_loss/counter
  print(f'Epoch {epoch+1} train loss: {train_loss}')

  model.eval()
  val_batches = val_loader(x_numeric_val_tensor, x_category_val_tensor, x_static_val_tensor, y_val_tensor, batch_size, num_val)
  val_loss = 0
  counter = 0
  for x_numeric_val, x_category_val, x_static_val, y_val in val_batches:
    with torch.no_grad():
      preds = model(x_numeric_val, x_category_val, x_static_val)
      loss = criterion(preds, y_val).item()
    val_loss += loss
    counter += 1
  val_loss = val_loss/counter
  print(f'Epoch {epoch+1} val loss: {val_loss}')

  if val_loss < min_val_loss:
    print('saved')
    torch.save(model, data_folder+'/best_model')
    min_val_loss = val_loss

  scheduler.step()

ZeroDivisionError: division by zero