## Import Lib

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pandas as pd
import numpy as np

import matplotlib.pylab as plt

In [37]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


## Read Data

In [20]:
df = pd.read_csv('AirPassengers.csv')
print(df.shape)
df.head(5)

(144, 2)


Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


##  Data preprocessing


In [44]:
# Preprocess the data
scaler = MinMaxScaler(feature_range=(-1, 1))
df['Scaled'] = scaler.fit_transform(df['#Passengers'].values.reshape(-1,1))
df

Unnamed: 0,Month,#Passengers,Scaled
0,1949-01,112,-0.969112
1,1949-02,118,-0.945946
2,1949-03,132,-0.891892
3,1949-04,129,-0.903475
4,1949-05,121,-0.934363
...,...,...,...
139,1960-08,606,0.938224
140,1960-09,508,0.559846
141,1960-10,461,0.378378
142,1960-11,390,0.104247


In [45]:
# Convert data to PyTorch tensors
data_tensor = torch.FloatTensor(data['Scaled'].values).view(-1)

In [46]:
# Create sequences from the time series data
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        sequence = data[i:i+seq_length]
        target = data[i+seq_length:i+seq_length+1]
        sequences.append((sequence, target))
    return sequences


seq_length = 12
sequences = create_sequences(data_tensor, seq_length)

##### Split Data

In [33]:

# Split the dataset into training and testing sets
train_size = int(len(sequences) * 0.8)
test_size = len(sequences) - train_size
train_data, test_data = torch.utils.data.random_split(sequences, [train_size, test_size])


## Stacked LSTM

In [47]:
class StackedLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(StackedLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) # initial hidden state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device) # initial cell state

        out, _ = self.lstm1(x, (h0, c0))
        out, _ = self.lstm2(out, (h0, c0))

        out = self.fc(out[:, -1, :]) # get last hidden state and pass it through the fully connected layer
        return out

In [49]:
input_size = 1
hidden_size = 100
num_layers = 3
output_size = 1
num_epochs = 100
learning_rate = 0.001

# Instantiate the model
model = StackedLSTM(input_size, hidden_size, num_layers, output_size)
model.to(device)

StackedLSTM(
  (lstm1): LSTM(1, 100, num_layers=3, batch_first=True)
  (lstm2): LSTM(100, 100, num_layers=3, batch_first=True)
  (fc): Linear(in_features=100, out_features=1, bias=True)
)

In [50]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

##### Train model

In [52]:
for epoch in range(num_epochs):
    for sequence, target in train_data:
        sequence = sequence.unsqueeze(0).unsqueeze(2).to(device)
        target = target.unsqueeze(0).unsqueeze(2).to(device)

        # Forward pass
        outputs = model(sequence)
        loss = criterion(outputs, target)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/100], Loss: 0.0052
Epoch [20/100], Loss: 0.0017
Epoch [30/100], Loss: 0.0000
Epoch [40/100], Loss: 0.0004
Epoch [50/100], Loss: 0.0098
Epoch [60/100], Loss: 0.0162
Epoch [70/100], Loss: 0.0181
Epoch [80/100], Loss: 0.0128
Epoch [90/100], Loss: 0.0096
Epoch [100/100], Loss: 0.0069


##### Evaluate Model

In [57]:
model.eval()
with torch.no_grad():
    total_loss = 0.0
    num_samples = 0
    
    for sequence, target in test_data:
        sequence = sequence.unsqueeze(0).unsqueeze(2).to(device)
        target = target.unsqueeze(0).unsqueeze(2).to(device)
        outputs = model(sequence)
        
        loss = criterion(outputs, target)
        total_loss += loss.item() * sequence.size(0)  # Accumulate the total loss
        num_samples += sequence.size(0)
        
    # Calculate average test loss
    avg_loss = total_loss / num_samples
    print(f'Average test loss: {avg_loss:.4f}')

Average test loss: 0.0083
