<a href="https://colab.research.google.com/github/Regina-Arthur/Coding-Practice-Projects/blob/main/Pytorch_tutorial/RNN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import pandas as pd

# Load CSV
df = pd.read_csv("/content/drive/MyDrive/traffic_prediction_dataset/traffic.csv")

print(df.head())

              DateTime  Junction  Vehicles           ID
0  2015-11-01 00:00:00         1        15  20151101001
1  2015-11-01 01:00:00         1        13  20151101011
2  2015-11-01 02:00:00         1        10  20151101021
3  2015-11-01 03:00:00         1         7  20151101031
4  2015-11-01 04:00:00         1         9  20151101041


In [3]:
# Convert to datetime
df['datetime'] = pd.to_datetime(df['DateTime'])

# Extract day of week
df['day_of_week'] = df['datetime'].dt.day_name()
df['day_of_week_num'] = df['datetime'].dt.day_of_week

# Extract time (hour and minute)
df['hour'] = df['datetime'].dt.hour

df['minute'] = df['datetime'].dt.minute


# Optional: keep only those columns
df = df[['day_of_week', 'day_of_week_num', 'hour', 'minute', 'Junction', 'Vehicles']]

print(df.head())

  day_of_week  day_of_week_num  hour  minute  Junction  Vehicles
0      Sunday                6     0       0         1        15
1      Sunday                6     1       0         1        13
2      Sunday                6     2       0         1        10
3      Sunday                6     3       0         1         7
4      Sunday                6     4       0         1         9


In [4]:
# Normalize hour
mean =df['hour'].mean()
std =df['hour'].std()
df['hour_norm'] = (df['hour']- mean)/std

mean =df['day_of_week_num'].mean()
std =df['day_of_week_num'].std()
df['day_of_week_num_norm'] = (df['day_of_week_num']- mean)/std

mean =df['Vehicles'].mean()
std =df['Vehicles'].std()
df['vehicles_norm'] = (df['Vehicles']- mean)/std

mean =df['Junction'].mean()
std =df['Junction'].std()
df['junction_norm'] = (df['Junction']- mean)/std

# Optional: keep only those columns
df = df[['day_of_week_num_norm', 'hour_norm', 'junction_norm', 'vehicles_norm']]

print(df.head())

   day_of_week_num_norm  hour_norm  junction_norm  vehicles_norm
0              1.501982  -1.661308      -1.220893      -0.375485
1              1.501982  -1.516846      -1.220893      -0.471870
2              1.501982  -1.372384      -1.220893      -0.616448
3              1.501982  -1.227923      -1.220893      -0.761026
4              1.501982  -1.083461      -1.220893      -0.664641


In [16]:
import numpy as np

# Select features (inputs)
features = ['day_of_week_num_norm', 'hour_norm', 'junction_norm', 'vehicles_norm']
data = df[features].values.astype(np.float32)

# Sequence length
seq_len = 1
X, y = [], []

for i in range(len(data) - seq_len):
    X.append(data[i:i+seq_len])
    y.append(data[i+1: i+1+seq_len])

X = np.array(X)
y = np.array(y).reshape(len(y),-1)

print(X.shape, y.shape)  # e.g. (N, 24, 4), (N, 1)

(48119, 1, 4) (48119, 4)


In [18]:
print(y)

[[ 1.5019825  -1.516846   -1.2208925  -0.4718701 ]
 [ 1.5019825  -1.3723844  -1.2208925  -0.616448  ]
 [ 1.5019825  -1.2279229  -1.2208925  -0.76102585]
 ...
 [ 0.5019908   1.3723844   1.8816291  -0.32729223]
 [ 0.5019908   1.516846    1.8816291  -0.03813647]
 [ 0.5019908   1.6613075   1.8816291  -0.52006274]]


In [19]:
import torch

X = torch.tensor(X)
y = torch.tensor(y)

print(X.shape, y.shape)

X_train = X[:40000]
X_test = X[40001:]
print(X_train.shape, X_test.shape)

y_train = y[:40000]
y_test = y[40001:]
print(y_train.shape, y_test.shape)

torch.Size([48119, 1, 4]) torch.Size([48119, 4])
torch.Size([40000, 1, 4]) torch.Size([8118, 1, 4])
torch.Size([40000, 4]) torch.Size([8118, 4])


In [20]:
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


In [58]:
import torch.nn as nn

class TimeSeriesRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])  # take last time step output
        return out

model = TimeSeriesRNN(input_size=X.shape[2], hidden_size=64, num_layers=10, output_size=4)


In [59]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")


Epoch 1, Loss: 0.0478
Epoch 2, Loss: 0.0446
Epoch 3, Loss: 0.0462
Epoch 4, Loss: 0.0124
Epoch 5, Loss: 0.0070
Epoch 6, Loss: 0.0110
Epoch 7, Loss: 0.0066
Epoch 8, Loss: 0.0110
Epoch 9, Loss: 0.0150
Epoch 10, Loss: 0.0920


In [60]:
class TimeSeriesGRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.GRU = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.norm = nn.LayerNorm(hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.GRU(x)
        out = self.fc(out[:, -1, :])  # take last time step output
        return out

model = TimeSeriesGRU(input_size=X.shape[2], hidden_size=64, num_layers=10, output_size=4)


In [61]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.1532
Epoch 2, Loss: 0.0872
Epoch 3, Loss: 0.1173
Epoch 4, Loss: 0.0710
Epoch 5, Loss: 0.0425
Epoch 6, Loss: 0.0226
Epoch 7, Loss: 0.0184
Epoch 8, Loss: 0.0096
Epoch 9, Loss: 0.0080
Epoch 10, Loss: 0.0320


In [62]:
model.eval()  # set to evaluation mode
with torch.no_grad():
    predictions = model(X_test).squeeze().cpu().numpy()
    ground_truth = y_test.cpu().numpy()


for i in range(10):
    print(f"Prediction: {predictions[i]} | Ground Truth: {ground_truth[i]}")





Prediction: [-1.033073    0.898086    0.866308   -0.18318078] | Ground Truth: [-0.9979966   0.9389999   0.8474552  -0.23090698]
Prediction: [-1.0335131  1.0605503  0.8674403 -0.1104661] | Ground Truth: [-0.9979966  1.0834614  0.8474552 -0.4718701]
Prediction: [-1.0339342   1.2450196   0.89373726 -0.2797938 ] | Ground Truth: [-0.9979966   1.2279229   0.8474552  -0.42367747]
Prediction: [-1.0306605   1.4407966   0.90193266 -0.2709482 ] | Ground Truth: [-0.9979966   1.3723844   0.8474552  -0.18271434]
Prediction: [-1.0022185  1.5188355  0.8862902 -0.1455847] | Ground Truth: [-0.9979966   1.516846    0.8474552  -0.52006274]
Prediction: [-0.6130381   0.47600532  0.8734658  -0.43149278] | Ground Truth: [-0.9979966  1.6613075  0.8474552 -0.4718701]
Prediction: [ 0.11223    -1.5659701   0.826046   -0.56976056] | Ground Truth: [-0.4980008  -1.6613075   0.8474552  -0.56825536]
Prediction: [-0.6701043 -1.4443746  0.8170897 -0.6789016] | Ground Truth: [-0.4980008 -1.516846   0.8474552 -0.4718701]


In [48]:
class TimeSeriesLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.LSTM = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.LSTM(x)
        out = self.fc(out[:, -1, :])  # take last time step output
        return out

model = TimeSeriesLSTM(input_size=X.shape[2], hidden_size=64, num_layers=1, output_size=4)


In [49]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.1225
Epoch 2, Loss: 0.1119
Epoch 3, Loss: 0.0797
Epoch 4, Loss: 0.0708
Epoch 5, Loss: 0.0291
Epoch 6, Loss: 0.1504
Epoch 7, Loss: 0.0122
Epoch 8, Loss: 0.0350
Epoch 9, Loss: 0.0622
Epoch 10, Loss: 0.0699
