In [None]:
import pandas as pd
import numpy as np

data = pd.read_hdf('/content/metr-la.h5')

# Flatten the wide table into a single speed column. Reading the value matrix
# in column-major ('F') order yields every row of the first column, then every
# row of the second column, and so on.
speeds = data.to_numpy().ravel(order='F')

# Long-format frame: the repeat/tile pattern follows the same column-by-column
# ordering as `speeds`, so each value stays paired with its column label and
# its index entry.
df = pd.DataFrame({
    'sensor_id': np.repeat(data.columns, data.shape[0]),
    'time_sequence': np.tile(data.index, data.shape[1]),
    'speed': speeds,
})

df.head(5)  # not the last expression of the cell, so this preview is not echoed
df.shape

(7094304, 3)

In [None]:
# Reload the wide-format table from disk and preview its first five rows
# (five is the default row count of DataFrame.head).
data = pd.read_hdf('/content/metr-la.h5')
data.head()

Unnamed: 0,773869,767541,767542,717447,717446,717445,773062,767620,737529,717816,...,772167,769372,774204,769806,717590,717592,717595,772168,718141,769373
2012-03-01 00:00:00,64.375,67.625,67.125,61.5,66.875,68.75,65.125,67.125,59.625,62.75,...,45.625,65.5,64.5,66.428571,66.875,59.375,69.0,59.25,69.0,61.875
2012-03-01 00:05:00,62.666667,68.555556,65.444444,62.444444,64.444444,68.111111,65.0,65.0,57.444444,63.333333,...,50.666667,69.875,66.666667,58.555556,62.0,61.111111,64.444444,55.888889,68.444444,62.875
2012-03-01 00:10:00,64.0,63.75,60.0,59.0,66.5,66.25,64.5,64.25,63.875,65.375,...,44.125,69.0,56.5,59.25,68.125,62.5,65.625,61.375,69.857143,62.0
2012-03-01 00:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2012-03-01 00:20:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Pull the full value matrix out of the DataFrame as a NumPy array.
speeds = data.to_numpy()

# Z-score normalisation with one global mean and one global standard
# deviation, both taken over every entry of the matrix (all rows and all
# columns). `mean` and `std` are kept as names because the predictions are
# mapped back to the original scale with them later in the notebook.
mean = speeds.mean()
std = speeds.std()
speeds = (speeds - mean) / std
speeds


array([[0.5259243 , 0.68632759, 0.66165016, ..., 0.27298067, 0.75419051,
        0.40253716],
       [0.44160976, 0.73225502, 0.57870658, ..., 0.10709351, 0.72677115,
        0.45189202],
       [0.50741623, 0.49507752, 0.30999681, ..., 0.37785974, 0.79649468,
        0.40870652],
       ...,
       [0.6554808 , 0.29354519, 0.78160988, ..., 0.474513  , 0.67741629,
        0.37031941],
       [0.64314209, 0.42104524, 0.60612595, ..., 0.45806138, 0.6986663 ,
        0.48273881],
       [0.56225496, 0.64999693, 0.64451306, ..., 0.48548074, 0.7377389 ,
        0.39773878]])

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader



In [None]:
# Define a function to create sliding windows
def create_sliding_windows(data, window_size):
    """Cut a 2-D series into overlapping input windows and next-row targets.

    Args:
        data: Array-like indexed as ``data[row, column]``; rows are the
            sequence dimension that the window slides over.
        window_size: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[i]`` equals
        ``data[i:i + window_size]`` and ``y[i]`` equals
        ``data[i + window_size]``, the row immediately after that window.
        There are ``max(len(data) - window_size, 0)`` pairs. When there are
        none, both arrays are empty but keep their trailing dimensions, so
        their shapes are ``(0, window_size, ...)`` and ``(0, ...)``.
    """
    data = np.asarray(data)

    # A window starting at row i needs row i + window_size as its target, so
    # the last usable start is len(data) - window_size - 1. That makes
    # len(data) - window_size windows in total; the earlier
    # `range(len(data) - window_size - 1)` stopped one short and dropped the
    # final pair.
    n_windows = max(len(data) - window_size, 0)

    if n_windows == 0:
        trailing = data.shape[1:]
        return (np.empty((0, window_size) + trailing, dtype=data.dtype),
                np.empty((0,) + trailing, dtype=data.dtype))

    X = np.stack([data[start:start + window_size, :] for start in range(n_windows)])
    # np.array copies, so the targets do not alias the caller's array.
    y = np.array(data[window_size:, :])
    return X, y

# Build the supervised samples: every input holds `window_size` consecutive
# rows of `speeds`, and its target is the row that follows them.
window_size = 12 # 1 hour of data
X, y = create_sliding_windows(speeds, window_size)

# 80/20 split in sample order: the first 80% of the windows are used for
# training and the remaining ones for testing (no shuffling at this point).
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Define a PyTorch dataset
class TrafficDataset(Dataset):
    """Dataset that pairs each input window with its target row.

    Both inputs are converted to ``float32`` tensors once, at construction
    time; indexing afterwards is plain tensor indexing.
    """

    def __init__(self, X, y):
        # Same conversion for inputs and targets: a float32 tensor each.
        self.X, self.y = (
            torch.tensor(values, dtype=torch.float32) for values in (X, y)
        )

    def __len__(self):
        # The sample count is the length of the input tensor's first dimension.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        # Return the (input, target) pair stored at position `idx`.
        window = self.X[idx]
        target = self.y[idx]
        return window, target

# Wrap each split in a dataset and serve it in batches of 128 samples.
# Only the training loader shuffles; the test loader keeps the sample order.
train_dataset = TrafficDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)

test_dataset = TrafficDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Define a PyTorch model
class TrafficModel(torch.nn.Module):
    """CNN mapping a window of readings to one predicted value per sensor.

    Each input window is treated as a one-channel image of size
    ``(window_size, num_sensors)``. Three 3x3 convolutions with padding 1
    keep that spatial size while widening the channels 1 -> 32 -> 64 -> 128.
    The feature map is then flattened per sample and passed through two
    fully connected layers.

    Args:
        window_size: Number of rows (time steps) in each input window.
            Defaults to 12, the value previously hard-coded.
        num_sensors: Number of columns in each window, and also the width of
            the output. Defaults to 207, the value previously hard-coded.
    """

    def __init__(self, window_size=12, num_sensors=207):
        super(TrafficModel, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=(3, 3), padding=(1, 1))
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=(3, 3), padding=(1, 1))
        self.conv3 = torch.nn.Conv2d(64, 128, kernel_size=(3, 3), padding=(1, 1))
        # Padding 1 with a 3x3 kernel preserves height and width, so the
        # flattened feature size is channels * window_size * num_sensors.
        self.fc1 = torch.nn.Linear(128 * window_size * num_sensors, 512)
        self.fc2 = torch.nn.Linear(512, num_sensors)

    def forward(self, x):
        """Run a batch of windows through the network.

        Args:
            x: Tensor of shape ``(batch, window_size, num_sensors)``.

        Returns:
            Tensor of shape ``(batch, num_sensors)``.
        """
        # Add the channel dimension expected by Conv2d: (batch, 1, H, W).
        x = x.unsqueeze(1)
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, <fixed size>), this can never fold a wrong-sized input into
        # extra batch rows; a size mismatch is reported by fc1 instead.
        x = x.flatten(start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Model, loss function and optimizer (Adam with its default settings).
model = TrafficModel()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_dataloader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        # MSELoss returns the mean over the batch. Weighting by the batch size
        # and dividing by the dataset size below gives the mean over the whole
        # split, instead of a sum that grows with the number of batches.
        train_loss += loss.item() * len(X_batch)
    train_loss /= len(train_dataloader.dataset)

    # Evaluate the model on the test set, reported on the same per-sample
    # scale as the training loss so the two numbers are directly comparable.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_dataloader:
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            test_loss += loss.item() * len(X_batch)
    test_loss /= len(test_dataloader.dataset)

    # Print the epoch and the training and test losses
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

# Predict the traffic speed
model.eval()
X_pred = torch.tensor(X_test, dtype=torch.float32)
with torch.no_grad():
    # Run the test windows through the model in chunks rather than in a single
    # forward pass: the convolutional activations are far larger than the
    # input, so one pass over the whole test set can exhaust memory. Chunks
    # are taken in order, so the rows of y_pred still line up with X_test.
    y_pred = torch.cat(
        [model(chunk) for chunk in X_pred.split(test_dataloader.batch_size)]
    )

# Denormalize the predicted speeds (inverse of the z-score applied to `speeds`)
y_pred = y_pred.numpy()
y_pred = (y_pred * std) + mean


Epoch 1/10, Train Loss: 91.5398, Test Loss: 13.7545
Epoch 2/10, Train Loss: 30.7539, Test Loss: 11.2516
Epoch 3/10, Train Loss: 26.4202, Test Loss: 10.5651
Epoch 4/10, Train Loss: 24.2465, Test Loss: 10.2991
Epoch 5/10, Train Loss: 22.1717, Test Loss: 9.9344
Epoch 6/10, Train Loss: 20.4062, Test Loss: 9.7108
Epoch 7/10, Train Loss: 18.7976, Test Loss: 9.7140
Epoch 8/10, Train Loss: 18.1102, Test Loss: 10.2646
Epoch 9/10, Train Loss: 16.6651, Test Loss: 10.0106
Epoch 10/10, Train Loss: 15.4443, Test Loss: 9.8841
