In [8]:
import gzip
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random

# Data-Processing 
(same process as in Transformer's)

In [10]:
yjmob1 = 'yjmob100k-dataset1.csv.gz' # dataset under normal scenes
yjmob_df = pd.read_csv(yjmob1, compression='gzip').sort_values(by=['uid', 'd', 't'], ignore_index=True)

# Retrieve all ids
uids = yjmob_df['uid'].unique()

# Just to reduce memory space
rand_indicies = [random.randint(0, len(uids)) for _ in range(200)] # only 200 data would be used
selected_uids = [uid for uid in uids[rand_indicies]] # selected_uids = uids[:200]
# selected_uids = uids[:200]

df = yjmob_df[yjmob_df['uid'].isin(selected_uids)] 

# Time
# df['combined_t'] = df['d']*47+df['t']

# Location
def spatial_token(x, y):
    return (x-1)+(y-1)*200
df['combined_xy'] = df.apply(lambda row: spatial_token(row['x'], row['y']), axis=1)

# Sort value
df = df.sort_values(by=['uid', 't'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['combined_xy'] = df.apply(lambda row: spatial_token(row['x'], row['y']), axis=1)


# Train-Test Split 
(~Transformer's)

In [11]:
# 7:3 split
train_uids, test_uids = train_test_split(selected_uids, test_size=0.30, random_state=42)

# Load training and testing data
df_train = df[df['uid'].isin(train_uids)]
df_test = df[df['uid'].isin(test_uids)]

# Batching 
(~Transformer's)

In [40]:
BATCH_SIZE = 55
STEP_SIZE = 600 ## TODO

In [41]:
def generate_sequences(data, data_t):
    return torch.tensor(data[:STEP_SIZE]),torch.tensor(data[STEP_SIZE]),\
                torch.tensor(data_t[:STEP_SIZE]),torch.tensor(data_t[STEP_SIZE])

In [42]:
# Group data by uid
grouped_data_train = df_train[['uid', 't', 'combined_xy']].groupby('uid')
grouped_data_train = [group for _, group in df_train.groupby('uid')]
grouped_data_test = df_test[['uid', 't', 'combined_xy']].groupby('uid')
grouped_data_test = [group for _, group in df_test.groupby('uid')]

In [43]:
class TrajectoryDataset(Dataset):
    def __init__(self, grouped_data):
        self.data = grouped_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data_for_uid = self.data[idx]
        inputs, labels, positions, label_positions = generate_sequences(
                                                         data_for_uid['combined_xy'].values.tolist(),
                                                         data_for_uid['t'].values.tolist())
        return inputs, labels, positions, label_positions

train_dataset = TrajectoryDataset(grouped_data_train)
test_dataset = TrajectoryDataset(grouped_data_test)

In [44]:
def collate_fn(batch):
    # Unzip all batch
    inputs_batch, labels_batch, positions_batch, label_positions_batch = zip(*batch)
    
    # Pad the sequence with less length in a batch
    inputs_padded = torch.nn.utils.rnn.pad_sequence(inputs_batch, padding_value=0.0, batch_first=True)
    labels_padded = torch.tensor(np.array(labels_batch))
    positions_padded = torch.nn.utils.rnn.pad_sequence(positions_batch, padding_value=0, batch_first=True)
    label_positions_padded = torch.tensor(np.array(label_positions_batch))
    
    # Doing Addition here
    return inputs_padded+positions_padded, labels_padded+label_positions_padded

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

In [45]:
# Example
for inputs, labels in test_dataloader:
    print("Location Shape:", inputs.shape)
    print("Desired output Location Shape:", labels.shape)
    break

Location Shape: torch.Size([55, 600])
Desired output Location Shape: torch.Size([55])


# LSTM
https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/

### Model Building (nn.PyTorch built-in LSTM)

In [46]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, embed_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.embed_dim = embed_dim
        self.layer_dim = layer_dim
        self.lstm = nn.LSTM(input_dim, embed_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(embed_dim, output_dim)
    
    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.layer_dim, x.size(0), self.embed_dim).to(x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.embed_dim).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Taking the output of the last sequence step
        out = self.fc(out[:, -1, :])
        return out

### Training

In [49]:
# Data related param
BATCH_SIZE = 50
STEP_SIZE = 600 # seq_size

# Model related param
EMBED_DIM = 256
INPUT_DIM = 1
LAYER_DIM = 2 
NUM_CLASS = 40000 # 200*200 grid loc

model = LSTMModel(input_dim=INPUT_DIM, embed_dim=EMBED_DIM, layer_dim=LAYER_DIM, output_dim=NUM_CLASS)
optimizer = optim.Adam(model.parameters(), lr=0.01316687985668029)
criterion = nn.CrossEntropyLoss()

```
Best parameters: {'batch_size': 55, 'embed_dim': 256, 'layer_dim': 2, 'learning_rate': 0.01316687985668029}
Best loss: 5.887406706809998
```

In [50]:
epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0
    for inputs, labels in train_dataloader:
        inputs = inputs.float().unsqueeze(-1)
        labels = labels.long()
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch}: Loss {total_loss / len(train_dataloader)}")

Epoch 0: Loss 10.638633410135904
Epoch 1: Loss 6.900217215220134
Epoch 2: Loss 5.385567824045817
Epoch 3: Loss 5.324960867563884
Epoch 4: Loss 5.251415252685547
Epoch 5: Loss 5.121610959370931
Epoch 6: Loss 5.1421799659729
Epoch 7: Loss 5.152088801066081
Epoch 8: Loss 5.067200342814128
Epoch 9: Loss 5.041619777679443


### Hyperparameter-tuning (TODO)

In [26]:
import optuna

In [25]:
batch_sizes = [int(i) for i in range(5,101,5)]

In [37]:
def objective(trial):
    # Set up dataloader
    batch_size = trial.suggest_categorical('batch_size', batch_sizes) 
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    
    # Model Parameters
    NUM_CLASS = 40000
    STEP_SIZE = 600
    EMBED_DIM = trial.suggest_categorical('embed_dim', [64, 128, 256, 512])
    LAYER_DIM = trial.suggest_int('layer_dim', 1, 6)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Train the model
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    
    # Model instantiation
    model = LSTMModel(input_dim=1,
                      embed_dim=EMBED_DIM, 
                      layer_dim=LAYER_DIM, 
                      output_dim=NUM_CLASS)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    
    # Training loop
    total_loss = 0
    total_samples = 0
    for epoch in range(epochs):
        model.train()
        for inputs, labels in train_dataloader:
            inputs = inputs.float().unsqueeze(-1)
            labels = labels.long()
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item() * labels.size(0)
            total_samples += labels.size(0)
    
    final_avg_loss = total_loss / total_samples
    return final_avg_loss

In [39]:
# Hyperparameter tuning

# Create a study object and optimize the objective function
study = optuna.create_study()
study.optimize(objective, n_trials=50)

# Result
print('Best parameters:', study.best_params)
print('Best loss:', study.best_value)

[I 2024-06-03 18:45:42,611] A new study created in memory with name: no-name-c8668aba-a641-4d3e-acb8-9b0a34f35306
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
[I 2024-06-03 18:46:41,839] Trial 0 finished with value: 8.455938536780222 and parameters: {'batch_size': 100, 'embed_dim': 256, 'layer_dim': 5, 'learning_rate': 0.06594799113977783}. Best is trial 0 with value: 8.455938536780222.
[I 2024-06-03 18:48:03,823] Trial 1 finished with value: 9.804386799676077 and parameters: {'batch_size': 10, 'embed_dim': 128, 'layer_dim': 3, 'learning_rate': 0.00011053968934568852}. Best is trial 0 with value: 8.455938536780222.
[I 2024-06-03 18:50:53,270] Trial 2 finished with value: 7.077436123575483 and parameters: {'batch_size': 75, 'embed_dim': 512, 'layer_dim': 6, 'learning_rate': 0.007682081821027612}. Best is trial 2 with value: 7.077436123575483.
[I 2024-06-03 18:51:24,923] Trial 3 finished with value: 5.956384319918496 and parameters: {'batch_size': 45, 'embed_di

[I 2024-06-03 19:17:59,688] Trial 34 finished with value: 6.133187757219587 and parameters: {'batch_size': 55, 'embed_dim': 256, 'layer_dim': 3, 'learning_rate': 0.005901227143346018}. Best is trial 13 with value: 5.887406706809998.
[I 2024-06-03 19:19:00,826] Trial 35 finished with value: 6.502784276008606 and parameters: {'batch_size': 70, 'embed_dim': 256, 'layer_dim': 6, 'learning_rate': 0.027215063770919395}. Best is trial 13 with value: 5.887406706809998.
[I 2024-06-03 19:20:11,016] Trial 36 finished with value: 6.170829691205706 and parameters: {'batch_size': 80, 'embed_dim': 512, 'layer_dim': 3, 'learning_rate': 0.012873985594755052}. Best is trial 13 with value: 5.887406706809998.
[I 2024-06-03 19:21:01,235] Trial 37 finished with value: 6.297863163266864 and parameters: {'batch_size': 30, 'embed_dim': 128, 'layer_dim': 4, 'learning_rate': 0.08743119231230986}. Best is trial 13 with value: 5.887406706809998.
[I 2024-06-03 19:21:12,203] Trial 38 finished with value: 6.454817969

Best parameters: {'batch_size': 55, 'embed_dim': 256, 'layer_dim': 2, 'learning_rate': 0.01316687985668029}
Best loss: 5.887406706809998


### Inference

In [45]:
softmax = nn.Softmax(dim=1)
model.eval()
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.float().unsqueeze(-1)
        logits = model(inputs)
        probabilities = softmax(logits)
        predictions = torch.argmax(probabilities, dim=1)
        print(f"Predicted Locations: {predictions}")
        print(f"Actual Locations: {labels}")
        print()
        break

Predicted Locations: tensor([19180, 17996, 17996, 17996, 22999, 17996, 17996, 22999, 17996, 17996,
        17996, 17996, 17996, 17996, 17996, 17996, 17996, 17996, 17996, 22999,
        17996, 17996, 22999, 22999, 17996, 17996, 17996,  2319, 17996, 18172,
        17996, 17996, 22999, 22999, 17996, 17996, 17996, 22999, 17996, 17996,
        22999, 17996, 17996, 19180, 17996, 17996, 22999, 17996, 17996, 22999])
Actual Locations: tensor([28782, 18563, 34257, 17964, 16302, 15959, 17766,  3871, 19252, 16370,
         9111, 24168, 23442, 12166, 11673, 28947, 12359, 21437, 12859, 10353,
        26152, 40738,  8359,  2521, 27359, 22703, 26538, 21885, 40756, 16606,
        17520, 19472, 16642,  3481, 27266, 35966, 19095, 14692, 26958, 13700,
        22659, 25109, 14152, 29546, 25187, 15066,  8463, 17996, 24747,  3124])

