# Ablation Analysis: model trained without context

Dataset discretized with 100 Gaussian-mixture clusters (`mog_100.npy`) at a 30-minute interval. Unfiltered.

## Requirements

- Pre-processed dataset with 100 clusters located at `{ROOT}/data/sh30-c100`
- Pre-processed dataset with 50 clusters located at `{ROOT}/data/sh30-c50`
- Pre-computed 50 clusters located at `{ROOT}/data/exploratory_analysis/mog_50.npy`
- Pre-computed 100 clusters located at `{ROOT}/data/exploratory_analysis/mog_100.npy`

## Imports and constants

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import tqdm
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

from src.path import ROOT
from src.ml.checkpoint import Checkpoint

# Number of points in every trajectory sequence.
SEQ_LENGTH: int = 48

# File-name prefixes: one for the cached dataset tensors, one for the
# checkpoints written by this (context-free) run.
CACHE_PREFIX: str = 'sh30-c100'
CHECKPOINT_PREFIX: str = 'sh30-c100-noctx'

# CUDA is requested by default; drop back to the CPU when no device is usable.
USE_CUDA: bool = True

if USE_CUDA:
    if not torch.cuda.is_available():
        print('fallback to cpu as CUDA is not available on this device')
        USE_CUDA = False

# Checkpoint helper shared with the training loop.
# NOTE(review): the unit of `checkpoint_interval` (epochs vs. steps) is defined
# in src.ml.checkpoint and is not visible here.
checkpoint = Checkpoint(prefix=CHECKPOINT_PREFIX, checkpoint_interval=5)

## define dataset

### define path

Change the path variable here if you place your dataset files in a different location.

In [2]:
# Pre-computed Gaussian-mixture cluster centres (100 clusters); loaded further
# down into `all_candidates`.
# NOTE(review): the Requirements section lists this file under
# `{ROOT}/data/exploratory_analysis/`, but the path below has no `data/`
# component -- confirm which location is the intended one.
cluster_path = f'{ROOT}/exploratory_analysis/mog_100.npy'
# Directory with the pre-processed dataset files. Kept as a plain string
# because it is passed to os.listdir and interpolated into f-strings later on.
dataset_path = str(ROOT.joinpath('data/sh30-c100'))

### split dataset

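Split the dataset files into the pre-defined training and test sets: files dated within the 15 days starting on 2014-06-18 form the test set; all other files form the training set.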

In [3]:
import os
from datetime import date

from src.data_preprocess.trajectory import from_dataframe
from src.ml.dataset import get_shanghai_date

# os.listdir returns entries in arbitrary, platform-dependent order. Sort them
# so the train/test file lists (and anything order-dependent built from them,
# such as the seeded random_split further down) are identical on every machine
# whenever the dataset cache has to be rebuilt.
file_list = sorted(os.listdir(dataset_path))

def is_test(fname: str):
    '''
    Tell whether a dataset file falls inside the held-out test window.

    The window spans the 15 consecutive days that start on 2014-06-18
    (inclusive). The date of the file is obtained with `get_shanghai_date`.
    '''
    window_start = date(2014, 6, 18)
    offset_days = (get_shanghai_date(fname) - window_start).days
    return 0 <= offset_days < 15


# Partition the file names: everything inside the test window goes to
# `test_files`, the remainder to `train_files` (original order is kept).
test_files = list(filter(is_test, file_list))
train_files = list(filter(lambda fname: not is_test(fname), file_list))

### Read base stations (cluster centres)

In [4]:
from src.ml.dataset import create_point_to_class_map

# Load the pre-computed cluster centres as a float32 tensor. Columns 0 and 1
# are used further down as latitude and longitude (see ref_lat / ref_long).
all_candidates = torch.tensor(np.load(cluster_path), dtype=torch.float32)

# Lookup structure built from the cluster centres by the project helper.
# NOTE(review): presumably maps a coordinate to its class index -- confirm
# against src.ml.dataset.create_point_to_class_map.
point_to_class_map = create_point_to_class_map(all_candidates)

### load dataset

Load dataset files into in-memory tensors.

In [5]:
from torch.utils.data import random_split

from src.ml.dataset import TrajectoryDataset, get_shanghai_date, CACHE_PATH

def read_file(fname: str):
    '''
    Read one dataset CSV from `dataset_path`.

    Returns a pair: the date obtained from the file name via
    `get_shanghai_date`, and a list with the values of the mapping that
    `from_dataframe` produces for sequences of length SEQ_LENGTH.
    '''
    frame = pd.read_csv(f'{dataset_path}/{fname}')
    file_date = get_shanghai_date(fname)
    trajectories = list(from_dataframe(frame, SEQ_LENGTH).values())
    return file_date, trajectories
    

def _load_or_build_dataset(files, cache_file):
    '''
    Return a TrajectoryDataset restored from `cache_file` when that file
    exists; otherwise build it from `files` and write the cache afterwards.
    '''
    dataset = TrajectoryDataset(
        sequence_length=SEQ_LENGTH,
        point_to_class_map=point_to_class_map,
    )

    if os.path.exists(cache_file):
        dataset.load(cache_file)
        return dataset

    dataset.read_files(files, read_file=read_file)
    dataset.save(cache_file)
    return dataset


train_set = _load_or_build_dataset(
    train_files,
    f'{CACHE_PATH}/{CACHE_PREFIX}_train_data.pt',
)

# 80/20 train/validation split; the generator is seeded so the split is repeatable.
train_set, valid_set = random_split(
    train_set,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(123),
)

test_set = _load_or_build_dataset(
    test_files,
    f'{CACHE_PATH}/{CACHE_PREFIX}_test_data.pt',
)

### Define pre-process pipeline

1. Convert to Cartesian coordinates by tangent-plane projection. The centre of the plane (the reference point) is chosen to be the median latitude and longitude of the cluster centres.
2. Normalize to [-1, +1] for better gradients.

In [6]:
from src.ml.utils import create_shanghai_preprocessor, to_cartesian

def _column_range(column):
    '''Return `(min, max)` of a tensor column as plain Python numbers.'''
    return column.min().item(), column.max().item()


# Reference point of the projection: median of column 0 (latitude) and of
# column 1 (longitude) over all cluster centres.
ref_lat = all_candidates[:, 0].median()
ref_long = all_candidates[:, 1].median()

# Project the centres once, only to measure the extent used for normalization;
# the projected copy is dropped right after.
all_candidates_cart = to_cartesian(all_candidates, ref_point=(ref_lat, ref_long))
min_x, max_x = _column_range(all_candidates_cart[:, 0])
min_y, max_y = _column_range(all_candidates_cart[:, 1])
del all_candidates_cart

preprocess = create_shanghai_preprocessor(
    ref_point=(ref_lat, ref_long),
    x_range=(min_x, max_x),
    y_range=(min_y, max_y),
)

## define model

In [7]:
from src.ml.model import TrajectoryModel
from src.ml.model.modules import TransformerTrajectoryEncoder, ContextFreeBaseStationEmbedding

# Width shared by the embedding output (`out_dim`) and the encoder input (`in_dim`).
model_dim = 128

# Context-free embedding variant -- this is the component being ablated in
# this notebook.
# NOTE(review): `feat_dim=(2, 64)` presumably means 2 input features mapped to
# a 64-wide hidden layer -- confirm against src.ml.model.modules.
base_station_embedding = ContextFreeBaseStationEmbedding(
    feat_dim=(2, 64),
    out_dim=model_dim,
    layer_norm=True
)

# Transformer encoder over sequences of at most SEQ_LENGTH positions.
# NOTE(review): the `hid_dim` tuple presumably encodes
# (model width, feed-forward width, number of heads) and `do_prob` the dropout
# probability -- confirm against src.ml.model.modules.
trajectory_encoder = TransformerTrajectoryEncoder(
    in_dim=model_dim,
    max_len=SEQ_LENGTH,
    hid_dim=(model_dim, model_dim * 2, 8),
    do_prob=0.2,
    n_blocks=4,
)

# Full model: embedding followed by the trajectory encoder.
model = TrajectoryModel(
    base_station_embedding=base_station_embedding,
    trajectory_encoder=trajectory_encoder,
)

# Adam is the optimizer in use; the SGD line is kept as a disabled alternative.
#optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Multiplies the learning rate by 0.5 on every scheduler step. How often the
# scheduler is stepped is decided inside `train` and is not visible here.
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)

## train model

### define train config

In [8]:
from src.ml.config import TrainConfig

# Bundle everything the training loop needs into one config object.
config = TrainConfig(
    optimizer=optimizer,
    lr_scheduler=lr_scheduler,
    # NOTE(review): only the train and valid splits are registered here; the
    # `test_set` loaded earlier is never passed. That is presumably why the
    # `[test]` progress bars in the recorded output show 0 iterations -- confirm
    # whether a 'test' entry is supported and intended.
    datasets={ 'train': train_set, 'valid': valid_set },
    n_epoch=5,
    # Cluster centres in their original (lat, long) form; `preprocess` below
    # carries the projection/normalization pipeline.
    all_candidates=all_candidates,
    verbose=True,
    cuda=USE_CUDA,
    checkpoint=checkpoint,
    preprocess=preprocess,
    batch_size=64
)

In [9]:
from src.ml.train import train

# Move the model to the GPU before training when CUDA is in use.
if USE_CUDA:
    model.cuda()

# Run the training loop; the per-epoch metrics it prints are recorded below.
state = train(model, config)

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
[train] 1: 100%|██████████| 6382/6382 [07:45<00:00, 13.71it/s]


loss: 0.31887871736210555
elapsed: 465.47038221359253
perplexity: 1.375584480274151
accuracy: 0.932148351898418


[valid] 1: 100%|██████████| 1596/1596 [01:13<00:00, 21.75it/s]


mdev: 1152.1501311873433
elapsed: 73.38372230529785
perplexity: 1.2645568850507463
accuracy: 0.9459654942416308


[test] 1: 0it [00:00, ?it/s]


elapsed: 0.0029337406158447266
perplexity: 1.0
accuracy: 0


[train] 2: 100%|██████████| 6382/6382 [07:34<00:00, 14.04it/s]


loss: 0.2275427501486391
elapsed: 454.5651731491089
perplexity: 1.2555111118935258
accuracy: 0.9460946181862006


[valid] 2: 100%|██████████| 1596/1596 [01:09<00:00, 22.91it/s]


mdev: 725.5428294131631
elapsed: 69.65797448158264
perplexity: 1.2533561433913258
accuracy: 0.9464259074071893


[test] 2: 0it [00:00, ?it/s]


elapsed: 0.0010063648223876953
perplexity: 1.0
accuracy: 0


[train] 3: 100%|██████████| 6382/6382 [07:28<00:00, 14.21it/s]


loss: 0.2206352829631968
elapsed: 449.00147891044617
perplexity: 1.2468685934070063
accuracy: 0.9464333057403564


[valid] 3: 100%|██████████| 1596/1596 [01:08<00:00, 23.38it/s]


mdev: 725.5561642563134
elapsed: 68.27413845062256
perplexity: 1.2466299074982474
accuracy: 0.9464259074445356


[test] 3: 0it [00:00, ?it/s]


perplexity: 1.0
accuracy: 0


[train] 4: 100%|██████████| 6382/6382 [07:32<00:00, 14.11it/s]


loss: 0.21761708424914897
elapsed: 452.20764780044556
perplexity: 1.243110969701829
accuracy: 0.9465519094384975


[valid] 4: 100%|██████████| 1596/1596 [01:15<00:00, 21.25it/s]


mdev: 725.5491156135885
elapsed: 75.1008927822113
perplexity: 1.2435345462455596
accuracy: 0.9464261113925088


[test] 4: 0it [00:00, ?it/s]


elapsed: 0.0024976730346679688
perplexity: 1.0
accuracy: 0


[train] 5: 100%|██████████| 6382/6382 [07:49<00:00, 13.61it/s]


loss: 0.2160526108193999
elapsed: 469.0064616203308
perplexity: 1.2411676761314427
accuracy: 0.9465818065380609


[valid] 5: 100%|██████████| 1596/1596 [01:13<00:00, 21.65it/s]


mdev: 725.5491156135885
elapsed: 73.71238684654236
perplexity: 1.2427644400003304
accuracy: 0.9464261113925088


[test] 5: 0it [00:00, ?it/s]

elapsed: 0.001535654067993164
perplexity: 1.0
accuracy: 0





## Experiment results:
| Model | Perplexity | Mean Error (m) | Accuracy |
|-----------|------------|------------|----------|
| w/o context | 1.24 | 725 | 0.9464261113925088 |
| w/ context | 1.24 | 721 | 0.9464692816387501 |