In [2]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
from tqdm import trange
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
df = pd.read_csv('labeled_data/final_traj.csv')
df.drop(['Unnamed: 0'], axis=1, inplace=True)
# split into training and test
train, test = train_test_split(df, test_size=0.2)

# train with KNN using TransportMode as target
X_train = train.drop('TransportMode', axis=1)
y_train = train['TransportMode']
X_test = test.drop('TransportMode', axis=1)
y_test = test['TransportMode']

In [55]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.LogSoftmax(dim=1)
        )
    
    def forward(self, x):
        return self.net(x)

traj_nn = MLP(4, 100, 11).to(DEVICE)

In [6]:
from sklearn.calibration import LabelEncoder

enc = LabelEncoder()
enc.fit(y_train.values.reshape(-1, 1))
enc_y_train = enc.transform(y_train.values.reshape(-1, 1))
enc.fit(y_test.values.reshape(-1, 1))
enc_y_test = enc.transform(y_test.values.reshape(-1, 1))

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [83]:
X_train_t = torch.tensor(X_train.values, dtype=torch.float32, requires_grad=True)[:X_train.shape[0] - X_train.shape[0] % 64, :]
y_train_t = torch.tensor(enc_y_train, dtype=torch.int64)[:enc_y_train.shape[0] - enc_y_train.shape[0] % 64]
X_test_t = torch.tensor(X_test.values, dtype=torch.float32)[:X_test.shape[0] - X_test.shape[0] % 64, :]
y_test_t = torch.tensor(enc_y_test, dtype=torch.int64)[:enc_y_test.shape[0] - enc_y_test.shape[0] % 64]
train_tensords = torch.utils.data.TensorDataset(X_train_t, y_train_t)
test_tensords = torch.utils.data.TensorDataset(X_test_t, y_test_t)

## Get data loaders

In [84]:
batch_size=64
train_loader = torch.utils.data.DataLoader(train_tensords, batch_size=batch_size, shuffle=True) 
test_loader = torch.utils.data.DataLoader(test_tensords, batch_size=batch_size, shuffle=False, num_workers=True)

In [9]:
sss = iter(train_loader)

In [76]:
def train_one_epoch(train_loader, model, device, optimizer, log_interval, epoch):
    model.train()
    losses = []
    counter = []
    
    for i, (img, label) in enumerate(train_loader):
        img, label = img.to(device), label.to(device)
        
        # ------------------
        # Write your implementation here.
        optimizer.zero_grad()
        ent_loss = torch.nn.CrossEntropyLoss()
        model_out = model(img)
        # target is 1 where ground truth is true
        loss = ent_loss(model_out, label)
        
        # optimize
        loss.backward()
        optimizer.step()
               
        # ------------------
    
        # Record training loss every log_interval and keep counter of total training images seen
        if (i+1) % log_interval == 0:
            losses.append(loss.item())
            counter.append(
                (i * batch_size) + img.size(0) + epoch * len(train_loader.dataset))

    return losses, counter

In [77]:
def test_one_epoch(test_loader, model, device):
    model.eval()
    test_loss = 0
    num_correct = 0
    
    with torch.no_grad():
        for i, (img, label) in enumerate(test_loader):
            img, label = img.to(device), label.to(device)

            # ------------------
            # Write your implementation here.
            
            output = model(img)
            pred = torch.argmax(output, dim=1) # Get index of largest log-probability and use that as prediction
            
            ent_loss = torch.nn.CrossEntropyLoss()
            # get the number of correct predictions and append to num_correct
            num_correct += (pred == label).sum().item()
            test_loss += ent_loss(output, label)

            # ------------------
            
    test_loss /= len(test_loader.dataset)
    return test_loss, num_correct

In [85]:
lr = 0.01
max_epochs=2
gamma = 0.95

# Recording data
log_interval = 100

# Instantiate optimizer (model was created in previous cell)
optimizer = torch.optim.SGD(traj_nn.parameters(), lr=lr)

train_losses = []
train_counter = []
test_losses = []
test_correct = []
for epoch in trange(max_epochs, leave=True, desc='Epochs'):
    train_loss, counter = train_one_epoch(train_loader, traj_nn, DEVICE, optimizer, log_interval, epoch)
    test_loss, num_correct = test_one_epoch(test_loader, traj_nn, DEVICE)

    # Record results
    train_losses.extend(train_loss)
    train_counter.extend(counter)
    test_losses.append(test_loss)
    test_correct.append(num_correct)

print(f"Test accuracy: {test_correct[-1]/len(test_loader.dataset)}")

Epochs: 100%|██████████| 2/2 [2:54:24<00:00, 5232.30s/it]  

Test accuracy: 0.6024590877437326





In [91]:
print(f"Test accuracy: {test_correct[-1]/len(test_loader.dataset)}")

Test accuracy: 0.6024590877437326
