# Linear Model

### Prepare environment

#### const parameters

In [5]:
# Root folder of the dataset; the loading cell expects one sub-folder per
# entry in `labels` underneath it.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because other cells reference it.
# Keep the trailing "/" on both paths: code below may build file paths from
# them by plain string concatenation.
dir = "../dataset/variance256_aug/"
# Folder the per-run confusion matrices are written to.
out = "../results/confusion_matrices/linear/"
# Class names; a sample's integer target is the position of its label here.
labels = ["canter", "trot", "walk"]

# Seed shared by torch, numpy and the train/test split.
rng = 42

batch_size  = 8     # samples per mini-batch in the train and test loaders
epochs      = 10    # passes over the training data per run

#### imports

In [10]:
from IPython.display import clear_output
import numpy as np
import os

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim

#### var parameters

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create the output folder (and missing parents); no error if it exists.
os.makedirs(out, exist_ok=True)

# Seed the torch (CPU and all CUDA devices) and numpy generators with the
# same value; intended to make runs repeatable.
torch.manual_seed(rng)
torch.cuda.manual_seed_all(rng)
np.random.seed(rng)
# Request deterministic cuDNN algorithms and switch off its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

#### load variances

In [8]:
# Sorted listing of the dataset root folder.
files = sorted(os.listdir(dir))

X_list, y_list = [], []

# Visit the class folders in the order given by `labels`; the position of a
# label in that list is the integer target of every sample in its folder.
for class_index, label in enumerate(labels):
    # os.path.join works whether or not `dir` ends with a path separator.
    class_dir = os.path.join(dir, label)
    # os.listdir returns entries in arbitrary order, so sort them to keep the
    # sample order (and therefore the later split) stable.
    for file in sorted(os.listdir(class_dir)):
        var = np.load(os.path.join(class_dir, file)).astype(np.float32)
        X_list.append(var)
        y_list.append(class_index)

X = np.stack(X_list, axis=0)       # shape: (N, 256, 256)
y = np.array(y_list)               # shape: (N,)

#### train/test split

In [9]:
# Hold out 20% of the samples for testing; `random_state=rng` fixes the split.
# NOTE(review): no `stratify=y` is passed, so class proportions may differ
# between the train and test parts - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=rng)

## Tensors

### Train

In [None]:
# Insert a singleton axis at position 1 (presumably the channel axis) and
# convert the training inputs to float32 tensors.
X_tensor = torch.tensor(X_train[:, None, :, :], dtype=torch.float32)
# Targets are stored as int64 (torch.long) class indices.
y_tensor = torch.tensor(y_train, dtype=torch.long)

# Pair inputs with targets and serve them in reshuffled mini-batches.
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

### Test

In [None]:
# Same conversion as for the training data: add a singleton axis at
# position 1 and store the inputs as float32 tensors.
X_test_tensor = torch.tensor(X_test[:, None, :, :], dtype=torch.float32)
# Targets are stored as int64 (torch.long) class indices.
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# The test loader keeps the sample order fixed (shuffle=False).
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

## Model

In [None]:
class LinearClassifier(nn.Module):
    """Single fully connected layer that maps a flattened input to class logits.

    Args:
        in_features: Number of values per sample once all non-batch
            dimensions are flattened. Defaults to ``256 * 256``.
        num_classes: Number of output logits. Defaults to ``3``.
    """

    def __init__(self, in_features=256 * 256, num_classes=3):
        super().__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the raw logits, shape ``(batch, num_classes)``.

        Every dimension of ``x`` after the first is flattened into one
        feature vector before the linear layer is applied.
        """
        # reshape behaves like view here but also accepts non-contiguous
        # input (e.g. a transposed tensor), where view would raise.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)

## Training

In [None]:
def train(dataLoader, model, criterion, optimizer, device=None):
    """Train ``model`` for one pass over ``dataLoader``.

    Args:
        dataLoader: Iterable of ``(inputs, targets)`` batches; its ``dataset``
            must support ``len``.
        model: Module to optimize; it is switched to training mode.
        criterion: Callable returning the loss for ``(predictions, targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none).

    Every 25th batch (starting with the first) the current loss and the
    number of samples processed so far are printed.
    """
    model.train()

    if device is None:
        # Batches have to live where the model's parameters live.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataLoader.dataset)
    seen = 0  # samples processed so far; exact even if the last batch is short

    for batch, (X, y) in enumerate(dataLoader):
        X, y = X.to(device), y.to(device)

        # Clear gradients first so nothing left over from earlier backward
        # passes leaks into this update.
        optimizer.zero_grad()

        # Prediction error
        pred = model(X)
        loss = criterion(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        seen += len(X)
        if batch % 25 == 0:
            print(f"loss: {loss.item():>7.7f}  \t[{seen:>5d}/{size:>5d}]")

In [None]:
def eval(testLoader, model, criterion, device=None):
    """Evaluate ``model`` on ``testLoader`` and print accuracy and mean loss.

    NOTE(review): the name shadows the builtin ``eval``; it is kept because
    other cells call it.

    Args:
        testLoader: Iterable of ``(inputs, targets)`` batches; its ``dataset``
            must support ``len``.
        model: Module to evaluate; it is switched to evaluation mode.
        criterion: Callable returning the loss for ``(predictions, targets)``.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none).

    Returns:
        ``(accuracy, avg_loss)`` as Python floats: the fraction of correctly
        classified samples and the mean of the per-batch losses. Both are
        NaN when the loader yields no data.
    """
    model.eval()

    if device is None:
        # Batches have to live where the model's parameters live.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(testLoader.dataset)
    num_batches = len(testLoader)

    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for X, y in testLoader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            # Accumulate plain floats instead of tensors.
            test_loss += criterion(pred, y).item()

            correct += (pred.argmax(dim=1) == y).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    avg_loss = test_loss / num_batches if num_batches else float("nan")
    accuracy = correct / size if size else float("nan")

    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.2f}%, Avg loss: {avg_loss:>8.4f} \n")
    return accuracy, avg_loss

## Loop
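Training is repeated on 10 separately initialized models to account for run-to-run randomness (weight initialization and batch shuffling); the confusion matrix of each run is saved.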

In [None]:
def calc_confusion_matrix(preds, labels, num_classes=3):
    """Count (true label, predicted label) pairs.

    rows = true labels, cols = predicted labels

    Args:
        preds: Predicted class indices (tensor or sequence of ints).
        labels: True class indices, same length as ``preds``.
        num_classes: Number of classes; the result is
            ``num_classes x num_classes``. Defaults to ``3``.

    Returns:
        An int64 CPU tensor ``cm`` where ``cm[t, p]`` is the number of
        samples with true label ``t`` that were predicted as ``p``.

    Raises:
        ValueError: If ``preds`` and ``labels`` differ in length.
        IndexError: If any index lies outside ``[0, num_classes)``.
    """
    preds = torch.as_tensor(preds, dtype=torch.int64).reshape(-1).cpu()
    labels = torch.as_tensor(labels, dtype=torch.int64).reshape(-1).cpu()

    if preds.numel() != labels.numel():
        raise ValueError(
            f"preds and labels differ in length: {preds.numel()} != {labels.numel()}"
        )
    if preds.numel() == 0:
        return torch.zeros((num_classes, num_classes), dtype=torch.int64)

    lowest = min(preds.min().item(), labels.min().item())
    highest = max(preds.max().item(), labels.max().item())
    if lowest < 0 or highest >= num_classes:
        raise IndexError(
            f"class indices must lie in [0, {num_classes}), got range [{lowest}, {highest}]"
        )

    # Encode each (true, predicted) pair as one flat cell index and count all
    # cells in a single pass instead of looping over samples in Python.
    flat = labels * num_classes + preds
    counts = torch.bincount(flat, minlength=num_classes * num_classes)
    return counts.reshape(num_classes, num_classes)

In [None]:
def save_results(model, testLoader, run):
    """Save the confusion matrix of ``model`` on ``testLoader``.

    The model is put into evaluation mode, every batch is classified without
    gradient tracking, and the resulting confusion matrix is stored with
    ``torch.save`` under ``f"{out}{run}.pth"``.

    Args:
        model: Module producing one score per class for each sample.
        testLoader: Iterable of ``(inputs, targets)`` batches.
        run: Identifier used as the file name stem.
    """
    model.eval()

    predicted_batches = []
    target_batches = []

    with torch.no_grad():
        for inputs, targets in testLoader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # The predicted class is the index of the highest score.
            scores = model(inputs)
            predicted_batches.append(scores.argmax(dim=1))
            target_batches.append(targets)

    confusion = calc_confusion_matrix(
        torch.cat(predicted_batches),
        torch.cat(target_batches),
    )
    torch.save(confusion, f"{out}{run}.pth")
    

In [None]:
# Train and evaluate 10 separately constructed models; `run` is passed to
# save_results to identify each run's output.
for run in range(10):
    
    # A new model, loss and optimizer are created for every run.
    model = LinearClassifier().to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=1e-3,
        momentum=0.9)
    
    # Emit three line breaks before this run's log output.
    print("", end="\n\n\n")
    
    for epoch in range(epochs):
    
        train(loader, model, criterion, optimizer)
        # wait=True delays clearing the cell output until new output arrives.
        clear_output(wait=True)
        # NOTE(review): the per-epoch check uses the same test_loader that
        # later produces the saved results; there is no separate validation set.
        eval(test_loader, model, criterion)
    
    # Persist the results of the fully trained model for this run.
    save_results(model, test_loader, run)    
