In [1]:
import os
from os.path import dirname

root_dir = dirname(os.getcwd())
os.chdir(root_dir)

In [2]:
import pickle

with open('data/vehicle_data.pkl', 'rb') as f:
    vehicle_data = pickle.load(f)

In [3]:
import yaml
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score
from src.nn.dataloader import get_dataloader
from src.nn.model import NeuralNetwork, LogisticRegression

In [4]:
skf = StratifiedKFold(n_splits=10, shuffle=True)
num_epochs = 100

In [5]:
neural_train_acc = []
neural_train_prec = []
neural_train_rec = []
neural_test_acc = []
neural_test_prec = []
neural_test_rec = []

In [6]:
for i, (train_index, test_index) in enumerate(skf.split(vehicle_data['data'], vehicle_data['label'])):
    print('-'*80)
    print(f'Fold {i+1}')
    train_subset = {
        'data': vehicle_data['data'][train_index],
        'label': vehicle_data['label'][train_index],
    }
    test_subset = {
        'data': vehicle_data['data'][test_index],
        'label': vehicle_data['label'][test_index],
    }

    train_loader = get_dataloader(train_subset)
    test_loader = get_dataloader(test_subset)

    model = NeuralNetwork()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    best_loss = float('inf')
    best_metrics = {}

    for epoch in range(num_epochs):
        model.train()
        pred_label = []
        true_label = []
        running_loss = 0.0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            pred_label.extend(labels.cpu().numpy())
            true_label.extend(preds.cpu().numpy())

        epoch_loss = running_loss / len(train_loader)
        acc = accuracy_score(pred_label, true_label)
        prec = precision_score(pred_label, true_label,
                               average='macro', zero_division=0)
        rec = recall_score(pred_label, true_label,
                           average='macro', zero_division=0)

        if epoch_loss < best_loss:
            best_loss = epoch_loss
            neural_train_acc.append(acc)
            neural_train_prec.append(prec)
            neural_train_rec.append(rec)
            best_metrics = {
                'accuracy': acc,
                'precision': prec,
                'recall': rec
            }

        if (epoch + 1) % 20 == 0:
            print(
                f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}')

    with open(f'results/fold{i+1}_neural_train.yaml', 'w') as f:
        yaml.safe_dump(best_metrics, f, sort_keys=False)

    model.eval()
    pred_label = []
    true_label = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            pred_label.extend(preds.cpu().numpy())
            true_label.extend(labels.cpu().numpy())

    acc = accuracy_score(pred_label, true_label)
    prec = precision_score(pred_label, true_label,
                           average='macro', zero_division=0)
    rec = recall_score(pred_label, true_label,
                       average='macro', zero_division=0)
    neural_test_acc.append(acc)
    neural_test_prec.append(prec)
    neural_test_rec.append(rec)
    eval_metrics = {
        'accuracy': acc,
        'precision': prec,
        'recall': rec
    }

    with open(f'results/fold{i+1}_neural_test.yaml', 'w') as f:
        yaml.safe_dump(eval_metrics, f, sort_keys=False)

    print('-'*80)
    print(f'Fold {i+1} Test Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}')
    print('-'*80)

--------------------------------------------------------------------------------
Fold 1
Epoch 20, Loss: 0.5824, Acc: 0.9950, Prec: 0.9951, Rec: 0.9951
Epoch 40, Loss: 0.5553, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 60, Loss: 0.5529, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 80, Loss: 0.5523, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 100, Loss: 0.5519, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
--------------------------------------------------------------------------------
Fold 1 Test Acc: 0.8261, Prec: 0.8274, Rec: 0.8524
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Fold 2
Epoch 20, Loss: 0.5788, Acc: 0.9901, Prec: 0.9901, Rec: 0.9901
Epoch 40, Loss: 0.5553, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 60, Loss: 0.5531, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 80, Loss: 0.5523, Acc: 1.0000, Prec: 1.0000, Rec: 1.0000
Epoch 100, Loss: 0.5520, Acc: 1.0000, Prec:

In [7]:
avg_neural_train_result = {
    'accuracy': float(np.mean(neural_train_acc)),
    'precision': float(np.mean(neural_train_prec)),
    'recall': float(np.mean(neural_train_rec))
}

avg_neural_test_result = {
    'accuracy': float(np.mean(neural_test_acc)),
    'precision': float(np.mean(neural_test_prec)),
    'recall': float(np.mean(neural_test_rec))
}

with open('results/average_neural_train.yaml', 'w') as f:
    yaml.safe_dump(avg_neural_train_result, f, sort_keys=False)

with open('results/average_neural_test.yaml', 'w') as f:
    yaml.safe_dump(avg_neural_test_result, f, sort_keys=False)

In [8]:
logistic_train_acc = []
logistic_train_prec = []
logistic_train_rec = []
logistic_test_acc = []
logistic_test_prec = []
logistic_test_rec = []

In [9]:
for i, (train_index, test_index) in enumerate(skf.split(vehicle_data['data'], vehicle_data['label'])):
    print('-'*80)
    print(f'Fold {i+1}')
    train_subset = {
        'data': vehicle_data['data'][train_index],
        'label': vehicle_data['label'][train_index],
    }
    test_subset = {
        'data': vehicle_data['data'][test_index],
        'label': vehicle_data['label'][test_index],
    }

    train_loader = get_dataloader(train_subset)
    test_loader = get_dataloader(test_subset)

    model = LogisticRegression()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    best_loss = float('inf')
    best_metrics = {}

    for epoch in range(num_epochs):
        model.train()
        pred_label = []
        true_label = []
        running_loss = 0.0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            pred_label.extend(labels.cpu().numpy())
            true_label.extend(preds.cpu().numpy())

        epoch_loss = running_loss / len(train_loader)
        acc = accuracy_score(pred_label, true_label)
        prec = precision_score(pred_label, true_label,
                               average='macro', zero_division=0)
        rec = recall_score(pred_label, true_label,
                           average='macro', zero_division=0)
        logistic_train_acc.append(acc)
        logistic_train_prec.append(prec)
        logistic_train_rec.append(rec)

        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_metrics = {
                'accuracy': acc,
                'precision': prec,
                'recall': rec
            }

        if (epoch + 1) % 20 == 0:
            print(
                f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}, Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}')

    with open(f'results/fold{i+1}_logistic_train.yaml', 'w') as f:
        yaml.safe_dump(best_metrics, f, sort_keys=False)

    model.eval()
    pred_label = []
    true_label = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            pred_label.extend(preds.cpu().numpy())
            true_label.extend(labels.cpu().numpy())

    acc = accuracy_score(pred_label, true_label)
    prec = precision_score(pred_label, true_label,
                           average='macro', zero_division=0)
    rec = recall_score(pred_label, true_label,
                       average='macro', zero_division=0)
    logistic_test_acc.append(acc)
    logistic_test_prec.append(prec)
    logistic_test_rec.append(rec)

    eval_metrics = {
        'accuracy': acc,
        'precision': prec,
        'recall': rec
    }

    with open(f'results/fold{i+1}_logistic_test.yaml', 'w') as f:
        yaml.safe_dump(eval_metrics, f, sort_keys=False)
    print('-'*80)
    print(f'Fold {i+1} Test Acc: {acc:.4f}, Prec: {prec:.4f}, Rec: {rec:.4f}')
    print('-'*80)

--------------------------------------------------------------------------------
Fold 1
Epoch 20, Loss: 0.8860, Acc: 0.7030, Prec: 0.7346, Rec: 0.7020
Epoch 40, Loss: 0.8245, Acc: 0.8069, Prec: 0.8132, Rec: 0.8065
Epoch 60, Loss: 0.7820, Acc: 0.8317, Prec: 0.8348, Rec: 0.8314
Epoch 80, Loss: 0.7554, Acc: 0.8564, Prec: 0.8571, Rec: 0.8562
Epoch 100, Loss: 0.7300, Acc: 0.8762, Prec: 0.8765, Rec: 0.8760
--------------------------------------------------------------------------------
Fold 1 Test Acc: 0.5652, Prec: 0.5714, Rec: 0.5602
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Fold 2
Epoch 20, Loss: 0.8852, Acc: 0.7277, Prec: 0.7358, Rec: 0.7272
Epoch 40, Loss: 0.8205, Acc: 0.8020, Prec: 0.8023, Rec: 0.8019
Epoch 60, Loss: 0.7859, Acc: 0.8119, Prec: 0.8127, Rec: 0.8118
Epoch 80, Loss: 0.7673, Acc: 0.8465, Prec: 0.8470, Rec: 0.8466
Epoch 100, Loss: 0.7466, Acc: 0.8515, Prec:

In [10]:
avg_logistic_train_result = {
    'accuracy': float(np.mean(logistic_train_acc)),
    'precision': float(np.mean(logistic_train_prec)),
    'recall': float(np.mean(logistic_train_rec))
}

avg_logistic_test_result = {
    'accuracy': float(np.mean(logistic_test_acc)),
    'precision': float(np.mean(logistic_test_prec)),
    'recall': float(np.mean(logistic_test_rec))
}

with open('results/average_logistic_train.yaml', 'w') as f:
    yaml.safe_dump(avg_logistic_train_result, f, sort_keys=False)

with open('results/average_logistic_test.yaml', 'w') as f:
    yaml.safe_dump(avg_logistic_test_result, f, sort_keys=False)