# Machine Learning baseline

In [1]:
from sklearn.model_selection import train_test_split
from fastcore.basics import Path, AttrDict
from dataset import SPLID
import torch

import os

In [2]:
# Notebook-wide settings, accessed by attribute (e.g. config.seed).
# NOTE(review): valid_ratio is not used by the split at the bottom of this
# cell, which holds out 25% of the files - confirm which value is intended.
config = AttrDict(
    challenge_data_dir = Path('~/Projects/splid-comp/dataset').expanduser(),
    valid_ratio = 0.1,
    kernel_size = 5,
    tolerance= 6, # Default evaluation tolerance
    seed = 42, # Fixes the train/test split so separate runs are comparable
)

# Directory that is scanned for training CSV files
train_data_dir = config.challenge_data_dir / "train_v2"

# Path to the ground-truth labels CSV (only the path is built here)
ground_truth = config.challenge_data_dir / 'train_labels_v2.csv'

# Collect the full path of every CSV file found directly in train_data_dir
datalist = [
    os.path.join(train_data_dir, file)
    for file in os.listdir(train_data_dir)
    if file.endswith(".csv")
]

# Order files numerically by their stem (file names must be integers).
# os.listdir order is arbitrary, so sorting first is what makes the seeded
# split below pick the same files on every run.
datalist = sorted(datalist, key=lambda i: int(os.path.splitext(os.path.basename(i))[0]))


# Hold out 25% of the files; random_state makes the partition reproducible.
train_datalist, test_datalist = train_test_split(
    datalist, test_size=0.25, random_state=config.seed
)


In [3]:
from datetime import datetime
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter
from models import UTime

# Train a UTime model on the selected feature columns, log losses to
# TensorBoard and keep the weights with the lowest held-out loss.

cols = ['Inclination (deg)', 'Longitude (deg)']
trn_data = SPLID(train_datalist, ground_truth, cols)
# Reuse the classes fitted on the training set for the held-out set.
tst_data = SPLID(test_datalist, ground_truth, cols, classes=trn_data.le_type.classes_)

trn_loader = data.DataLoader(trn_data, shuffle=True, batch_size=10)
# Shuffling is pointless for evaluation: the loss is averaged over all batches.
tst_loader = data.DataLoader(tst_data, shuffle=False, batch_size=10)

lr = 5e-6
n_epochs = 1000
log_every = 50  # number of training batches averaged per logged loss value
save_dir = 'saved_models'
best_tst_loss = float('inf')

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = UTime(len(trn_data.le_type.classes_))
model = model.to(device)
# NOTE(review): nn.NLLLoss expects log-probabilities and integer targets in
# [0, n_classes - 1]. Confirm that UTime ends in a log-softmax and that SPLID
# labels stay in range; an out-of-range target is one possible cause of an
# opaque CUDA failure during training.
criterion = nn.NLLLoss()
opt = torch.optim.Adam(model.parameters(), lr=lr)

print('Start model training')

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/splid_trainer_{}'.format(timestamp))

# torch.save does not create missing directories.
os.makedirs(save_dir, exist_ok=True)

try:
    for epoch in range(1, n_epochs + 1):

        print('EPOCH {}:'.format(epoch))

        running_loss = 0.
        # Mean loss of the most recent `log_every` batches; stays 0 when an
        # epoch has fewer batches than that.
        last_loss = 0.
        model.train(True)

        for i, (x_batch, y_batch) in enumerate(trn_loader):

            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            opt.zero_grad()
            out = model(x_batch)
            loss = criterion(out, y_batch)
            loss.backward()
            opt.step()

            running_loss += loss.item()
            if i % log_every == log_every - 1:
                last_loss = running_loss / log_every # loss per batch
                print('  batch {} loss: {}'.format(i + 1, last_loss))
                # Global batch counter; epoch is 1-based, hence the -1.
                tb_x = (epoch - 1) * len(trn_loader) + i + 1
                writer.add_scalar('Loss/train', last_loss, tb_x)
                running_loss = 0.

        running_tst_loss = 0.0
        n_tst_batches = 0
        model.eval()

        # Disable gradient computation and reduce memory consumption.
        with torch.no_grad():
            for x_batch, y_batch in tst_loader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                outputs = model(x_batch)
                # .item() keeps the running sum a Python float instead of a
                # device tensor that would leak into logs and best_tst_loss.
                running_tst_loss += criterion(outputs, y_batch).item()
                n_tst_batches += 1

        # NaN for an empty loader: it never compares as "better", so no
        # checkpoint is written from a meaningless validation pass.
        if n_tst_batches:
            avg_tst_loss = running_tst_loss / n_tst_batches
        else:
            avg_tst_loss = float('nan')
        print('LOSS train {} valid {}'.format(last_loss, avg_tst_loss))

        # Log the running loss averaged per batch
        # for both training and validation
        writer.add_scalars('Training vs. Validation Loss',
                        { 'Training' : last_loss, 'Validation' : avg_tst_loss },
                        epoch)
        writer.flush()

        # Track best performance, and save the model's state
        if avg_tst_loss < best_tst_loss:
            best_tst_loss = avg_tst_loss
            model_path = 'model_{}.pth'.format(timestamp)
            torch.save(model.state_dict(), os.path.join(save_dir, model_path))
finally:
    # Release the event file even when training is interrupted or crashes.
    writer.close()



Loading 1425 files...
Loaded file 0 of 1425
Loaded file 100 of 1425
Loaded file 150 of 1425
Loaded file 200 of 1425
Loaded file 250 of 1425
Loaded file 300 of 1425
Loaded file 400 of 1425
Loaded file 450 of 1425
Loaded file 500 of 1425
Loaded file 550 of 1425
Loaded file 600 of 1425
Loaded file 650 of 1425
Loaded file 700 of 1425
Loaded file 750 of 1425
Loaded file 800 of 1425
Loaded file 850 of 1425
Loaded file 900 of 1425
Loaded file 950 of 1425
Loaded file 1000 of 1425
Loaded file 1100 of 1425
Loaded file 1150 of 1425
Loaded file 1200 of 1425
Loaded file 1250 of 1425
Loaded file 1300 of 1425
Loaded file 1350 of 1425
Loaded file 1400 of 1425
Joining dataframes...
Done!
Loading 475 files...
Loaded file 0 of 475
Loaded file 100 of 475
Loaded file 150 of 475
Loaded file 200 of 475
Loaded file 250 of 475
Loaded file 300 of 475
Loaded file 350 of 475
Loaded file 400 of 475
Loaded file 450 of 475
Joining dataframes...
Done!
Start model training
EPOCH 1:
  batch 50 loss: 2.33897198677063
  

RuntimeError: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


The `NodeDetectionEvaluator` class in the evaluation module not only computes
the overall score for a given dataset, but also provides per-object evaluations
and even plots that show what the predictions look like on a timeline.