# Training and Evaluation for CINIC10 using SimpleCNN and OptimalCNN

This notebook loads the converted dataset, trains the SimpleCNN model, and performs evaluation and inference on the validation set.

In [2]:
import os
import torch
import torch.optim as optim
import torch.nn as nn

from DataObjects import DataLoader
from Architectures.SimpleCNN import SimpleCNN
from Architectures.OptimalCNN import OptimalCNN
from Architectures.StochasticDepthCNN import StochasticDepthCNN

from typing import Optional, Tuple

from utils import save_model, load_model

In [3]:
def train_model(model: nn.Module, train_loader: DataLoader, val_loader: DataLoader,
                num_epochs: int = 10, lr: float = 0.001,
                device: torch.device = None) -> None:
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion: nn.Module = nn.CrossEntropyLoss()
    optimizer: torch.optim.Optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(num_epochs):
        model.train()
        train_loss: float = 0.0
        train_correct: int = 0
        total_train: int = 0
        
        for batch in train_loader:
            inputs = batch.data.to(device)
            labels = batch.labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            train_correct += torch.sum(preds == labels).item()
            total_train += labels.size(0)
        
        avg_train_loss = train_loss / total_train
        train_acc = train_correct / total_train
        
        model.eval()
        val_loss: float = 0.0
        val_correct: int = 0
        total_val: int = 0
        
        with torch.no_grad():
            for batch in val_loader:
                inputs = batch.data.to(device)
                labels = batch.labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                val_correct += torch.sum(preds == labels).item()
                total_val += labels.size(0)
        
        avg_val_loss = val_loss / total_val
        val_acc = val_correct / total_val
        
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {avg_train_loss:.4f}, Train acc: {train_acc:.4f} | Val loss: {avg_val_loss:.4f}, Val acc: {val_acc:.4f}")

def infer(model: nn.Module, data_loader: DataLoader,
          device: torch.device = None) -> list:
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    predictions: list = []
    
    with torch.no_grad():
        for batch in data_loader:
            inputs = batch.data.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            predictions.extend(preds.cpu().tolist())
    
    return predictions


def evaluate(model: nn.Module,
             test_loader: Optional[DataLoader] = None,
             device: Optional[torch.device] = None) -> Tuple[float, float]:

    if test_loader is None:
        test_dir = os.path.join("Data", "Data_converted", "test")
        test_loader = DataLoader(test_dir, batch_size=64, shuffle=True)
        
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    model.eval()
    
    test_loss = 0.0
    test_correct = 0
    total_test = 0
    
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch.data.to(device)
            labels = batch.labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            test_correct += torch.sum(preds == labels).item()
            total_test += labels.size(0)
    
    avg_test_loss = test_loss / total_test
    test_acc = test_correct / total_test
    
    print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_acc:.4f}")
    return avg_test_loss, test_acc


In [4]:
# Setup directories and DataLoaders
train_dir = os.path.join("Data", "Data_converted", "train")
val_dir = os.path.join("Data", "Data_converted", "valid")

train_loader = DataLoader(train_dir, batch_size=64, shuffle=True, max_per_class=150)
val_loader = DataLoader(val_dir, batch_size=64, shuffle=False, max_per_class=150) 

# Instantiate the model
model = SimpleCNN(num_classes=10)

In [5]:
# Train the model
train_model(model, train_loader, val_loader, num_epochs=10, lr=0.001)

Epoch 1/10 - Train loss: 10.5128, Train acc: 0.1200 | Val loss: 2.2250, Val acc: 0.1593
Epoch 2/10 - Train loss: 2.0804, Train acc: 0.2513 | Val loss: 2.0442, Val acc: 0.2760
Epoch 3/10 - Train loss: 1.7536, Train acc: 0.3887 | Val loss: 2.0514, Val acc: 0.2940
Epoch 4/10 - Train loss: 1.4589, Train acc: 0.5000 | Val loss: 1.9689, Val acc: 0.3480
Epoch 5/10 - Train loss: 1.1925, Train acc: 0.5860 | Val loss: 2.1933, Val acc: 0.3540
Epoch 6/10 - Train loss: 0.9969, Train acc: 0.6773 | Val loss: 2.3573, Val acc: 0.3353
Epoch 7/10 - Train loss: 0.7333, Train acc: 0.7527 | Val loss: 2.3798, Val acc: 0.3520
Epoch 8/10 - Train loss: 0.5155, Train acc: 0.8440 | Val loss: 2.6336, Val acc: 0.3580
Epoch 9/10 - Train loss: 0.3605, Train acc: 0.8947 | Val loss: 2.7477, Val acc: 0.3447
Epoch 10/10 - Train loss: 0.2417, Train acc: 0.9453 | Val loss: 3.0854, Val acc: 0.3607


In [6]:
test_dir = os.path.join("Data", "Data_converted", "test")
test_loader = DataLoader(test_dir, batch_size=64, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()

model.eval()
test_loss = 0.0
test_correct = 0
total_test = 0

with torch.no_grad():
    for batch in test_loader:
        inputs = batch.data.to(device)
        labels = batch.labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        test_correct += torch.sum(preds == labels).item()
        total_test += labels.size(0)

avg_test_loss = test_loss / total_test
test_acc = test_correct / total_test

print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_acc:.4f}")


Test Loss: 3.9039, Test Accuracy: 0.2948


In [7]:
# Full training
train_loader_full = DataLoader(train_dir, batch_size=64, shuffle=True)
val_loader_full = DataLoader(val_dir, batch_size=64, shuffle=False) 

# Instantiate the model
model_2 = SimpleCNN(num_classes=10)

In [8]:
# Train the model
train_model(model_2, train_loader, val_loader, num_epochs=10, lr=0.001)

Epoch 1/10 - Train loss: 7.2327, Train acc: 0.1047 | Val loss: 2.3028, Val acc: 0.1047
Epoch 2/10 - Train loss: 2.2779, Train acc: 0.1460 | Val loss: 2.1953, Val acc: 0.1887
Epoch 3/10 - Train loss: 2.0466, Train acc: 0.2713 | Val loss: 2.0523, Val acc: 0.2693
Epoch 4/10 - Train loss: 1.8280, Train acc: 0.3633 | Val loss: 2.0300, Val acc: 0.2853
Epoch 5/10 - Train loss: 1.5213, Train acc: 0.4660 | Val loss: 2.0767, Val acc: 0.3020
Epoch 6/10 - Train loss: 1.2565, Train acc: 0.5640 | Val loss: 2.1524, Val acc: 0.3327
Epoch 7/10 - Train loss: 1.0246, Train acc: 0.6607 | Val loss: 2.2738, Val acc: 0.3027
Epoch 8/10 - Train loss: 0.7595, Train acc: 0.7633 | Val loss: 2.4209, Val acc: 0.3320
Epoch 9/10 - Train loss: 0.5652, Train acc: 0.8220 | Val loss: 2.6935, Val acc: 0.3247
Epoch 10/10 - Train loss: 0.4049, Train acc: 0.8827 | Val loss: 2.9802, Val acc: 0.3273


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_2.to(device)

criterion = nn.CrossEntropyLoss()

model_2.eval()
test_loss = 0.0
test_correct = 0
total_test = 0

with torch.no_grad():
    for batch in test_loader:
        inputs = batch.data.to(device)
        labels = batch.labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        test_correct += torch.sum(preds == labels).item()
        total_test += labels.size(0)

avg_test_loss = test_loss / total_test
test_acc = test_correct / total_test

print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_acc:.4f}")


Test Loss: 3.9039, Test Accuracy: 0.2948


In [10]:
## Different architecture
model_Optimal = OptimalCNN(num_classes=10)
model_Stochastic = StochasticDepthCNN(num_classes=10)

In [11]:
# Train the model
# On full set, it trains for really long time, reserve around 6 hours or reduce the size of the training set
train_model(model_Optimal, train_loader_full, val_loader_full, num_epochs=10, lr=0.001)

Epoch 1/10 - Train loss: 1.3791, Train acc: 0.5009 | Val loss: 1.3286, Val acc: 0.5317
Epoch 2/10 - Train loss: 1.0787, Train acc: 0.6152 | Val loss: 1.0358, Val acc: 0.6293
Epoch 3/10 - Train loss: 0.9336, Train acc: 0.6678 | Val loss: 0.9514, Val acc: 0.6655
Epoch 4/10 - Train loss: 0.8217, Train acc: 0.7082 | Val loss: 0.9101, Val acc: 0.6766
Epoch 5/10 - Train loss: 0.7121, Train acc: 0.7475 | Val loss: 0.9434, Val acc: 0.6749
Epoch 6/10 - Train loss: 0.6028, Train acc: 0.7840 | Val loss: 0.9007, Val acc: 0.6975
Epoch 7/10 - Train loss: 0.4919, Train acc: 0.8260 | Val loss: 0.9196, Val acc: 0.6996
Epoch 8/10 - Train loss: 0.3865, Train acc: 0.8625 | Val loss: 0.9845, Val acc: 0.6997
Epoch 9/10 - Train loss: 0.3015, Train acc: 0.8927 | Val loss: 1.0719, Val acc: 0.7050
Epoch 10/10 - Train loss: 0.2324, Train acc: 0.9170 | Val loss: 1.1728, Val acc: 0.7018


In [12]:
train_model(model_Stochastic, train_loader_full, val_loader_full, num_epochs=10, lr=0.001)

Epoch 1/10 - Train loss: 1.4494, Train acc: 0.4754 | Val loss: 1.2139, Val acc: 0.5651
Epoch 2/10 - Train loss: 1.1357, Train acc: 0.5941 | Val loss: 1.0752, Val acc: 0.6161
Epoch 3/10 - Train loss: 1.0104, Train acc: 0.6397 | Val loss: 0.9719, Val acc: 0.6545
Epoch 4/10 - Train loss: 0.9092, Train acc: 0.6756 | Val loss: 0.9171, Val acc: 0.6757
Epoch 5/10 - Train loss: 0.8152, Train acc: 0.7086 | Val loss: 0.9799, Val acc: 0.6655
Epoch 6/10 - Train loss: 0.7283, Train acc: 0.7401 | Val loss: 0.8845, Val acc: 0.6925
Epoch 7/10 - Train loss: 0.6407, Train acc: 0.7723 | Val loss: 0.8974, Val acc: 0.6940
Epoch 8/10 - Train loss: 0.5581, Train acc: 0.8018 | Val loss: 0.8957, Val acc: 0.7019
Epoch 9/10 - Train loss: 0.4701, Train acc: 0.8325 | Val loss: 0.9906, Val acc: 0.6877
Epoch 10/10 - Train loss: 0.4028, Train acc: 0.8567 | Val loss: 1.0373, Val acc: 0.6853


In [13]:
# Evaluate
evaluate(model_Optimal, test_loader)

Test Loss: 1.2032, Test Accuracy: 0.6948


(1.2032027043369082, 0.6947666666666666)

In [14]:
evaluate(model_Stochastic, test_loader)

Test Loss: 1.0504, Test Accuracy: 0.6836


(1.050440318025483, 0.6835777777777777)

In [15]:
# Saving model into the pytorch format
save_model(model_Optimal, "Models_Pytorch_saved/OptimalCNN_trained_saved.pth")

Model saved successfully at Models_Pytorch_saved/OptimalCNN_trained_saved.pth


In [16]:
# Now load it
model_Optimal_loaded = load_model("Models_Pytorch_saved/OptimalCNN_trained_saved.pth")

Model loaded successfully from Models_Pytorch_saved/OptimalCNN_trained_saved.pth


In [17]:
# Check if evaluated the same with the loaded model
evaluate(model_Optimal_loaded, test_loader)
# yes!

Test Loss: 1.2032, Test Accuracy: 0.6948


(1.2032027043369082, 0.6947666666666666)