# Training with a smaller resnet

In [1]:
import numpy as np
import os
import auxiliaries as aux
import models as mdl
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, Subset
#import IProgress
from tqdm.notebook import tqdm
import gc


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:
#qua manca la parte in cui carico i dati
splits_folder='train_test_split'
root_dir = os.path.join(os.getcwd(), '../CompCars/data/cropped_image')
file_paths_train = os.path.join(os.getcwd(), f'../CompCars/data/{splits_folder}/classification/train.txt')

In [9]:
num_epochs = 12
batch_size = 8
learning_rate = 1e-4
classification_type = 'make'
patience = 2

In [10]:
#Create the Residual block
class ResidualBlock(nn.Module):
        def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
            super(ResidualBlock, self).__init__()
            self.conv1 = nn.Sequential(
                            nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                            nn.BatchNorm2d(out_channels),
                            nn.ReLU())
            self.conv2 = nn.Sequential(
                            nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                            nn.BatchNorm2d(out_channels))
            self.downsample = downsample
            self.relu = nn.ReLU()
            self.out_channels = out_channels

        def forward(self, x):
            residual = x
            out = self.conv1(x)
            out = self.conv2(out)
            if self.downsample:
                residual = self.downsample(x)
            out += residual
            out = self.relu(out)
            return out

In [11]:
class ResNet(nn.Module):
        def __init__(self, block, layers, num_classes):
            super(ResNet, self).__init__()
            self.inplanes = 64
            self.conv1 = nn.Sequential(
                            nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                            nn.BatchNorm2d(64),
                            nn.ReLU())
            self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
            self.layer0 = self._make_layer(block, 64, layers[0], stride = 1) #doppia convoluzione
            self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
            self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
            self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
            self.avgpool = nn.AvgPool2d(7, stride=1)
            self.fc = nn.Linear(512, num_classes)

#sarebbero 3 gli in e 64 gli out
        
        def _make_layer(self, block, planes, blocks, stride=1):
            downsample = None
            if stride != 1 or self.inplanes != planes:

                downsample = nn.Sequential(
                    nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                    nn.BatchNorm2d(planes),
                )
            layers = []
            layers.append(block(self.inplanes, planes, stride, downsample))
            self.inplanes = planes
            for i in range(1, blocks):
                layers.append(block(self.inplanes, planes))

            return nn.Sequential(*layers)

        def forward(self, x):
            x = self.conv1(x)
            x = self.maxpool(x)
            x = self.layer0(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)

            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)

            return x

In [12]:
# Set number of classes
if classification_type == 'make':
    num_classes = 163
elif classification_type == 'model':
    num_classes = 1712
else:
    print('Wrong classification type') 
#scegliere il numero di blocchi -> inizio con la standard per fare due blocchi per livello
model = mdl.MiniResNet(mdl.ResidualBlock,[2,2,2,2], num_classes).to(device)
#scegliere la loss e l'optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr=learning_rate,weight_decay=0.001)

In [13]:
# Define transformations (resize was arbitrary, normalize was requested from pytorch)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create custom dataset
dataset = aux.CustomImageDataset(root_dir=root_dir, file_paths=file_paths_train, classification_type=classification_type, 
                                 transform=transform, train=True, validation_split=0.25)

# Create training and validation subsets using the indices calculated during dataset.__init__()
train_subset = Subset(dataset, dataset.train_indices)
val_subset = Subset(dataset, dataset.val_indices)

# Create dataloaders for training and validation
train_loader = DataLoader(train_subset, batch_size=batch_size)
valid_loader = DataLoader(val_subset, batch_size=batch_size)

best_val_acc = 0  # Initialize best validation accuracy
epochs_without_improvement = 0  # Counter for epochs without improvement (early stopping)

In [14]:

# Training loop
for epoch in tqdm(range(num_epochs),leave=False):
    model.train() # Set model to training mode
    running_loss = 0.0  # Set variables for evaluation
    correct_train = 0
    total_train = 0
    i = 0
    
    # Loads one batch at a time
    for images, labels in tqdm(train_loader, desc=f'Currently running epoch number {epoch+1}', leave=False):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        i += 1  # Counting batches
        running_loss += loss.item()  # Update running loss

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Calculate accuracy
        predicted = torch.argmax(outputs.data, 1)
        total_train += outputs.size(0)
        correct_train += (predicted == labels).sum().item()

        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    # Validation loop, it doesn't influence training, it's just to keep track of the model overfitting or not
    model.eval()  # Set model to evaluation mode
    running_val_loss = 0.0  # Create variables for evaluation
    correct_val = 0
    total_val = 0

    with torch.no_grad():   # Don't modify model during validation
        for images, labels in tqdm(valid_loader, leave=False):
            # Send tensors to device
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)  # Calculate loss for validation
            running_val_loss += loss.item()  # Update running validation loss

            # Count correctly classified inputs
            predicted = torch.argmax(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

            del images, labels, outputs

    # Calculate validation accuracy
    val_accuracy = 100 * correct_val / total_val

    # Print training and validation stats
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {running_loss / i}, Training Accuracy: {100 * correct_train / total_train}%, '
          f'Validation Loss: {running_val_loss / len(valid_loader)}, Validation Accuracy: {val_accuracy}%')

    # Early stopping check
    """
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        epochs_without_improvement = 0  # Reset counter if there's improvement
        # Save the best model
        #torch.save(model.state_dict(), os.path.join(os.getcwd(), model_save_name))
    else:
        epochs_without_improvement += 1

    # If patience is exceeded, stop training
    if epochs_without_improvement >= patience:
       print(f'Early stopping triggered after {epoch+1} epochs without improvement in validation accuracy.')
    break
    """
# Save the model (final model if no early stopping occurred)
#torch.save(model.state_dict(), os.path.join(os.getcwd(), model_save_name))

  0%|          | 0/12 [00:00<?, ?it/s]

Currently running epoch number 1:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [1/12], Training Loss: 3.835925553356125, Training Accuracy: 8.175158175158176%, Validation Loss: 3.6659100978912234, Validation Accuracy: 10.564435564435565%


Currently running epoch number 2:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [2/12], Training Loss: 3.5204687356631386, Training Accuracy: 13.186813186813186%, Validation Loss: 3.4818774835316244, Validation Accuracy: 14.51048951048951%


Currently running epoch number 3:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [3/12], Training Loss: 3.18699647623118, Training Accuracy: 20.221445221445222%, Validation Loss: 3.2700206181722247, Validation Accuracy: 19.005994005994005%


Currently running epoch number 4:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [4/12], Training Loss: 2.7523953908292973, Training Accuracy: 31.8015318015318%, Validation Loss: 3.1947716998007007, Validation Accuracy: 21.353646353646354%


Currently running epoch number 5:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [5/12], Training Loss: 2.219420105337938, Training Accuracy: 46.19547119547119%, Validation Loss: 3.210278171027254, Validation Accuracy: 22.552447552447553%


Currently running epoch number 6:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [6/12], Training Loss: 1.6488594985039986, Training Accuracy: 62.08791208791209%, Validation Loss: 3.182673730774078, Validation Accuracy: 24.275724275724276%


Currently running epoch number 7:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [7/12], Training Loss: 1.120097772804859, Training Accuracy: 77.11455211455211%, Validation Loss: 3.326700797100029, Validation Accuracy: 23.301698301698302%


Currently running epoch number 8:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [8/12], Training Loss: 0.7573220577840005, Training Accuracy: 86.04728604728605%, Validation Loss: 3.580636631704852, Validation Accuracy: 22.42757242757243%


Currently running epoch number 9:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [9/12], Training Loss: 0.5301625188564095, Training Accuracy: 91.38361638361638%, Validation Loss: 3.5387926751268126, Validation Accuracy: 25.34965034965035%


Currently running epoch number 10:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [10/12], Training Loss: 0.38814292878864926, Training Accuracy: 93.53979353979354%, Validation Loss: 3.663990934451897, Validation Accuracy: 25.274725274725274%


Currently running epoch number 11:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [11/12], Training Loss: 0.3391117077747412, Training Accuracy: 93.989343989344%, Validation Loss: 3.5883193945218466, Validation Accuracy: 26.023976023976022%


Currently running epoch number 12:   0%|          | 0/1502 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

Epoch [12/12], Training Loss: 0.2654783194401015, Training Accuracy: 95.42957042957043%, Validation Loss: 3.640531888978924, Validation Accuracy: 28.07192807192807%
