In [None]:
import csv
import datetime
import json
import numbers
import os
import random
import statistics
import time
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import torch
import torch.nn as nn
import torch.optim
import torchvision.models as models
import torchvision.transforms.functional as F
import yaml
from matplotlib.gridspec import GridSpec
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary
from torchvision import transforms
from tqdm import tqdm

# Parameters

In [None]:
data_dir = 'dataset' # dataset root folder (expected subfolders: 'train', 'val', 'test')
results_dir = 'test' # base name of the folder that receives the outputs of this run
overwrite = False # when False, a numeric suffix is appended to results_dir so a fresh folder is used

padded_w, padded_h = 224, 224 # image size (width, height) fed to the model
batch_size = 64
num_epochs = 100
freeze = False # freeze every layer except the replaced final fully connected head

# select optimizer and params
optim = 'SGD' # one of: 'SGD', 'Adadelta', 'Adagrad', 'RMSprop', 'Adam', 'Rprop' (exact spelling)
lr = 0.01 # learning rate; NOTE(review): only the SGD branch forwards it to the optimizer

# Select loss function and params
loss_f = 'MSE' # 'MAE', 'MSE' or 'Huber'
delta = 2 # only used by the Huber loss


In [None]:

# Keep only the splits that exist in the dataset folder, in a fixed order.
# The folder is listed once instead of once per candidate split.
available_entries = set(os.listdir(data_dir))
splits = [split for split in ['test', 'val', 'train'] if split in available_entries]

# `delta` only applies to the Huber loss; clear it otherwise so the saved run
# arguments do not report a parameter that was never used.
# Compare by value: `is not` tests object identity, which is not string equality.
if loss_f != 'Huber':
    delta = None

# Inspection of Data distribution

In [None]:
def parse_json(path):
    fname = glob('*.json', dir_fd=path)[0]
    with open(os.path.join(path, fname)) as json_f:
        json_data = json.load(json_f)
        labels = list(json_data.values())
    return labels
# Compare how the labels are distributed in the training and validation splits.
colors = ['red', 'lime']
labels = ['train', 'val']
# Read from the configured dataset root rather than a hard-coded folder name.
data_train = parse_json(os.path.join(data_dir, 'train'))
data_val = parse_json(os.path.join(data_dir, 'val'))
# One bin per integer label, centred on the integer and spanning both splits.
# (linspace(min, max, max) merged the two largest labels into a single bin and
# dropped validation labels outside the training range.)
all_labels = data_train + data_val
bins = np.arange(min(all_labels), max(all_labels) + 2) - 0.5
plt.style.use('ggplot')
plt.hist((data_train, data_val), bins, color = colors, label = labels)
plt.legend(prop={'size': 10})
plt.title('label repartition')
plt.xlabel('number of boxes on image')
plt.ylabel('instances')
plt.show()
# Mean label of the training split
print('moyenne train: ', statistics.mean(data_train))

## Original model

In [None]:
# Load an ImageNet-pretrained ResNet-18.
# torchvision >= 0.13 selects checkpoints through the `weights=` enum and deprecates
# `pretrained=True`; older releases only know the legacy flag, so fall back to it.
# Per the torchvision docs, both spellings resolve to the IMAGENET1K_V1 checkpoint.
if hasattr(models, 'ResNet18_Weights'):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)
summary(model, input_size=(16, 3, 224, 224))

## Modified model for regression

In [None]:
model.fc.in_features # input width of the final fully connected layer (the layer replaced by the regression head below)

In [None]:
# modify last fully connected layer (was for classification) to have a regression head
# Replace the final classification layer with a regression head that ends in one output.
# The input width is read from the model instead of being hard-coded to 512; the
# fallback keeps this cell re-runnable once `model.fc` is already the Sequential head.
if isinstance(model.fc, nn.Linear):
    head_in_features = model.fc.in_features
else:
    head_in_features = model.fc[0].in_features
model.fc = nn.Sequential(
    nn.Linear(head_in_features, 1024),
    nn.BatchNorm1d(1024),
    nn.ReLU(),
    nn.Linear(1024, 1024),
    nn.BatchNorm1d(1024),
    nn.ReLU(),
    nn.Linear(1024, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Linear(512, 1)
)
# Start the output bias at 3.0 (presumably close to the typical label - TODO confirm).
# nn.init fills the existing Parameter in place instead of swapping in a new object.
nn.init.constant_(model.fc[-1].bias, 3.0)
print(model.fc[-1].bias)

# Optional layer freezing: disable gradients everywhere, then re-enable them
# for the new head only.
if freeze:
    for param in model.parameters():
        param.requires_grad = False

    for param in model.fc.parameters():
        param.requires_grad = True

summary(model, input_size=(16, 3, 224, 224))


# Dataloaders

In [None]:
# Preprocessing applied to every image: resize to the model input size, convert to a
# tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics - confirm.
resize_step = transforms.Resize((padded_h, padded_w))
to_tensor_step = transforms.ToTensor()
normalize_step = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
data_transforms = transforms.Compose([resize_step, to_tensor_step, normalize_step])


In [None]:
class MyDataset(Dataset):
    def __init__(self, data_dir, split='train', transform=None):
        self.data_dir = os.path.join(data_dir, split)
        self.transform = transform
        self.image_files = [file for file in os.listdir(self.data_dir) if file.endswith('.jpg') or file.endswith('.jpeg')]
        self.labels = self.parse_json()
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        image_path = os.path.join(self.data_dir, self.image_files[idx])
        image = Image.open(image_path).convert("RGB")
        label = self.labels[self.image_files[idx]]
        label = torch.tensor(label, dtype=torch.float32)
        # Apply transformations
        if self.transform:
            image = self.transform(image)
        return image, label
    
    def collate_fn(self, batch):
        images_zip, labels_zip = zip(*batch)
        images_batch = torch.stack(images_zip, dim = 0)
        labels_batch = torch.stack(labels_zip, dim = 0)
        return (images_batch, labels_batch)
    
    def parse_json(self):
        fname = glob('*.json', dir_fd=self.data_dir)[0]
        with open(os.path.join(self.data_dir, fname)) as json_f:
            labels = json.load(json_f)
        return labels

# One DataLoader per available split, keyed by split name.
# Only the training split is shuffled: the loop averages per-batch metrics, so
# reshuffling val/test would change which samples land in the smaller last batch
# and make the reported validation numbers vary from epoch to epoch.
dataloaders = {}
for split in splits:
    dataset = MyDataset(data_dir = data_dir, split = split, transform = data_transforms)
    dataloaders[split] = DataLoader(
        dataset=dataset,
        shuffle=(split == 'train'),
        batch_size=batch_size,
        collate_fn=dataset.collate_fn,
    )


# Results directory

In [None]:
# Create the folder that receives the outputs of this run.
# Failures are no longer printed and ignored: without the folder the training
# cell would only fail later, when it opens its first output file.
if not overwrite:
    # Never reuse an existing folder: take the first free "<results_dir><k>" name.
    k = 0
    while os.path.isdir(results_dir + str(k)):
        k += 1
    results_dir = results_dir + str(k)
    os.mkdir(results_dir)
else:
    # Reusing an existing folder is the expected case here, so it is not an error.
    os.makedirs(results_dir, exist_ok=True)

# Training and evaluation functions

In [None]:
def train_step(inputs, targets, model, optimizer, criterion):
    """Run one optimisation step on a single batch.

    Args:
        inputs: batch of inputs fed to `model`.
        targets: 1-D tensor holding one regression target per sample.
        model: network returning one value per sample (output shape ``(batch, 1)``).
        optimizer: optimizer that updates the parameters of `model`.
        criterion: loss that is back-propagated.

    Returns:
        tuple: ``(acc, loss, mae)`` as Python floats. ``acc`` is the fraction of samples
        whose rounded prediction equals the integer part of the target, ``loss`` is the
        criterion value and ``mae`` the mean absolute error of the batch.
    """
    # Follow the model's own device so inputs and weights cannot end up mismatched.
    device = next(model.parameters()).device
    mae_loss = nn.L1Loss()
    model.train()
    optimizer.zero_grad()
    inputs = inputs.to(device)
    targets = targets.to(device)
    # Drop only the trailing output dimension: a bare squeeze() would also remove the
    # batch dimension for a batch of one and no longer match the shape of `targets`.
    predictions = model(inputs).squeeze(-1)
    loss = criterion(predictions, targets)
    mae = mae_loss(predictions, targets)
    loss.backward()
    optimizer.step()
    # Vectorised exact-match count (one device sync instead of two per sample);
    # round() is half-to-even and long() truncates, like Python's round() / int().
    matches = predictions.detach().round().long() == targets.long()
    acc = matches.sum().item() / len(predictions)
    # No per-batch torch.cuda.empty_cache(): it does not give PyTorch more memory and
    # makes the caching allocator request its blocks again on every step.
    return acc, loss.item(), mae.item()


In [None]:
def val_step(inputs, targets, model, criterion):
    """Evaluate the model on a single batch without updating it.

    Args:
        inputs: batch of inputs fed to `model`.
        targets: 1-D tensor holding one regression target per sample.
        model: network returning one value per sample (output shape ``(batch, 1)``).
        criterion: loss used for reporting.

    Returns:
        tuple: ``(acc, loss, mae)`` as Python floats. ``acc`` is the fraction of samples
        whose rounded prediction equals the integer part of the target, ``loss`` is the
        criterion value and ``mae`` the mean absolute error of the batch.
    """
    # Follow the model's own device so inputs and weights cannot end up mismatched.
    device = next(model.parameters()).device
    mae_loss = nn.L1Loss()
    model.eval()
    inputs = inputs.to(device)
    targets = targets.to(device)
    with torch.no_grad():
        # Drop only the trailing output dimension: a bare squeeze() would also remove
        # the batch dimension for a batch of one.
        predictions = model(inputs).squeeze(-1)
        loss = criterion(predictions, targets)
        mae = mae_loss(predictions, targets)
        # Vectorised exact-match count (one device sync instead of two per sample);
        # round() is half-to-even and long() truncates, like Python's round() / int().
        matches = predictions.round().long() == targets.long()
    acc = matches.sum().item() / len(predictions)
    # No per-batch torch.cuda.empty_cache(): it does not give PyTorch more memory and
    # makes the caching allocator request its blocks again on every step.
    return acc, loss.item(), mae.item()

# Training

In [None]:
match loss_f:
    case 'MSE':
        criterion = nn.MSELoss()
    case 'MAE':
        criterion = nn.L1Loss()
    case 'Huber':
        criterion = nn.HuberLoss(delta = delta)
        
match optim:
    case 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    case 'Adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(), lr = 1., rho = 0.95, eps=1e-6)
    case 'Adagrad':
        optimizer = torch.optim.Adagrad(model.parameters())
    case 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters())
    case 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), betas=[0.9, 0.8])
    case 'Rprop':
        optimizer = torch.optim.Rprop(model.parameters())

In [None]:
# Per-epoch metric histories (one value appended per epoch).
average_training_losses = []
average_training_accs = []
average_val_accs = []
average_val_losses = []
average_training_maes = []
average_val_maes = []
# Start below any reachable accuracy so the first epoch always writes best.pt, even
# when validation accuracy is exactly 0 (the evaluation section reloads that file).
best_val_accs = float('-inf')
best_epoch = 0

start_time = time.time()
# Use the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# newline='' leaves line endings to the csv writer (its lineterminator is '\n')
with open(os.path.join(results_dir, 'record.tsv'), 'w', newline='') as tsvfile:
    writer = csv.writer(tsvfile, delimiter = '\t', lineterminator = '\n')
    writer.writerow(['epoch', 'train_loss', 'train_acc', 'val_loss', 'val_acc'])
    for epoch in range(num_epochs):
        # Training: one optimisation step per batch, metrics averaged over batches
        training_losses = []
        training_accs = []
        training_maes = []
        desc = 'Epoch ' + str(epoch) + '/' + str(num_epochs)
        for (x_train, y_train) in tqdm(dataloaders['train'], desc = desc):
            acc, loss, mae = train_step(x_train, y_train, model, optimizer, criterion)
            training_losses.append(loss)
            training_accs.append(acc)
            training_maes.append(mae)
        average_training_loss = sum(training_losses) / len(training_losses)
        average_training_acc = sum(training_accs) / len(training_accs)
        average_training_mae = sum(training_maes) / len(training_maes)
        average_training_losses.append(average_training_loss)
        average_training_accs.append(average_training_acc)
        average_training_maes.append(average_training_mae)

        # Evaluation
        val_accs = []
        val_loss = []
        val_maes = []
        for x_val, y_val in dataloaders['val']:
            acc, loss, mae = val_step(x_val, y_val, model, criterion)
            val_accs.append(acc)
            val_loss.append(loss)
            val_maes.append(mae)
        average_val_loss = sum(val_loss) / len(val_loss)
        average_val_acc = sum(val_accs) / len(val_accs)
        average_val_mae = sum(val_maes)/len(val_maes)
        average_val_accs.append(average_val_acc)
        average_val_losses.append(average_val_loss)
        average_val_maes.append(average_val_mae)
        # Checkpoint whenever validation accuracy strictly improves
        if best_val_accs < average_val_acc:
            torch.save(model.state_dict(), os.path.join(results_dir, 'best.pt'))
            best_val_accs = average_val_acc
            best_epoch = epoch
            print("New best model: ", best_val_accs)
        writer.writerow([epoch, average_training_loss, average_training_acc, average_val_loss, average_val_acc])
        print(f'Training Loss: {average_training_loss}, Validation Acc: {average_val_acc}, Validation Loss: {average_val_loss}')

# Report the wall-clock duration of the training loop.
end_t = time.time()
time_elapsed = end_t - start_time
legend = ['train', 'val']
print("time_elapsed: {}".format(time_elapsed))
plt.style.use('dark_background')

# Three stacked panels: criterion loss, MAE and accuracy against the epoch index.
fig = plt.figure(layout="constrained", figsize=(20, 10))
gs = GridSpec(3, 1, figure=fig, wspace=0.1)
fig.suptitle('Results', fontsize=16)

# (train series, val series, first val epoch drawn, y label, x label) for each panel.
# The two loss panels skip the first validation epoch; the accuracy panel draws all of them.
panels = [
    (average_training_losses, average_val_losses, 1, "loss: " + criterion._get_name(), None),
    (average_training_maes, average_val_maes, 1, "loss: MAE", None),
    (average_training_accs, average_val_accs, 0, "accuracy", "epoch"),
]
for row, (train_series, val_series, val_start, y_label, x_label) in enumerate(panels):
    ax = fig.add_subplot(gs[row])
    n_epochs_recorded = len(train_series)
    ax.plot(np.arange(n_epochs_recorded), train_series, color='b')
    ax.plot(np.arange(val_start, n_epochs_recorded), val_series[val_start:], color='g')
    if x_label is not None:
        ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

fig.legend(legend)
fig.savefig(os.path.join(results_dir, 'results.png'))
print("Evaluation accuracy (best) = ", best_val_accs)
# Release cached GPU memory
torch.cuda.empty_cache()

# Metrics of the best epoch (highest validation accuracy), saved with the run outputs.
yaml_dict = {'best epoch': best_epoch,
             'best val acc': best_val_accs,
             'train loss': average_training_losses[best_epoch],
             'train acc': average_training_accs[best_epoch],
             'train mae loss': average_training_maes[best_epoch],
             # read the loss history here; this entry used to index the accuracy history
             'val loss': average_val_losses[best_epoch],
             'val mae loss': average_val_maes[best_epoch],
             'time': str(datetime.timedelta(seconds=time_elapsed))}
with open(os.path.join(results_dir, 'results.yaml'), 'w') as yamlf:
    yaml.dump(yaml_dict, yamlf, default_flow_style=False, allow_unicode=True)


In [None]:

# Save the hyper-parameters of this run next to its results.
yaml_dict = dict([
    ('imgsz', [padded_w, padded_h]),
    ('batch size', batch_size),
    ('epochs', num_epochs),
    ('loss function', loss_f),
    ('delta', delta),
    ('optim', optim),
    ('lr', lr),
])
args_yaml_text = yaml.dump(yaml_dict, default_flow_style=False, allow_unicode=True)
with open(os.path.join(results_dir, 'args.yaml'), 'w') as yamlf:
    yamlf.write(args_yaml_text)

In [None]:
# Write the layer-by-layer summary of the final model to model.txt in the results folder.
model_stats = summary(
    model,
    input_size=(batch_size, 3, 224, 224),
    row_settings=("depth", "ascii_only"),
)
summary_str = str(model_stats)
model_txt_path = os.path.join(results_dir, 'model.txt')
with open(model_txt_path, 'w') as modelf:
    print(summary_str, end='', file=modelf)

# Evaluation

In [None]:

# Reload the best checkpoint from the results folder of this run, which is where the
# training loop saved it (the bare 'best.pt' pointed at the working directory).
# map_location lets a checkpoint saved from the GPU load on a CPU-only machine.
model.load_state_dict(
    torch.load(
        os.path.join(results_dir, 'best.pt'),
        map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    )
)

In [None]:
# Evaluate on the validation split of the configured dataset root, so this section
# follows `data_dir` like the dataloaders do instead of a hard-coded folder name.
path = os.path.join(data_dir, 'val')
plt.style.use('default')
def parse_json_dict(path):
    fname = glob('*.json', dir_fd=path)[0]
    with open(os.path.join(path, fname)) as json_f:
        labels = json.load(json_f)
    return labels
# Spot check: predict one randomly chosen image and compare with its label.
labels = parse_json_dict(path)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# `root_dir` is the keyword meant for a directory path (`dir_fd` expects a descriptor);
# .jpeg files are included as well, matching what the dataset class loads.
fnames = glob('*.jpg', root_dir=path) + glob('*.jpeg', root_dir=path)
fnames = random.sample(fnames, 1)
model.eval()
model.to(device)
for fname in fnames:
    # Close the file handle as soon as the pixels are loaded
    with Image.open(os.path.join(path, fname)) as img:
        image = img.convert("RGB")
    print(np.asarray(image).shape)
    plt.imshow(image)
    plt.show()
    image = data_transforms(image)
    image = torch.unsqueeze(image, dim=0)
    image = image.to(device)
    # Inference only: skip building the autograd graph
    with torch.no_grad():
        pred = model(image)
    print('predicted: ', pred[0][0].item())
    print('predicted round: ', round(pred[0][0].item()))
    print('truth: ', labels[fname])

In [None]:
def get_confusion_matrix(model, path, transform, max_pred=40):
    """Count rounded predictions against ground-truth labels for every image in `path`.

    Args:
        model: network returning one value per image (output shape ``(1, 1)`` here).
        path: split folder containing the images and the label JSON file.
        transform: preprocessing applied to each PIL image before inference.
        max_pred: largest prediction that gets its own row (defaults to the
            previously hard-coded 40).

    Returns:
        tuple: ``(confusion_matrix, indices_p, indices_t)``. Rows ``0..max_pred-1`` hold
        the rounded predictions ``1..max_pred`` and the last row collects every other
        prediction; columns hold the labels ``1..max(labels)``. ``indices_p`` and
        ``indices_t`` are the row / column values as float arrays.
    """
    labels = parse_json_dict(path)
    # Same extension filter as the dataset class, sorted for a reproducible order.
    fnames = sorted(f for f in os.listdir(path) if f.endswith(('.jpg', '.jpeg')))
    # init matrix
    max_label = max(labels.values())
    indices_t = np.linspace(1, max_label, max_label)
    indices_p = np.linspace(1, max_pred, max_pred)
    # int64 so that large splits cannot overflow a cell (int16 stops at 32767)
    confusion_matrix = np.zeros((indices_p.size+1, indices_t.size), dtype = np.int64)
    # Follow the model's own device so inputs and weights cannot end up mismatched.
    device = next(model.parameters()).device
    # predictions
    model.eval()
    with torch.no_grad():  # inference only: skip building the autograd graph
        for fname in fnames:
            with Image.open(os.path.join(path, fname)) as img:
                image = transform(img.convert("RGB"))
            image = torch.unsqueeze(image, dim=0)
            image = image.to(device)
            pred = round(model(image)[0][0].item())
            # Predictions outside 1..max_pred go to the last ("other") row.
            row = pred - 1 if 1 <= pred <= max_pred else -1
            # NOTE(review): a label of 0 would index column -1, i.e. be counted in the
            # last ground-truth column - confirm labels are always >= 1.
            confusion_matrix[row][labels[fname]-1] += 1
    return confusion_matrix, indices_p, indices_t

In [None]:
def plot_confusion_matrix(split, title, save_as=None):
    """Draw the confusion matrix of one dataset split as a heatmap.

    Args:
        split: sub-folder of `data_dir` to evaluate (e.g. 'val' or 'train').
        title: title of the figure.
        save_as: optional file name; when given, the figure is also saved under
            `results_dir`. Nothing is saved by default.
    """
    # Read from the configured dataset root rather than a hard-coded folder name.
    split_path = os.path.join(data_dir, split)
    conf_mat, inds_p, inds_t = get_confusion_matrix(model, split_path, data_transforms)
    # Integer tick labels ('1', '2', ...) plus the catch-all row for other predictions
    row_names = [str(int(i)) for i in inds_p] + ['other']
    col_names = [str(int(i)) for i in inds_t]
    df_cm = pd.DataFrame(conf_mat, index=row_names, columns=col_names)
    plt.figure(figsize = (10,7))
    # fmt='d' prints whole counts; the default format shows counts >= 100 as 1e+02
    sn.heatmap(df_cm, annot=True, fmt='d')
    plt.ylabel('predicted')
    plt.xlabel('ground truth')
    plt.title(title)
    if save_as is not None:
        plt.savefig(os.path.join(results_dir, save_as))


# Same plot for both splits; pass e.g. save_as='val_conf_mat.png' to keep a copy on disk.
plot_confusion_matrix('val', 'Val confusion matrix')
plot_confusion_matrix('train', 'train confusion matrix')