In [3]:
import torch
from torch.optim import SGD
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import accuracy_score
from os.path import join
import tqdm


def train_classifier(model, train_loader, test_loader, exp_name='experiment', lr=0.01, epochs=10, momentum=0.99, logdir='logs', device='cpu'):
    """Train ``model`` with SGD and cross-entropy, evaluating after every epoch.

    Every epoch runs a 'train' phase over ``train_loader`` followed by a 'test'
    phase over ``test_loader``. The running loss and accuracy are shown on the
    progress bar and written to TensorBoard under ``join(logdir, exp_name)``.
    At the end of every epoch the model's ``state_dict`` is saved to
    ``'<exp_name>-<epoch>.pth'`` (relative to the current working directory).

    Args:
        model: module to optimise; it is moved to ``device`` in place.
        train_loader: iterable of batches where ``batch[0]`` holds the inputs
            and ``batch[1]`` the targets; must support ``len()``.
        test_loader: same batch layout as ``train_loader``, used for evaluation.
        exp_name: name of the TensorBoard run and prefix of the checkpoints.
        lr: SGD learning rate.
        epochs: number of train/test cycles.
        momentum: SGD momentum.
        logdir: root directory for the TensorBoard event files.
        device: device on which model and batches are placed (default 'cpu').

    Returns:
        The same ``model`` instance, after training.
    """
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    # Reached through `torch` so the function does not depend on a separate
    # `nn` import having been executed in some other cell.
    criterion = torch.nn.CrossEntropyLoss()
    writer = SummaryWriter(join(logdir, exp_name))
    loss_meter = AverageValueMeter()
    acc_meter = AverageValueMeter()
    model.to(device)

    loader = {
        'train': train_loader,
        'test': test_loader
    }

    # Number of samples seen so far. It advances in BOTH phases, so the
    # TensorBoard x-axis counts train and test samples together.
    global_step = 0

    try:
        for e in range(epochs):
            for phase in ['train', 'test']:
                is_training = phase == 'train'
                if is_training:
                    model.train()
                else:
                    model.eval()

                # The meters hold per-phase running averages.
                loss_meter.reset()
                acc_meter.reset()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    with tqdm.tqdm(loader[phase], total=len(loader[phase]), desc=f"{phase.capitalize()} Epoch {e+1}/{epochs}", unit="batch") as pbar:
                        for batch in pbar:
                            x = batch[0].to(device)
                            y = batch[1].to(device)
                            output = model(x)

                            n = x.shape[0]
                            global_step += n
                            loss = criterion(output, y)

                            if is_training:
                                loss.backward()
                                optimizer.step()
                                optimizer.zero_grad()

                            # Predicted class = index of the highest score per row.
                            accuracy = accuracy_score(y.to('cpu'), output.to('cpu').max(1)[1])
                            # Weight by batch size so a smaller last batch does not skew the mean.
                            loss_meter.add(loss.item(), n)
                            acc_meter.add(accuracy, n)

                            pbar.set_postfix(loss=loss_meter.value(), accuracy=acc_meter.value())

                            if is_training:
                                # Per-batch curve of the running training averages.
                                writer.add_scalar('loss/train', loss_meter.value(), global_step=global_step)
                                writer.add_scalar('accuracy/train', acc_meter.value(), global_step=global_step)

                # End-of-phase summary (for 'test' this is the only point logged).
                writer.add_scalar('loss/' + phase, loss_meter.value(), global_step=global_step)
                writer.add_scalar('accuracy/' + phase, acc_meter.value(), global_step=global_step)

            # One checkpoint per epoch.
            torch.save(model.state_dict(), '%s-%d.pth' % (exp_name, e+1))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    return model

In [4]:
class AverageValueMeter:
    """Accumulates a weighted running mean of the values passed to ``add``."""

    def __init__(self):
        # `reset` defines the `sum` and `num` attributes.
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.sum = 0
        self.num = 0

    def add(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. a batch mean and the batch size)."""
        self.sum += val*n
        self.num += n

    def value(self):
        """Return the weighted mean, or ``None`` when the total weight is zero."""
        try:
            return self.sum/self.num
        except ZeroDivisionError:
            # Nothing has been added since the last reset. Any other error
            # is a real bug and is deliberately left to propagate.
            return None

In [5]:
from torch import nn


class LeNet(nn.Module):
    """LeNet-style CNN: two conv + average-pool stages, then three linear layers.

    The input must have a single channel, and the flattened feature map must
    contain 256 values per sample (16 x 4 x 4, which is what a 28 x 28 input
    produces). The network returns 22 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.C1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.S2 = nn.AvgPool2d(kernel_size=2)
        self.C3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.S4 = nn.AvgPool2d(kernel_size=2)
        # Classifier head.
        self.F5 = nn.Linear(in_features=256, out_features=120)
        self.F6 = nn.Linear(in_features=120, out_features=84)
        self.F7 = nn.Linear(in_features=84, out_features=22)

        # Shared non-linearity, applied after each pooling and hidden layer.
        self.activation = nn.Tanh()

    def forward(self, x):
        """Return the 22 class scores for a batch of single-channel images."""
        features = self.activation(self.S2(self.C1(x)))
        features = self.activation(self.S4(self.C3(features)))
        # Collapse (channels, height, width) into one vector per sample.
        flat = features.view(features.shape[0], -1)
        hidden = self.activation(self.F5(flat))
        hidden = self.activation(self.F6(hidden))
        return self.F7(hidden)

In [6]:
# Sanity check: total number of learnable parameters in LeNet.
net = LeNet()
sum(p.numel() for p in net.parameters())

45446

In [7]:
from torchvision import transforms
import os
from PIL import Image
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

def split_train_val_test(dataset, perc=None, random_state=None):
    """Randomly split ``dataset`` into a train and a test subset.

    Despite the name, no validation subset is produced: only two subsets are
    returned.

    Args:
        dataset: sequence of samples accepted by ``train_test_split``.
        perc: ``[train_size, test_size]``; only the first two entries are
            read. Defaults to ``[0.6, 0.3]``, which leaves the remaining 10%
            of the samples out of both subsets.
        random_state: optional seed forwarded to ``train_test_split`` so the
            split can be reproduced. ``None`` (the default) keeps the split
            non-deterministic, as before.

    Returns:
        ``(train, test)`` as returned by ``train_test_split``.
    """
    if perc is None:
        perc = [0.6, 0.3]
    train, test = train_test_split(
        dataset,
        test_size=perc[1],
        train_size=perc[0],
        random_state=random_state,
    )

    return train, test

# Letters covered by the classifier, in class-index order (G, J, S and Z are absent).
sign_labels = list('ABCDEFHIKLMNOPQRTUVWXY')

# Lower-case letter -> class index, following the order of `sign_labels`.
dict_alph = {label.lower(): index for index, label in enumerate(sign_labels)}

# One folder per dataset.
DATASET1_FOLDER = "../data/Dataset_Elvio"
DATASET2_FOLDER = "../data/Dataset_Manuel"
DATASET3_FOLDER = "../data/Dataset_Michele"
DATASET4_FOLDER = "../data/Dataset_Juliana"

DATASET_ESTESO = 'Dataset_michele_esteso/'

# Datasets actually loaded by this notebook.
datasets = [DATASET2_FOLDER]
#datasets = [DATASET2_FOLDER, DATASET3_FOLDER, DATASET4_FOLDER,DATASET_ESTESO]
#letters = ['a','b','c','d','e', 'f', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y']

In [8]:
# Augmenting pipeline: grayscale 28x28 image with random perspective,
# horizontal flip and rotation applied before conversion to a tensor.
transform_aug = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(size=(28, 28)),
    transforms.RandomPerspective(distortion_scale=0.3, p=0.2),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    # Normalisation is intentionally disabled:
    # transforms.Normalize(mean=mean, std=std),
])

# Deterministic pipeline: grayscale 28x28 image converted to a tensor.
transform_base = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(size=(28, 28)),
    transforms.ToTensor(),
    # Normalisation is intentionally disabled:
    # transforms.Normalize(mean=mean, std=std),
])


# Build the in-memory dataset: one (tensor, label) pair per image file found
# in `<dataset>/<letter>/` for every configured dataset and every letter.
data = []

for letter in dict_alph:
    for dataset in datasets:
        sub_folder = os.path.join(dataset, letter)
        # Sorted so the sample order does not depend on the file system.
        for img_name in sorted(os.listdir(sub_folder)):
            img_path = os.path.join(sub_folder, img_name)
            # The context manager releases the file handle as soon as the
            # image has been converted to a tensor.
            with Image.open(img_path) as img:
                im0 = transform_base(img)
            # NOTE(review): the label comes from the first character of the
            # file name, not from the folder name - confirm they always agree.
            label = dict_alph[img_name[0]]
            data.append((im0, label))
            # To add augmented copies, also append transform_aug(img) with the
            # same label (computed inside the `with` block above).
print(data[0][0].shape)  # Expected: torch.Size([1, 28, 28]) (one channel, resized to 28x28)
print("DATASET SIZE:",len(data))

torch.Size([1, 28, 28])
DATASET SIZE: 3300


In [9]:
# 70% of the samples for training, 30% held out for testing.
train, test = split_train_val_test(data, perc=[0.7, 0.3])

In [10]:
# Only the training loader is shuffled; the test loader keeps a fixed order.
train_data = DataLoader(dataset=train, batch_size=32, shuffle=True, num_workers=2)
test_data = DataLoader(dataset=test, batch_size=32, num_workers=2)

In [62]:
# Train a freshly initialised LeNet; train_classifier returns the model it was given.
lenet = train_classifier(
    LeNet(),
    train_data,
    test_data,
    exp_name='lenet_model',
    lr=0.01,
    epochs=10,
)

Train Epoch 1/10: 100%|██████████| 73/73 [00:05<00:00, 12.80batch/s, accuracy=0.045, loss=3.1]  
Test Epoch 1/10: 100%|██████████| 31/31 [00:03<00:00,  9.53batch/s, accuracy=0.0535, loss=3.09]
Train Epoch 2/10: 100%|██████████| 73/73 [00:04<00:00, 16.93batch/s, accuracy=0.0961, loss=3.04]
Test Epoch 2/10: 100%|██████████| 31/31 [00:02<00:00, 10.72batch/s, accuracy=0.413, loss=2.71]
Train Epoch 3/10: 100%|██████████| 73/73 [00:04<00:00, 16.42batch/s, accuracy=0.614, loss=1.34]
Test Epoch 3/10: 100%|██████████| 31/31 [00:03<00:00, 10.10batch/s, accuracy=0.874, loss=0.387]
Train Epoch 4/10: 100%|██████████| 73/73 [00:03<00:00, 18.47batch/s, accuracy=0.931, loss=0.183]
Test Epoch 4/10: 100%|██████████| 31/31 [00:03<00:00,  9.20batch/s, accuracy=0.997, loss=0.032] 
Train Epoch 5/10: 100%|██████████| 73/73 [00:04<00:00, 14.97batch/s, accuracy=0.998, loss=0.0165]
Test Epoch 5/10: 100%|██████████| 31/31 [00:03<00:00,  8.23batch/s, accuracy=0.999, loss=0.0072] 
Train Epoch 6/10: 100%|██████████

In [16]:
import numpy as np
def test_classifier(model, loader):
    """Run ``model`` in eval mode over ``loader`` and collect its predictions.

    Args:
        model: classifier returning one row of class scores per sample.
        loader: iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the ground-truth labels.

    Returns:
        ``(predictions, labels)`` as two NumPy arrays, in loader order.
    """
    # Evaluation is pinned to the CPU.
    device = 'cpu'
    model.to(device)
    model.eval()

    all_predictions = []
    all_labels = []

    # No gradients are needed for inference.
    with torch.no_grad(), tqdm.tqdm(loader, desc="Testing") as progress:
        for batch in progress:
            inputs = batch[0].to(device)
            targets = batch[1].to(device)
            scores = model(inputs).to("cpu")
            # Index of the highest score in each row = predicted class.
            predicted = scores.max(1)[1].numpy()
            all_predictions.extend(predicted)
            all_labels.extend(targets.to("cpu").numpy())

    return np.array(all_predictions), np.array(all_labels)

In [68]:
# Evaluate the trained LeNet on both splits and report the accuracies.
lenet_predictions_train, lenet_labels_train = test_classifier(lenet, train_data)
lenet_predictions_test, lenet_labels_test = test_classifier(lenet, test_data)
for message, labels, predictions in (
    ("Accuracy di training: %0.4f", lenet_labels_train, lenet_predictions_train),
    ("Accuracy di test: %0.4f", lenet_labels_test, lenet_predictions_test),
):
    print(message % accuracy_score(labels, predictions))

Testing: 100%|██████████| 73/73 [00:09<00:00,  7.41it/s]
Testing: 100%|██████████| 31/31 [00:09<00:00,  3.35it/s]

Accuracy di training: 1.0000
Accuracy di test: 0.9990





In [18]:
def perc_error(gt, pred):
    """Return the classification error of ``pred`` against ``gt`` as a percentage."""
    accuracy = accuracy_score(gt, pred)
    return 100*(1-accuracy)

In [70]:
# Percentage error of LeNet on the training split.
lenet_train_error = perc_error(lenet_labels_train, lenet_predictions_train)
print("Errore LeNet su DIGITS training: %0.2f%%"%(lenet_train_error))

Errore LeNet su DIGITS training: 0.00%


In [None]:
# Persist the whole LeNet module object (not just its state_dict).
torch.save(obj=lenet, f="lenet_lr01_ep10" + '.pt')

In [13]:
import torch.nn as nn

class LeNetV2(nn.Module):
    """LeNet variant that uses max pooling and ReLU activations.

    The input must have a single channel, and the flattened feature map must
    contain 256 values per sample (16 x 4 x 4, which is what a 28 x 28 input
    produces). The network returns 22 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        # First 2D convolution: 1 input channel (grayscale images),
        # 6 output feature maps, 5 x 5 kernel.
        self.C1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)

        # Subsampling: maximum over 2 x 2 neighbourhoods.
        self.S2 = nn.MaxPool2d(kernel_size=2)

        # Second convolution, defined like C1: 6 -> 16 feature maps.
        self.C3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Second 2 x 2 max pooling.
        self.S4 = nn.MaxPool2d(kernel_size=2)

        # Three fully connected layers: 256 -> 120 -> 84 -> 22.
        self.F5 = nn.Linear(in_features=256, out_features=120)
        self.F6 = nn.Linear(in_features=120, out_features=84)
        self.F7 = nn.Linear(in_features=84, out_features=22)

        # Shared ReLU activation module.
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the 22 class scores for a batch of single-channel images."""
        # Convolutional stages: convolution -> pooling -> activation.
        features = self.activation(self.S2(self.C1(x)))
        features = self.activation(self.S4(self.C3(features)))

        # Reshape the feature maps into one vector per sample.
        flat = features.view(features.shape[0], -1)

        # Fully connected head; the last layer has no activation.
        hidden = self.activation(self.F5(flat))
        hidden = self.activation(self.F6(hidden))
        return self.F7(hidden)

In [14]:
# Train the new model with the same settings used for LeNet.
lenet_v2 = train_classifier(
    LeNetV2(),
    train_data,
    test_data,
    exp_name='lenet_v2',
    lr=0.01,
    epochs=10,
)

Train Epoch 1/10: 100%|██████████| 73/73 [00:05<00:00, 13.14batch/s, accuracy=0.0455, loss=3.1] 
Test Epoch 1/10: 100%|██████████| 31/31 [00:03<00:00,  9.07batch/s, accuracy=0.0384, loss=3.1]
Train Epoch 2/10: 100%|██████████| 73/73 [00:06<00:00, 11.68batch/s, accuracy=0.0437, loss=3.09]
Test Epoch 2/10: 100%|██████████| 31/31 [00:03<00:00,  9.30batch/s, accuracy=0.0384, loss=3.09]
Train Epoch 3/10: 100%|██████████| 73/73 [00:04<00:00, 15.53batch/s, accuracy=0.148, loss=2.78] 
Test Epoch 3/10: 100%|██████████| 31/31 [00:07<00:00,  4.27batch/s, accuracy=0.386, loss=2.17]
Train Epoch 4/10: 100%|██████████| 73/73 [00:04<00:00, 14.67batch/s, accuracy=0.461, loss=1.53]
Test Epoch 4/10: 100%|██████████| 31/31 [00:03<00:00, 10.01batch/s, accuracy=0.631, loss=1.17]
Train Epoch 5/10: 100%|██████████| 73/73 [00:04<00:00, 15.99batch/s, accuracy=0.652, loss=1.04] 
Test Epoch 5/10: 100%|██████████| 31/31 [00:04<00:00,  6.92batch/s, accuracy=0.738, loss=1.36]
Train Epoch 6/10: 100%|██████████| 73/73

In [19]:
# Evaluate LeNetV2 on the test loader and print it next to the LeNet result.
lenet_v2_predictions_test, lenet_v2_labels_test = test_classifier(lenet_v2, test_data)
# Needs `lenet_labels_train` / `lenet_predictions_train` from the LeNet
# evaluation cell: that cell must have been run in the current kernel.
print("Errore LeNet su DIGITS training: %0.2f%%"%(perc_error(lenet_labels_train, lenet_predictions_train)))
# Score LeNetV2 against the labels collected in this same pass, and label the
# figure as a test error because the predictions come from `test_data`.
print("Errore LeNetV2 su DIGITS test: %0.2f%%"%(perc_error(lenet_v2_labels_test, lenet_v2_predictions_test)))

Testing: 100%|██████████| 31/31 [00:09<00:00,  3.33it/s]


NameError: name 'lenet_labels_train' is not defined

In [None]:
# Persist the whole LeNetV2 module object (not just its state_dict).
torch.save(obj=lenet_v2, f="lenet_v2_lr01_ep10" + '.pt')