In [1]:
import torch
from torch.optim import SGD
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import accuracy_score
from os.path import join
import tqdm


def train_classifier(model, train_loader, test_loader, exp_name='experiment', lr=0.01, epochs=10, momentum=0.99, logdir='logs'):
    optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    writer = SummaryWriter(join(logdir, exp_name))
    loss_meter = AverageValueMeter()
    acc_meter = AverageValueMeter()
    #device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    model.to(device)

    loader = {
        'train': train_loader,
        'test': test_loader
    }

    global_step = 0

    for e in range(epochs):
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            loss_meter.reset()
            acc_meter.reset()

            with torch.set_grad_enabled(phase=='train'):
                with tqdm.tqdm(enumerate(loader[phase]), total=len(loader[phase]), desc=f"{phase.capitalize()} Epoch {e+1}/{epochs}", unit="batch") as pbar:
                    for i, batch in pbar:
                        x = batch[0].to(device)
                        y = batch[1].to(device)
                        output = model(x)

                        n = x.shape[0]
                        global_step += n
                        loss = criterion(output, y)

                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                            optimizer.zero_grad()

                        accuracy = accuracy_score(y.to('cpu'), output.to('cpu').max(1)[1])
                        loss_meter.add(loss.item(), n)
                        acc_meter.add(accuracy, n)

                        pbar.set_postfix(loss=loss_meter.value(), accuracy=acc_meter.value())

                        if phase == 'train':
                            writer.add_scalar('loss/train', loss_meter.value(), global_step=global_step)
                            writer.add_scalar('accuracy/train', acc_meter.value(), global_step=global_step)
            writer.add_scalar('loss/' + phase, loss_meter.value(), global_step=global_step)
            writer.add_scalar('accuracy/' + phase, acc_meter.value(), global_step=global_step)

        torch.save(model.state_dict(), '%s-%d.pth' % (exp_name, e+1))

    return model

In [2]:
import numpy as np
def test_classifier(model, loader):
    #device = "cuda" if torch.cuda.is_available() else "cpu"
    device = 'cpu'
    model.to(device)
    model.eval()

    predictions, labels = [], []

    with torch.no_grad():
        with tqdm.tqdm(loader, desc="Testing") as pbar:
            for batch in pbar:
                x = batch[0].to(device)
                y = batch[1].to(device)
                output = model(x)
                preds = output.to("cpu").max(1)[1].numpy()
                labs = y.to("cpu").numpy()
                predictions.extend(list(preds))
                labels.extend(list(labs))

    return np.array(predictions), np.array(labels)

In [3]:
def perc_error(gt, pred):
    return 100*(1-accuracy_score(gt, pred))

In [4]:
class AverageValueMeter:
    def __init__(self):
        self.reset()
        self.sum = 0
        self.num = 0
        
    def reset(self):
        self.sum = 0
        self.num = 0
        
    def add(self, val, n=1):
        self.sum += val*n
        self.num += n
        
    def value(self):
        try:
            return self.sum/self.num
        except:
            return None

In [5]:
from torch import nn
class LeNetColor(nn.Module):
    def __init__(self):
        super(LeNetColor, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 18, 5), # 3 canali in input, 18 mappe di feature in output, kernel 5x5 | input: 3x32x32 -> output: 18x28x28
            nn.MaxPool2d(2), # Max pooling 2x2 | input: 18x28x28 -> output: 18x14x14
            nn.ReLU(),
            nn.Conv2d(18, 28, 5), # 18 canali in input, 28 mappe di feature in output, kernel 5x5 | input: 18x14x14 -> output: 28x10x10
            nn.MaxPool2d(2), # Max pooling 2x2 | input: 28x10x10 -> output: 28x5x5
            nn.ReLU()
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(700,360), #input 28*5*5=700, output 360
            nn.ReLU(),
            nn.Linear(360,252), #input 360, output 252
            nn.ReLU(),
            nn.Linear(252,22) #input 252, output 100
        )
        
    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x.view(x.shape[0], -1))
        return x

In [6]:
class MiniAlexNet(nn.Module):
    def __init__(self, input_channels=3, out_classes=22):
        super(MiniAlexNet, self).__init__()
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(input_channels, 16, 5, padding=2), # 3 canali in input, 16 mappe di feature in output, kernel 5x5 | input: 3x32x32 -> output: 16x32x32
            nn.MaxPool2d(2), # Max pooling 2x2 | input: 32x32x32 -> output: 32x16x16
            nn.ReLU(),
            
            nn.Conv2d(16, 32, 5, padding=2), # 16 canali in input, 32 mappe di feature in output, kernel 5x5 | input: 16x16x16 -> output: 32x16x16
            nn.MaxPool2d(2), # Max pooling 2x2 | input: 64x16x16 -> output: 64x8x8
            nn.ReLU(),
            
            nn.Conv2d(32, 64, 3, padding=1), # 32 canali in input, 64 mappe di feature in output, kernel 3x3 | input: 32x8x8 -> output: 64x8x8
            nn.ReLU(),
            
            nn.Conv2d(64, 128, 3, padding=1), # 64 canali in input, 128 mappe di feature in output, kernel 3x3 | input: 64x8x8 -> output: 128x8x8
            nn.ReLU(),
            
            nn.Conv2d(128, 256, 3, padding=1), # 128 canali in input, 256 mappe di feature in output, kernel 3x3 | input: 128x8x8 -> output: 256x8x8
            nn.MaxPool2d(2), # Max pooling 2x2 | input: 128x8x8 -> output: 128x4x4
            nn.ReLU()
        )
        
        self.classifier = nn.Sequential(
            nn.Linear(256*4*4, 2048), #input 256*4*4=4096, output 2048
            nn.ReLU(),
            
            nn.Linear(2048, 1024), #input 2048, output 1024
            nn.ReLU(),
            
            nn.Linear(1024, out_classes) #input 1024, output 100
        )
        
    def forward(self, x):
        x = self.feature_extractor(x)
        x = self.classifier(x.view(x.shape[0], -1))
        return x

In [7]:
from torchvision import transforms
import os
from PIL import Image
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

def split_train_val_test(dataset, perc=None):  # default 60% train, 10% val, 30% test

    if perc is None:
        perc = [0.6, 0.3]
    train, test = train_test_split(dataset, test_size=perc[1], train_size=perc[0])

    return train, test

sign_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'T', 'U', 'V', 'W', 'X',
               'Y']

dict_alph = {'a': 0,'b': 1,'c': 2,'d': 3,'e': 4,'f': 5,'h': 6,'i': 7,'k': 8,'l': 9,'m': 10,
             'n': 11,'o': 12,'p': 13,'q': 14,'r': 15,'t': 16,'u': 17,'v': 18,'w': 19,'x': 20,'y': 21
}

DATASET1_FOLDER = "../data/Dataset_Elvio"
DATASET2_FOLDER = "../data/Dataset_Manuel"
DATASET3_FOLDER = "../data/Dataset_Michele"
DATASET4_FOLDER = "../data/Dataset_Juliana"

DATASET_ESTESO = 'Dataset_michele_esteso/'
datasets = [DATASET2_FOLDER]
#datasets = [DATASET2_FOLDER, DATASET3_FOLDER, DATASET4_FOLDER,DATASET_ESTESO]
#letters = ['a','b','c','d','e', 'f', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 't', 'u', 'v', 'w', 'x', 'y']

In [13]:
transform_base = transforms.Compose([
    #transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32,32)),
    transforms.ToTensor(),
])

data = []

for letter in dict_alph:
    for dataset in datasets:
        sub_folder = os.path.join(dataset, letter)
        for img_name in os.listdir(sub_folder):
            img_path = os.path.join(sub_folder, img_name)
            im0 = transform_base(Image.open(img_path))

            label = dict_alph[img_name[0]]
            data.append((im0, label))

print(data[0][0].shape)  # Should be (1, 64, 64) for grayscale images
print("DATASET SIZE:",len(data))

torch.Size([3, 32, 32])
DATASET SIZE: 3300


In [14]:
train, test = split_train_val_test(data, [0.7, 0.3])

In [15]:
train_data = DataLoader(train, batch_size=32, num_workers=2, shuffle=True)
test_data = DataLoader(test, batch_size=32, num_workers=2)

In [16]:
lenet_color = LeNetColor()
lenet_color = train_classifier(lenet_color, train_data, test_data, exp_name='lenet_color', lr=0.01, epochs=20)

Train Epoch 1/20: 100%|██████████| 73/73 [00:06<00:00, 11.01batch/s, accuracy=0.0403, loss=3.09]
Test Epoch 1/20: 100%|██████████| 31/31 [00:03<00:00,  8.11batch/s, accuracy=0.0323, loss=3.08]
Train Epoch 2/20: 100%|██████████| 73/73 [00:05<00:00, 13.59batch/s, accuracy=0.302, loss=2.43] 
Test Epoch 2/20: 100%|██████████| 31/31 [00:03<00:00,  8.96batch/s, accuracy=0.665, loss=1.05]
Train Epoch 3/20: 100%|██████████| 73/73 [00:05<00:00, 13.51batch/s, accuracy=0.832, loss=0.624]
Test Epoch 3/20: 100%|██████████| 31/31 [00:03<00:00,  8.71batch/s, accuracy=0.863, loss=1.32]
Train Epoch 4/20: 100%|██████████| 73/73 [00:05<00:00, 13.41batch/s, accuracy=0.781, loss=0.926]
Test Epoch 4/20: 100%|██████████| 31/31 [00:03<00:00,  9.13batch/s, accuracy=0.822, loss=0.657]
Train Epoch 5/20: 100%|██████████| 73/73 [00:05<00:00, 13.36batch/s, accuracy=0.36, loss=2.31]  
Test Epoch 5/20: 100%|██████████| 31/31 [00:03<00:00,  9.54batch/s, accuracy=0.0475, loss=3.11]
Train Epoch 6/20: 100%|██████████| 73

KeyboardInterrupt: 

In [None]:
# calcoliamo l'accuracy di test
lenet_color_predictions_test, lenet_color_labels_test = test_classifier(lenet_color, test_data)
print("Accuracy LeNetColor su CIFAR100 test: %0.2f%%"%(accuracy_score(lenet_color_labels_test, lenet_color_predictions_test)))

In [None]:
torch.save(lenet_color, "lenet_color_lr01_ep20" + '.pt')

In [None]:
# Alleniamo il modello
mini_alex_net = MiniAlexNet()
mini_alex_net = train_classifier(mini_alex_net, train_data, test_data, exp_name='mini_alex_net', lr=0.01, epochs=30)

In [None]:
# calcoliamo l'accuracy di test
mini_alex_net_predictions_test, mini_alex_net_labels_test = test_classifier(mini_alex_net, test_data)
print("Accuracy LeNetColor su CIFAR100 test: %0.2f%%"%(accuracy_score(mini_alex_net_labels_test, mini_alex_net_predictions_test)))

In [None]:
torch.save(mini_alex_net, "mini_alex_net_lr01_ep30" + '.pt')