# **Trabajo unidad 4: Clasificación de gestos de manos**
# INFO257 Inteligencia Artificial

**Integrantes**: *Patricio Canales*, *Eleazar Vásquez*

# **Descarga del dataset**

In [1]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms 
from torch.utils.data import DataLoader
import numpy as np
from torchvision import models

In [None]:
if torch.cuda.is_available():
  display(torch.cuda.get_device_name(0))

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

'Tesla P100-PCIE-16GB'

cuda


# Dataloaders

In [None]:
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])

valid_transforms = transforms.Compose([transforms.Resize(255),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])


test_transforms = transforms.Compose([transforms.Resize(255),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])


train_dataset = ImageFolder('gestos/train', transform=train_transforms)
valid_dataset = ImageFolder('gestos/valid', transform=valid_transforms)
test_dataset = ImageFolder('gestos/test', transform=test_transforms)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=256)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=128)

dataloaders_dict = {"train": train_loader, "valid": valid_loader}

# **Entrenamiento**




Fuente código de entrenamiento https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

In [None]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=30):
  best_valid_loss = np.inf
  for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)


    for phase in ['train', 'valid']:
      if phase == 'train':
        model.train()
      else:
        model.eval()

      running_loss = 0.0
      running_corrects = 0

      for inputs, labels in dataloaders_dict[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        with torch.set_grad_enabled(phase == 'train'):
          outputs = model(inputs)
          loss = criterion(outputs, labels)

          _, preds = torch.max(outputs, 1)
          if phase == 'train':
            loss.backward()
            optimizer.step()

          running_loss += loss.item() * inputs.size(0)
          running_corrects += torch.sum(preds == labels.data)

      epoch_loss = running_loss / len(dataloaders_dict[phase].dataset)
      epoch_acc = running_corrects.double() / len(dataloaders_dict[phase].dataset)
      if ( epoch_loss < best_valid_loss) and (phase == 'valid'):
        print("entra a guardar...")
        best_valid_loss = epoch_loss
        torch.save({'current_epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'current_valid_loss': epoch_loss
                   }, 'best_model.pt')

      print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

# **Calculo de rendimiento**

In [None]:
def testing(model):
  targets, predictions = [], []
  for mbdata, label in test_loader:
      mbdata, label = mbdata.to(device), label.to(device)
      logits = model.forward(mbdata)
      predictions.append(logits.argmax(dim=1).detach().cpu().numpy())     
      targets.append(label.cpu().numpy()) 
  predictions = np.concatenate(predictions) 
  targets = np.concatenate(targets)

  from sklearn.metrics import confusion_matrix, classification_report

  cm = confusion_matrix(targets, predictions)
  display(cm)
  print(classification_report(targets, predictions))


# **Modelo ResNet**

In [None]:
resnet = models.resnet18(pretrained=True, progress=True)
#display(resnet)

In [None]:
for param in resnet.parameters(): 
    param.requires_grad = False

neuronsResnet = resnet.fc.in_features

In [None]:

#resnet.fc = torch.nn.Linear(neuronsResnet, 4)
resnet.fc = torch.nn.Sequential(torch.nn.Dropout(p=0.5, inplace=False),
                                  torch.nn.Linear(in_features=neuronsResnet, out_features=256, bias=True),
                                  torch.nn.ReLU(inplace=True),
                                  torch.nn.Dropout(p=0.5, inplace=False),  
                                  torch.nn.Linear(in_features=256, out_features=4, bias=True))

resnet.to(device)

criterionResnet = torch.nn.CrossEntropyLoss()
optimizerResnet = torch.optim.Adam(resnet.parameters(), lr=1e-3)
criterionResnet.to(device)

nEpochResnet = 10

###  Entrenamiento del modelo

In [None]:

train_model(resnet, dataloaders_dict, criterionResnet, optimizerResnet, nEpochResnet)
resnet.load_state_dict(torch.load('best_model.pt')['model_state_dict'])

Epoch 0/9
----------
train Loss: 0.8443 Acc: 0.6284
entra a guardar...
valid Loss: 0.7051 Acc: 0.6565
Epoch 1/9
----------
train Loss: 0.7080 Acc: 0.6948
valid Loss: 0.8758 Acc: 0.5930
Epoch 2/9
----------
train Loss: 0.6627 Acc: 0.7130
valid Loss: 0.8810 Acc: 0.6020
Epoch 3/9
----------
train Loss: 0.6634 Acc: 0.7120
valid Loss: 0.9786 Acc: 0.5805
Epoch 4/9
----------
train Loss: 0.6404 Acc: 0.7210
entra a guardar...
valid Loss: 0.6888 Acc: 0.6535
Epoch 5/9
----------
train Loss: 0.6238 Acc: 0.7267
valid Loss: 0.7593 Acc: 0.6630
Epoch 6/9
----------
train Loss: 0.6227 Acc: 0.7327
valid Loss: 0.6936 Acc: 0.6840
Epoch 7/9
----------
train Loss: 0.6132 Acc: 0.7339
valid Loss: 0.7408 Acc: 0.6520
Epoch 8/9
----------
train Loss: 0.6185 Acc: 0.7304
valid Loss: 0.7952 Acc: 0.6580
Epoch 9/9
----------
train Loss: 0.5908 Acc: 0.7477
valid Loss: 0.7588 Acc: 0.6565


<All keys matched successfully>

### Matriz confución y reporte de clasificación

In [None]:
resnet.to(device)
testing(resnet)

array([[ 8,  3, 13,  6],
       [ 0, 23,  5,  2],
       [ 1,  4, 16,  9],
       [ 0,  4, 12, 14]])

              precision    recall  f1-score   support

           0       0.89      0.27      0.41        30
           1       0.68      0.77      0.72        30
           2       0.35      0.53      0.42        30
           3       0.45      0.47      0.46        30

    accuracy                           0.51       120
   macro avg       0.59      0.51      0.50       120
weighted avg       0.59      0.51      0.50       120



# **Modelo AlexNet**

In [None]:
alexnet = models.alexnet(pretrained=True, progress=True)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




In [None]:
for param in alexnet.parameters(): 
    param.requires_grad = False

alexnet.classifier[6] = torch.nn.Linear(alexnet.classifier[6].in_features,4)
alexnet.to(device)

criterionAlexNet = torch.nn.CrossEntropyLoss()
optimizerAlexNet = torch.optim.Adam(alexnet.parameters(), lr=0.0001)
criterionAlexNet.to(device)

nEpochAlexNet = 10

### Entrenamiento del modelo

In [None]:
train_model(alexnet, dataloaders_dict, criterionAlexNet, optimizerAlexNet, nEpochAlexNet)

Epoch 0/9
----------
train Loss: 0.6638 Acc: 0.7504
valid Loss: 0.6942 Acc: 0.6475
Epoch 1/9
----------
train Loss: 0.4209 Acc: 0.8455
valid Loss: 0.6559 Acc: 0.6765
Epoch 2/9
----------
train Loss: 0.3756 Acc: 0.8596
valid Loss: 0.5741 Acc: 0.7275
Epoch 3/9
----------
train Loss: 0.3458 Acc: 0.8709
valid Loss: 0.6584 Acc: 0.7000
Epoch 4/9
----------
train Loss: 0.3207 Acc: 0.8799
valid Loss: 0.6556 Acc: 0.7035
Epoch 5/9
----------
train Loss: 0.3102 Acc: 0.8808
valid Loss: 0.6626 Acc: 0.7050
Epoch 6/9
----------
train Loss: 0.3037 Acc: 0.8811
valid Loss: 0.6549 Acc: 0.7075
Epoch 7/9
----------
train Loss: 0.2900 Acc: 0.8877
valid Loss: 0.6323 Acc: 0.7160
Epoch 8/9
----------
train Loss: 0.3003 Acc: 0.8797
valid Loss: 0.7319 Acc: 0.7065
Epoch 9/9
----------
train Loss: 0.2723 Acc: 0.8914
valid Loss: 0.6637 Acc: 0.7255


### Matriz de confución y reporte de clasificación

In [None]:
testing(alexnet)

array([[ 9,  5,  3, 13],
       [ 0, 14,  2, 14],
       [ 0, 10,  8, 12],
       [ 0,  6,  0, 24]])

              precision    recall  f1-score   support

           0       1.00      0.30      0.46        30
           1       0.40      0.47      0.43        30
           2       0.62      0.27      0.37        30
           3       0.38      0.80      0.52        30

    accuracy                           0.46       120
   macro avg       0.60      0.46      0.45       120
weighted avg       0.60      0.46      0.45       120



# **Modelo MobileNet v2**


In [None]:
mobilenet = models.mobilenet_v2(pretrained=True)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


HBox(children=(FloatProgress(value=0.0, max=14212972.0), HTML(value='')))




In [None]:
for param in mobilenet.parameters(): 
    param.requires_grad = False

mobilenet.classifier[1] = torch.nn.Linear(mobilenet.classifier[1].in_features,4)
mobilenet.to(device)

criterionMobileNet = torch.nn.CrossEntropyLoss()
optimizerMobileNet = torch.optim.Adam(mobilenet.parameters(), lr=1e-3)
criterionMobileNet.to(device)

nEpochMobileNet = 10


### Entrenamiento del modelo

In [None]:
train_model(mobilenet, dataloaders_dict, criterionMobileNet, optimizerMobileNet, nEpochMobileNet)

Epoch 0/9
----------
train Loss: 0.6015 Acc: 0.7492
valid Loss: 0.6536 Acc: 0.6870
Epoch 1/9
----------
train Loss: 0.4381 Acc: 0.8269
valid Loss: 0.7267 Acc: 0.6660
Epoch 2/9
----------
train Loss: 0.4107 Acc: 0.8343
valid Loss: 0.9407 Acc: 0.6265
Epoch 3/9
----------
train Loss: 0.4008 Acc: 0.8329
valid Loss: 0.4991 Acc: 0.7925
Epoch 4/9
----------
train Loss: 0.3842 Acc: 0.8451
valid Loss: 0.5190 Acc: 0.7855
Epoch 5/9
----------
train Loss: 0.3870 Acc: 0.8418
valid Loss: 0.5298 Acc: 0.7880
Epoch 6/9
----------
train Loss: 0.3703 Acc: 0.8504
valid Loss: 0.3610 Acc: 0.8730
Epoch 7/9
----------
train Loss: 0.3887 Acc: 0.8418
valid Loss: 0.6920 Acc: 0.7110
Epoch 8/9
----------
train Loss: 0.3944 Acc: 0.8371
valid Loss: 0.4977 Acc: 0.8060
Epoch 9/9
----------
train Loss: 0.4013 Acc: 0.8375
valid Loss: 0.5785 Acc: 0.7485


### Matriz de confución y reporte de clasificación

In [None]:
testing(mobilenet)

array([[11,  0,  5, 14],
       [ 0,  8,  7, 15],
       [ 0,  0, 16, 14],
       [ 0,  0,  6, 24]])

              precision    recall  f1-score   support

           0       1.00      0.37      0.54        30
           1       1.00      0.27      0.42        30
           2       0.47      0.53      0.50        30
           3       0.36      0.80      0.49        30

    accuracy                           0.49       120
   macro avg       0.71      0.49      0.49       120
weighted avg       0.71      0.49      0.49       120



# **Modelo GoogleNet**

In [None]:
googlenet = models.googlenet(pretrained=True, progress=True)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /root/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


HBox(children=(FloatProgress(value=0.0, max=52147035.0), HTML(value='')))




In [None]:
for param in googlenet.parameters(): 
    param.requires_grad = False



In [None]:

googlenet.fc = torch.nn.Sequential(torch.nn.Dropout(p=0.5, inplace=False),
                                  torch.nn.Linear(in_features=neuronsG, out_features=512, bias=True),
                                  torch.nn.ReLU(inplace=True),
                                  torch.nn.Dropout(p=0.5, inplace=False),  
                                  torch.nn.Linear(in_features=512, out_features=4, bias=True))
#googlenet.fc = torch.nn.Linear(neuronsG, 4)
googlenet.to(device)

criterionGoogleNet = torch.nn.CrossEntropyLoss()
optimizerGoogleNet = torch.optim.Adam(googlenet.parameters(), lr=1e-3)
criterionGoogleNet.to(device)  

nEpochGoogleNet = 10

### Entrenamiento del modelo

In [None]:
train_model(googlenet, dataloaders_dict, criterionGoogleNet, optimizerGoogleNet, nEpochGoogleNet)

Epoch 0/9
----------
train Loss: 0.6493 Acc: 0.7749
valid Loss: 0.5576 Acc: 0.7410
Epoch 1/9
----------
train Loss: 0.4374 Acc: 0.8438
valid Loss: 0.6736 Acc: 0.6860
Epoch 2/9
----------
train Loss: 0.3869 Acc: 0.8580
valid Loss: 0.4388 Acc: 0.8100
Epoch 3/9
----------
train Loss: 0.3727 Acc: 0.8599
valid Loss: 0.5759 Acc: 0.7290
Epoch 4/9
----------
train Loss: 0.3602 Acc: 0.8656
valid Loss: 0.4928 Acc: 0.7710
Epoch 5/9
----------
train Loss: 0.3525 Acc: 0.8641
valid Loss: 0.5084 Acc: 0.7660
Epoch 6/9
----------
train Loss: 0.3611 Acc: 0.8579
valid Loss: 0.5391 Acc: 0.7480
Epoch 7/9
----------
train Loss: 0.3531 Acc: 0.8618
valid Loss: 0.5840 Acc: 0.7490
Epoch 8/9
----------
train Loss: 0.3460 Acc: 0.8687
valid Loss: 0.4388 Acc: 0.7940
Epoch 9/9
----------
train Loss: 0.3427 Acc: 0.8655
valid Loss: 0.6372 Acc: 0.6975


### Matriz de confución

In [None]:
testing(googlenet)

array([[23,  3,  0,  4],
       [ 2, 15,  1, 12],
       [ 1,  1, 14, 14],
       [ 2,  0, 11, 17]])

              precision    recall  f1-score   support

           0       0.82      0.77      0.79        30
           1       0.79      0.50      0.61        30
           2       0.54      0.47      0.50        30
           3       0.36      0.57      0.44        30

    accuracy                           0.57       120
   macro avg       0.63      0.57      0.59       120
weighted avg       0.63      0.57      0.59       120

