In [1]:
#entrenamiento
import torch
import torch.optim as optim
device = torch.device('cuda')


In [2]:
import torchvision.transforms as transforms

In [3]:
import torch
from google.colab import drive
drive.mount('/gdrive')
import sys
sys.path.append('/gdrive/My Drive/dl-pytorch/')


Mounted at /gdrive


In [4]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
class SIGNSDataset(Dataset):
  def __init__(self, base_dir, split="train", transform=None):
    path = os.path.join(base_dir, "{}_signs".format(split))
    files = os.listdir(path)

    self.filenames = [os.path.join(path, f) for f in files if f.endswith(".jpg")]

    self.targets = [int(f[0]) for f in files]

    self.transform = transform
  
  def __len__(self):
    return len(self.filenames)


  def __getitem__(self, idx):
    image = Image.open(self.filenames[idx])
    if self.transform:
      image = self.transform(image)

    return image, self.targets[idx]
    

  



In [5]:
import torch.nn as nn
import torch.nn.functional as F #hay capas que no tienen parametros y se invocan como funciones
class Net(nn.Module):
  def __init__(self, num_channels):
    super(Net,self).__init__() #inizializacion
    # 3 capas convolucionales y 2 capas fully connected
    self.num_channels = num_channels
    
    self.conv1 = nn.Conv2d(3, self.num_channels, 3, stride = 1, padding = 1) #canales de entradqa, canales de salida, kernel size
    self.bn1 = nn.BatchNorm2d(self.num_channels)
    self.conv2 = nn.Conv2d(self.num_channels, self.num_channels*2, 3, stride = 1, padding = 1)
    self.bn2 = nn.BatchNorm2d(self.num_channels*2)
    self.conv3 = nn.Conv2d(self.num_channels*2, self.num_channels*4, 3, stride = 1, padding = 1)
    self.bn3 = nn.BatchNorm2d(self.num_channels*4)

    #en las capas sin parametros, maxpool: divide el tamaño de la imagen entre 2
    self.fc1 = nn.Linear(self.num_channels*4*8*8, self.num_channels*4) # entrada y salida            linear o fullyConnected
    self.fcbn1 = nn.BatchNorm1d(self.num_channels*4)
    self.fc2 = nn.Linear(self.num_channels*4, 6) # 6 es la salida final de la red
    
  def forward(self,x):
      #Empieza 3x64x64
      x = self.bn1(self.conv1(x)) # num_channels x 64 x 64
      x = F.relu(F.max_pool2d(x, 2)) # num_channels x 32 x 32
      x = self.bn2(self.conv2(x)) # num_channels*2 x 32 x32
      x = F.relu(F.max_pool2d(x, 2)) #num_channels*2 x 16 x 16
      x = self.bn3(self.conv3(x)) # num_channels*4 x16x16
      x = F.relu(F.max_pool2d(x, 2)) # num_channels*4 x 8 x 8

      #flatten
      x = x.view(-1, self.num_channels*4*8*8) #-1 flatten, size

      #fc
      x = F.relu(self.fcbn1( self.fc1(x)))
      x = F.dropout(x, p  = 0.8, training=True)
      x = self.fc2(x)

      #log_softmax

      x = F.log_softmax(x, dim=1)

      return x             

In [6]:
net = Net(32).to(device) #el parametro es el numero de canales

loss_fn = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum = 0.9)

In [7]:
#suma de valores y cantidad o tamaño de valores
class RunningMetric():
  def __init__(self):
      self.S = 0
      self.N = 0
  def update(self, val, size):
    self.S+=val
    self.N+= size

  def __call__(self):
    return self.S/float(self.N)

In [8]:
#num_epoch = 100 #pasadas dataset
#loop de entrenamiento

def train_and_evaluate(model, optimizer, loss_fn, dataloaders,device, num_epoch = 10, lr = 0.001):
  for g in optimizer.param_groups:
    g['lr'] = lr
  for epoch in range(num_epoch):
    print('Epoch {} / {} '.format(epoch+1, num_epoch))
    print('-'*10)

    for phase in ['train', 'val']:
      if phase == 'train':
        model.train()
      else:
        model.eval()
      running_loss = RunningMetric() #perdida
      running_acc = RunningMetric() #precision

      for inputs, targets in dataloaders[phase]:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad() #reset
        with torch.set_grad_enabled(phase == 'train'):
          outputs = net(inputs)
          _, preds = torch.max(outputs,1)
          loss = loss_fn(outputs, targets)
          if phase == 'train':
            loss.backward() # gradientes calculados automaticamente
            optimizer.step() #Actualiza parametros

        batch_size = inputs.size()[0]
        running_loss.update(loss.item()*batch_size, batch_size)

        running_acc.update(torch.sum(preds == targets).float(), batch_size)
      #print("Loss: {:.4f} Acc: {:.4f} ".format(running_loss(), running_acc()))
      print("Loss{}: {:.4f} Acc{}: {:.4f} lr : {}".format(phase,running_loss(),phase,running_acc(),lr))



  return model

In [9]:

from torchvision import models
vgg = models.vgg16(pretrained=True)
vgg = vgg.to(device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [10]:
for param in vgg.parameters():
  param.requires_grad = False
last_sequential_layer = list(vgg.children())[-1]
*list_of_layers, last_layer = list(last_sequential_layer.children())
in_features = last_layer.in_features

In [11]:
vgg.fc = nn.Linear(in_features,6)
vgg.fc.requires_grad = True
vgg.classifier = nn.Sequential(*(list_of_layers+[vgg.fc]))

In [12]:
transform = transforms.Compose(
  [transforms.RandomHorizontalFlip(), #data augmentation
   transforms.ToTensor(),
   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])    
  ]
)

trainset = SIGNSDataset('/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS', split="train", transform = transform)
trainloader = DataLoader(trainset,batch_size=32)

valset = SIGNSDataset('/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS', split="val", transform = transform)
valloader = DataLoader(trainset,batch_size=32)

testset = SIGNSDataset('/gdrive/My Drive/dl-pytorch/datasets/64x64_SIGNS', split="test", transform = transform)
testloader = DataLoader(trainset,batch_size=32)

dataloaders = {'train':trainloader,
              'val':valloader,
              'test':testloader}

In [14]:
loss_fn = nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum = 0.9)

train_and_evaluate(vgg, optimizer, loss_fn, dataloaders, device, num_epoch = 100)

Epoch 1 / 100 
----------
Losstrain: 1.6939 Acctrain: 0.3079 lr : 0.001
Lossval: 1.3603 Accval: 0.4688 lr : 0.001
Epoch 2 / 100 
----------
Losstrain: 1.2489 Acctrain: 0.5359 lr : 0.001
Lossval: 1.1290 Accval: 0.5984 lr : 0.001
Epoch 3 / 100 
----------
Losstrain: 1.0339 Acctrain: 0.6447 lr : 0.001
Lossval: 0.9509 Accval: 0.6794 lr : 0.001
Epoch 4 / 100 
----------
Losstrain: 0.9305 Acctrain: 0.6667 lr : 0.001
Lossval: 0.8549 Accval: 0.7164 lr : 0.001
Epoch 5 / 100 
----------
Losstrain: 0.8246 Acctrain: 0.7292 lr : 0.001
Lossval: 0.7926 Accval: 0.7535 lr : 0.001
Epoch 6 / 100 
----------
Losstrain: 0.7892 Acctrain: 0.7431 lr : 0.001
Lossval: 0.7185 Accval: 0.7755 lr : 0.001
Epoch 7 / 100 
----------
Losstrain: 0.6886 Acctrain: 0.7743 lr : 0.001
Lossval: 0.6209 Accval: 0.8310 lr : 0.001
Epoch 8 / 100 
----------
Losstrain: 0.6350 Acctrain: 0.8009 lr : 0.001
Lossval: 0.5803 Accval: 0.8310 lr : 0.001
Epoch 9 / 100 
----------
Losstrain: 0.5931 Acctrain: 0.8322 lr : 0.001
Lossval: 0.5598 

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1