<a href="https://colab.research.google.com/github/JoseLuis96001/Sign_Image_Classification_Neural_Network/blob/main/ClassificationNNv3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Jose Llumiquinga Molina

In [None]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F #capas sin parametros
import torch.optim as optim


from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.utils import make_grid

import os
from PIL import Image
import sys
sys.path.append('/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/6464signs')

In [None]:
# The network is created by inheriting from the class nn.Module
class Net(nn.Module):
  """Small CNN classifier for 3x64x64 images.

  Three conv -> batch-norm -> max-pool -> ReLU stages (each halving the
  spatial size: 64 -> 32 -> 16 -> 8) followed by two fully connected layers.
  The output is a tensor of per-class log-probabilities (``log_softmax``),
  which is the input expected by ``nn.NLLLoss``.

  Args:
    num_channels: number of feature maps produced by the first convolution;
      the next two convolutions produce 2x and 4x that amount.
    num_classes: number of output classes (default 6).
    dropout_p: dropout probability applied before the last linear layer
      (default 0.8).
  """

  def __init__(self, num_channels, num_classes=6, dropout_p=0.8):
    super(Net, self).__init__()  # initialise the parent nn.Module

    self.num_channels = num_channels
    self.dropout_p = dropout_p

    # Convolutional feature extractors: (in_channels, out_channels, kernel).
    # kernel 3 / stride 1 / padding 1 keeps the spatial size unchanged.
    self.conv1 = nn.Conv2d(3, self.num_channels, 3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(self.num_channels)
    self.conv2 = nn.Conv2d(self.num_channels, self.num_channels*2, 3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(self.num_channels*2)
    self.conv3 = nn.Conv2d(self.num_channels*2, self.num_channels*4, 3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(self.num_channels*4)

    # Fully connected head; 8*8 is the spatial size left after three 2x pools
    # of a 64x64 input.
    self.fc1 = nn.Linear(self.num_channels*4*8*8, self.num_channels*4)
    self.fcbn1 = nn.BatchNorm1d(self.num_channels*4)
    self.fc2 = nn.Linear(self.num_channels*4, num_classes)

  def forward(self, x):
    """Return log-probabilities of shape (batch, num_classes) for input x.

    x is expected to be (batch, 3, 64, 64); any other spatial size makes the
    flattened feature vector mismatch ``fc1`` and raises.
    """
    # Start: 3 x 64 x 64
    x = self.bn1(self.conv1(x))        # num_channels   x 64 x 64
    x = F.relu(F.max_pool2d(x, 2))     # num_channels   x 32 x 32
    x = self.bn2(self.conv2(x))        # num_channels*2 x 32 x 32
    x = F.relu(F.max_pool2d(x, 2))     # num_channels*2 x 16 x 16
    x = self.bn3(self.conv3(x))        # num_channels*4 x 16 x 16
    x = F.relu(F.max_pool2d(x, 2))     # num_channels*4 x  8 x  8

    # Flatten per sample. Keeping the batch dimension explicit means a wrong
    # input size fails loudly in fc1 instead of silently changing the batch.
    x = x.view(x.size(0), -1)

    x = F.relu(self.fcbn1(self.fc1(x)))
    # Dropout must follow the module's train/eval mode; hard-coding
    # training=True would keep dropping units at inference time.
    x = F.dropout(x, p=self.dropout_p, training=self.training)
    x = self.fc2(x)

    # log_softmax over the class dimension (multi-class classification)
    x = F.log_softmax(x, dim=1)

    return x

In [None]:
# Implementing the dataset, inherits from pytorch
class SIGNSDataset(Dataset):
  """Dataset of sign images stored as ``<base_dir>/<split>_signs/*.jpg``.

  The class label of each image is the first character of its file name,
  parsed as an integer.

  Args:
    base_dir: directory that contains the ``<split>_signs`` folders.
    split: name of the split; the folder read is ``"{split}_signs"``.
    transform: optional callable applied to the PIL image in ``__getitem__``.

  Raises:
    ValueError: if a ``.jpg`` file name does not start with a digit.
  """

  def __init__(self, base_dir, split="train", transform=None):
    path = os.path.join(base_dir, "{}_signs".format(split))

    # Filter once and derive BOTH lists from the same entries so that
    # filenames[i] and targets[i] always refer to the same image (non-jpg
    # entries must not contribute a label). Sorting makes the order
    # reproducible, since os.listdir returns entries in arbitrary order.
    files = sorted(f for f in os.listdir(path) if f.endswith(".jpg"))

    self.filenames = [os.path.join(path, f) for f in files]  # full image paths
    self.targets = [int(f[0]) for f in files]  # label = first char of the name
    self.transform = transform  # preprocessing applied to each image

  def __len__(self):
    """Return the number of images in the split."""
    return len(self.filenames)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for index ``idx``; image is transformed if a transform was given."""
    image = Image.open(self.filenames[idx])
    if self.transform:
      image = self.transform(image)
    return image, self.targets[idx]

In [None]:
transform = transforms.Compose(
  [transforms.RandomHorizontalFlip(), #data augmentation
   transforms.ToTensor(),
   transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))    #default values mean, std,
  ]
)

In [None]:
trainver = trainset = SIGNSDataset('/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/6464signs', split="train")
trainver[0][0]

In [None]:
trainset = SIGNSDataset('/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/6464signs', split="train", transform = transform)#transforms.ToTensor())

In [None]:
dataloader = DataLoader(trainset, batch_size=32) # batch data delivery

In [None]:
valset = SIGNSDataset('/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/6464signs', split="val", transform = transform)
valloader = DataLoader(valset,batch_size=32)

In [None]:
testset = SIGNSDataset('/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/6464signs', split="test", transform = transform)
testloader = DataLoader(testset,batch_size=32)

In [None]:
def imshow(inp, title=None, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Imshow for Tensor.

    Undoes the per-channel normalisation and displays the image with
    matplotlib.

    Args:
        inp: image tensor laid out as (channels, height, width).
        title: optional title drawn above the image.
        mean: per-channel mean that was used to normalise ``inp``.
        std: per-channel std that was used to normalise ``inp``.

    The defaults match the ``Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))``
    transform used by this notebook; un-normalising with different statistics
    would display wrong colours.
    """
    # detach/cpu so tensors that require grad or live on the GPU also work
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))  # CHW -> HWC
    inp = np.asarray(std) * inp + np.asarray(mean)  # invert (x - mean) / std
    inp = np.clip(inp, 0, 1)  # keep values inside the displayable range
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # brief pause so the figure gets drawn

In [None]:
# Preview: draw the first training batch as an image grid and print its labels.
first_batch = next(iter(dataloader), None)
if first_batch is not None:  # an empty loader shows nothing
  inputs, targets = first_batch
  out = make_grid(inputs)
  imshow(out)
  print(targets)

In [None]:
device = torch.device('cuda') #to configure and execute CUDA operations

In [None]:
device

In [None]:
net = Net(32).to(device) # 32 no of channels
loss_fn = nn.NLLLoss() # The negative log likelihood loss. It is useful to train a classification problem with C classes.
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum = 0.9) # it needs the network parameters to modify them

In [None]:
# To calculate the averages
class RunningMetric():
  """Accumulates a weighted sum and a count; calling the object returns sum / count."""

  def __init__(self):
    # S: accumulated sum, N: accumulated amount of data
    self.S, self.N = 0, 0

  def update(self, val, size):
    """Add ``val`` to the running sum and ``size`` to the running count."""
    self.S += val
    self.N += size

  def __call__(self):
    """Return the current average (raises ZeroDivisionError before any update)."""
    count = float(self.N)
    return self.S / count

In [None]:
num_epochs = 100

In [None]:
for epoch in range(num_epochs):
  print('Epoch {}/{}'.format(epoch+1, num_epochs))
  print('-'*10)
  
  running_loss = RunningMetric() # loss
  running_acc = RunningMetric() # accuracy
  
  for inputs, targets in dataloader:
    inputs, targets = inputs.to(device), targets.to(device) # to the gpu
    optimizer.zero_grad() # standard-set the gradients to zero for each batch
    
    outputs = net(inputs) # 32 probabilities
    _, preds = torch.max(outputs,1) # take the max prob
    loss = loss_fn(outputs, targets) # calculate the loss
    
    loss.backward() # automatically calculated gradients
    optimizer.step() # update parameters
    
    batch_size = inputs.size()[0]
    running_loss.update(loss.item()*batch_size,
                       batch_size)
    running_acc.update(torch.sum(preds == targets).float(),
                       batch_size)
  print("Loss: {:.4f} Acc: {:.4f} ".format(running_loss(),
                                          running_acc()))

In [None]:
#PATH = '/content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4/hands_net.pth'
#torch.save(net.state_dict(), PATH)

In [None]:
# Test
# Preview: draw the first test batch as an image grid and print its labels.
first_batch = next(iter(testloader), None)
if first_batch is not None:  # an empty loader shows nothing
  inputs, targets = first_batch
  out = make_grid(inputs)
  imshow(out)
  print(targets)

In [None]:
dataiter = iter(testloader)
images, labels = dataiter.next()

In [None]:
outputs = net(images.to(device))

In [None]:
#outputs

In [None]:
_, predicted = torch.max(outputs, 1)

In [None]:
predicted

In [None]:
# most of the predictions correspond to the true labels

In [None]:
correct = 0
total = 0
# Evaluation mode: batch-norm must use its running statistics (and not update
# them with test data), and dropout layers must be disabled.
net.eval()
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest log-probability is chosen as prediction
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network test images: %d %%' % (
    100 * correct / total))

In [None]:
# Change the working directory to the assignment folder; presumably so that the
# nbconvert command below can find the notebook by its relative file name.
%cd /content/drive/MyDrive/10mo Sem/Computer Vision/Assignment 4

In [None]:
# Install the TeX Live packages (XeTeX and fonts); presumably required by the
# PDF export below - TODO confirm the package names exist on the runtime's distro.
!sudo apt-get install texlive-xetex texlive-fonts-recommended texlive-generic-recommended

In [None]:
# Convert this notebook to PDF with nbconvert.
!jupyter nbconvert ClassificationNNv3.ipynb --to pdf

In [None]:
# Scratch example: a random 24 x 6 tensor (same number of columns as the
# network has classes), displayed by the notebook.
a = torch.randn(24, 6)
a

In [None]:
# torch.max along dim 1 returns, for every row, the maximum value and its
# column index - the same call used above to pick the predicted class.
torch.max(a, 1)