In [112]:
# link colab and drive
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [113]:
# CV project
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import os
import torchvision
from torchvision import datasets, transforms, models
from IPython import display
import shelve
from PIL import Image
import glob
import matplotlib
from matplotlib.pyplot import *

# if available use GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device: {}'.format(device))

trainPath = '/content/drive/My Drive/Units/CV_project/Images/train'
testPath =  '/content/drive/My Drive/Units/CV_project/Images/test'
dataTrain = torchvision.datasets.ImageFolder(trainPath)
dataTest = torchvision.datasets.ImageFolder(testPath)
print(dataTrain.classes)
print(dataTrain.imgs[99][0])
print(len(dataTrain.imgs[99][0]))
im = Image.open(dataTrain.imgs[1][0])
%matplotlib inline
im
ii=torch.from_numpy(np.asarray(im))
ii

Device: cpu
['Bedroom', 'Coast', 'Forest', 'Highway', 'Industrial', 'InsideCity', 'Kitchen', 'LivingRoom', 'Mountain', 'Office', 'OpenCountry', 'Store', 'Street', 'Suburb', 'TallBuilding']
/content/drive/My Drive/Units/CV_project/Images/train/Bedroom/image_0216.jpg
76


tensor([[208, 183, 181,  ..., 111, 111, 110],
        [207, 183, 181,  ..., 111, 111, 110],
        [208, 184, 181,  ..., 112, 112, 110],
        ...,
        [101, 101,  99,  ...,  28,  28,  29],
        [ 96,  93,  93,  ...,  24,  25,  26],
        [ 98,  96,  98,  ...,  25,  25,  24]], dtype=torch.uint8)

In [174]:
from torch.utils.data import Dataset, Sampler, DataLoader, SubsetRandomSampler

class customDataset(Dataset):
  '''
    Our dataset is a list of lists: [path_to_image, class]
  '''

  def __init__(self, root_path, transform):
    '''
      This function takes in input the path of the folder containing class folders,
      the tranformation to be applied to each image and creates the dataset
    '''
    self.data = torchvision.datasets.ImageFolder(root_path)
    #self.datasource = torchvision.datasets.ImageFolder(root_path)
    #self.data = []
    #for i in range(len(self.datasource)):
    #  true_class = self.datasource[i][1]
    #  im = Image.open(self.datasource.imgs[i][0])
    #  img_tensor = transform(im)
    #  self.data.append([img_tensor,true_class])
    
  def __getitem__(self, key):
    '''
      This function access the image, transforms it and returns: the image as 
      a tensor and the class (tensor of 15 with 1 in correct index)

      Or is it better that the dataset contains already read images like now?
    '''
    true_class = self.data[key][1]
    
    im = Image.open(self.data.imgs[key][0])
    img_tensor = transform(im)
    return img_tensor, true_class
    #return self.data[key]

  def __len__(self):
    return len(self.data)

  def get_keys(self):
    '''
      I need this function in the sampler. But keys are just numbers 0:(len-1)
      probably I don't need this function
    '''
    return range(len(self.data))

# just to tensor, i can resize, force to double, normalize, crop, ....
def transform(img):
  t = transforms.Resize([64,64],interpolation=Image.BILINEAR)
  i = t(img)
  i = torch.from_numpy(np.asarray(i))
  i.unsqueeze_(0)
  #i = i.type(torch.cuda.FloatTensor)
  return i

trainingData = customDataset(trainPath,transform)
x = trainingData[1][0]
print(x.shape, x.type())
print(x)

l = trainingData.get_keys()
l[657]

torch.Size([1, 64, 64]) torch.ByteTensor
tensor([[[186, 179, 177,  ..., 116, 114, 111],
         [187, 179, 177,  ..., 118, 116, 113],
         [189, 180, 178,  ..., 119, 117, 115],
         ...,
         [101,  92,  85,  ...,  30,  28,  27],
         [100,  93,  84,  ...,  28,  28,  28],
         [ 96,  87,  81,  ...,  29,  27,  26]]], dtype=torch.uint8)


657

In [202]:
def split(dataset, val_size):
  '''
    @dataset: a customDataset object
    @val_size: percentage of the dataset that should compose the validation set

    This function allows us to split our dataset into 
    a validation set and a training set. This is used internally in 
    Loader
  '''

  # We want to split our dataset given itself and the % of sample for validation
  # or do not shuffle and sample index from uniform (so almost same proportion train-val for each class since ordered)
  num = len(dataset)
  index = list(dataset.get_keys())
  np.random.shuffle(index) # pick at random
  #indexes = np.random.random_integers(0,num-1,int(0.85*num))
  flag_split = int(val_size * num)

  train_index = index[flag_split:]
  validation_index = index[:flag_split]

  # https://pytorch.org/docs/stable/data.html -> Samples elements randomly from a given list of indices, without replacement
  train_sampler = SubsetRandomSampler(train_index)
  validation_sampler = SubsetRandomSampler(validation_index)

  return train_sampler, validation_sampler


def loaders(dataset, val_size, batch_size, num_workers):
  ''' 
    @dataset: a customDataset object
    @val_size: the percentage (in [0,1]) of the validation set data
    @batch_size: the number of data in each batch
    @num_workers: number of subprocesses to use in the data loader
  '''

  train_sampler, validation_sampler = split(dataset, val_size)
  train_loader = DataLoader(dataset,
                            batch_size = batch_size,
                            sampler = train_sampler,
                            num_workers = num_workers)
  val_loader = DataLoader(dataset,
                          batch_size = batch_size,
                          sampler = validation_sampler,
                          num_workers = num_workers)
  return train_loader, val_loader

In [245]:
import torch.nn.functional as F

# now I can try to build a cnn
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.input_dim = 1 * 64 * 64
        self.n_classes = 15
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, stride=1)
        #self.conv1.weight = torch.nn.init.normal_(self.conv1.weight, mean=0.0, std=0.01)
        
        self.maxpooling = nn.MaxPool2d(kernel_size=2,stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
        #self.conv2.weight = torch.nn.init.normal_(self.conv2.weight, mean=0.0, std=0.01)
        
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        #self.conv3.weight = torch.nn.init.normal_(self.conv3.weight, mean=0.0, std=0.01)
        
        self.fc1 = nn.Linear(12*12*32,15) # 12*12*32 no padding default
       # self.fc1.weight = torch.nn.init.normal_(self.fc1.weight, mean=0.0, std=0.01)
        
        
        
        
    def forward(self, x, verbose=False):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpooling(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpooling(x)
        x = self.conv3(x)
        x = F.relu(x)
        #print(x.shape) # here is m x 32 x 12 x 12
        # but it's transformed in (mx32m12) x 12 instead I want m x (32x12x12)
        x = x.view(x.size(0), -1)
        #print(x.shape)
        x = self.fc1(x)
        #x = F.softmax(x, dim=1) # tensor is mx15 I want softmax according to dim 1
        return x

        
net = CNN()
for param in net.parameters():
    param.requires_grad = True
net.to(device) # so I put the model on GPU
print(net)

#print(net.conv1.weight)

CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (maxpooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=4608, out_features=15, bias=True)
)


In [None]:
# train and test sets
# just to tensor, i can resize, force to double, normalize, crop, ....
def transform(img):
  t = transforms.Resize([64,64],interpolation=Image.BILINEAR)
  i = t(img)
  i = torch.from_numpy(np.asarray(i)/255)
  i.unsqueeze_(0)
  return i

trainingSet = customDataset(trainPath, transform)
testSet = customDataset(testPath, transform)


# train and validation dataloaders
batch_size = 32
num_workers = 1
trainLoader, valLoader = loaders(trainingSet, 0.15, batch_size, num_workers)

lr = 0.01
momentum = 0.9
epochs = 20

n_batches = len(trainLoader)
print(n_batches)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)

net.train() 
for e in range(epochs):
    correct_classified = 0
    for i, data in enumerate(trainLoader):
        
        batch = data[0].to(device)
        batch = batch.float()
        labels = data[1].to(device)    


        optimizer.zero_grad() 
        outputs = net(batch)
        loss = criterion(outputs, labels) # here I'm not computing the right quantity!
        # outputs is mx15 but labels just 15, I need each label to be a vector 
        predicted_class = torch.argmax(outputs, dim=1)
        correct_classified = correct_classified + sum((predicted_class==labels).int())

        loss.backward()
        optimizer.step()


        if i % 5 == 0:
            print("[EPOCH]: {}, [BATCH]: {}/{}, [TRAINING LOSS]: {}".format(e, i, n_batches, loss.item()))
            #display.clear_output(wait=True)
    # for now print accuracy at each epoch
    print('[TRAINING ACCURACY AT EPOCH {}]: {}'.format(e,correct_classified/(n_batches*batch_size)))




40


  app.launch_new_instance()


[EPOCH]: 0, [BATCH]: 0/40, [TRAINING LOSS]: 2.461299419403076
[EPOCH]: 0, [BATCH]: 5/40, [TRAINING LOSS]: 2.064401626586914
[EPOCH]: 0, [BATCH]: 10/40, [TRAINING LOSS]: 1.6929833889007568
[EPOCH]: 0, [BATCH]: 15/40, [TRAINING LOSS]: 1.370700478553772
[EPOCH]: 0, [BATCH]: 20/40, [TRAINING LOSS]: 1.6902341842651367
[EPOCH]: 0, [BATCH]: 25/40, [TRAINING LOSS]: 1.3244773149490356
[EPOCH]: 0, [BATCH]: 30/40, [TRAINING LOSS]: 1.239205241203308
[EPOCH]: 0, [BATCH]: 35/40, [TRAINING LOSS]: 1.4019213914871216
[TRAINING ACCURACY AT EPOCH 0]: 0.598437488079071
[EPOCH]: 1, [BATCH]: 0/40, [TRAINING LOSS]: 0.5575956106185913
[EPOCH]: 1, [BATCH]: 5/40, [TRAINING LOSS]: 0.8753030300140381
[EPOCH]: 1, [BATCH]: 10/40, [TRAINING LOSS]: 0.8142128586769104
[EPOCH]: 1, [BATCH]: 15/40, [TRAINING LOSS]: 0.6829840540885925
[EPOCH]: 1, [BATCH]: 20/40, [TRAINING LOSS]: 0.8562234044075012
[EPOCH]: 1, [BATCH]: 25/40, [TRAINING LOSS]: 1.0574771165847778
[EPOCH]: 1, [BATCH]: 30/40, [TRAINING LOSS]: 0.519023835659027

In [237]:
print(40*32)
1500*0.85

1280


1275.0

In [226]:
net.conv1.weight

Parameter containing:
tensor([[[[ 0.0126, -0.0148,  0.0014],
          [-0.0205, -0.0042, -0.0003],
          [ 0.0153,  0.0148, -0.0039]]],


        [[[ 0.0120,  0.0092,  0.0069],
          [ 0.0054,  0.0034,  0.0168],
          [ 0.0155, -0.0031, -0.0109]]],


        [[[-0.0053,  0.0023,  0.0136],
          [-0.0067, -0.0041, -0.0028],
          [-0.0050,  0.0128,  0.0011]]],


        [[[-0.0166, -0.0088, -0.0125],
          [-0.0199, -0.0196,  0.0011],
          [ 0.0099, -0.0140, -0.0018]]],


        [[[ 0.0018,  0.0061,  0.0061],
          [-0.0103, -0.0104,  0.0138],
          [-0.0128, -0.0068, -0.0085]]],


        [[[-0.0019,  0.0147,  0.0006],
          [-0.0026,  0.0089,  0.0234],
          [ 0.0119,  0.0327, -0.0014]]],


        [[[ 0.0051,  0.0067,  0.0107],
          [-0.0003,  0.0154, -0.0010],
          [ 0.0076,  0.0110,  0.0215]]],


        [[[-0.0127,  0.0022, -0.0094],
          [ 0.0002, -0.0086, -0.0109],
          [-0.0033,  0.0071,  0.0067]]]], requires_gr