In this notebook we load, modify and train the AlexNet model

In [1]:
import os
from torch_snippets import *
from torchvision import transforms, models
from sklearn.model_selection import train_test_split, StratifiedKFold
from pathlib import PurePath
import imgaug.augmenters as iaa
import numpy as np
import pandas as pd
from scipy import interpolate
from scipy.stats import shapiro
import matplotlib
from tqdm import tnrange,notebook
import cv2 as cv2
from glob import glob
import math
import torch 
from torch.utils.data import Sampler
from torch.autograd import Variable
from torchsummary import summary
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(torch.cuda.device_count())
print(torch.cuda.get_device_capability())
print(torch.cuda.get_device_properties(0))

Exception: No module named 'sklego'


In [2]:
tfms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # imagenet
])

Our Dataset overload is now a bit more sophisticated as it now implements k-fold stratified cross validation. Use sklearn for this

In [3]:
splits = None
df_train = None
class pathchData(Dataset):
    def __init__(self, split, train_data, n_splits=5, aug=None):
        global splits
        global df_train
        self.train_data  = train_data
        self.split       = split
        self.aug         = aug
        if splits == None:
            splitter = StratifiedKFold(n_splits, shuffle=True, random_state=0)
            df_train = pd.read_csv('spotgarbage-GINI-master\spotgarbage\\train.csv', delimiter=',')
            splits = []
            for train_idx, test_idx in splitter.split(df_train['name'], df_train['class']):
                splits.append((train_idx, test_idx))
            torch.save(splits, 'spotgarbage-GINI-master\spotgarbage\\splits.pt')

    def __len__(self):
        return len(splits[self.split][0]) if self.train_data else len(splits[self.split][1])
    def __getitem__(self, ix):
        i = 0 if self.train_data else 1
        #fullName = df_train['name'][splits[self.split][0][ix]] if self.train_data else df_train['name'][splits[self.split][1][ix]]
        #garbageClass = df_train['class'][splits[self.split][0][ix]] if self.train_data else df_train['class'][splits[self.split][1][ix]]

        fullName = df_train['name'][splits[self.split][i][ix]]
        garbageClass = df_train['class'][splits[self.split][i][ix]]

        image = read(f'spotgarbage-GINI-master/spotgarbage/patches/{fullName}',1)
        image = cv2.resize(image, (227, 227))
        return image, 1 if garbageClass == 'garbage' else 0
    def choose(self): return self[randint(len(self))]
    def collate_fn(self, batch):
        ims, garbageClasses = list(zip(*batch))
        ims = torch.cat([tfms(im.copy()/255.)[None] for im in ims]).float().to(device)
        garbageClasses = torch.tensor(garbageClasses).to(device)
        return ims, garbageClasses

Now that we have created the datasets lets validate the content. First how much data in each training and validation set

In [None]:
for i in range(N_folds):
    print(len(trn_ds[i]))
    print(len(val_ds[i]))

Second, do the indexed data look different in each fold

In [None]:
for i in range(N_folds):
    #print(trn_ds[i][9])
    #print(val_ds[i][9])

    #print(trn_ds[i][108000 - 1])
    #print(val_ds[i][22000 - 1])

Third, check out the number of garbage and non garbage classes

In [None]:
Ngarb = 0
Nnongarb = 0
for i in range(N_folds):
    for j in range(208000):
        if trn_ds[i][j][1] == 1:
             Ngarb += 1

        if trn_ds[i][j][1] == 0:
             Nnongarb += 1

    for j in range(52000):
        if val_ds[i][j][1] == 1:
             Ngarb += 1

        if val_ds[i][j][1] == 0:
             Nnongarb += 1

print(Ngarb)
print(Nnongarb)

In [None]:
plt.imshow(trn_ds[0][600][0])
plt.show()

In [4]:
alexnet = torch.hub.load('pytorch/vision:v0.9.0', 'alexnet', pretrained=True).to(device)

for param in alexnet.parameters():
    param.requires_grad = False

alexnet.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 2),
        ).to(device)

summary(alexnet, (3,227,227));

Using cache found in C:\Users\janop/.cache\torch\hub\pytorch_vision_v0.9.0
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 56, 56]          23,296
              ReLU-2           [-1, 64, 56, 56]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13        

And now for the training loop

In [9]:
def train_batch(model, data, optimizer, criterion):
    model.train()
    im, garbClass = data
    out  = model(im)
    optimizer.zero_grad()
    loss = criterion(out, garbClass)
    acc = (torch.max(out,1)[1] == garbClass).float().mean()
    loss.backward()
    optimizer.step()
    return loss.item(), acc.item()

In [10]:
@torch.no_grad()
def validate_batch(model, data, criterion):
    model.eval()
    im, garbClass = data
    out = model(im)
    loss = criterion(out, garbClass)
    acc = (torch.max(out,1)[1] == garbClass).float().mean()
    return loss.item(), acc.item()

In [11]:
optimizer = optim.SGD(alexnet.parameters(), lr=1e-3, momentum=0.9, weight_decay=5*1e-5)
#criterion =  nn.BCELoss()# binary cross entropy loss
criterion = nn.CrossEntropyLoss()
n_epochs = 12 # 12
N_folds = 5 # 5
trn_ds = [pathchData(x, True) for x in range(N_folds)]
val_ds = [pathchData(x, False) for x in range(N_folds)]
trn_dl = [DataLoader(x, batch_size=100, shuffle=True, collate_fn=x.collate_fn) for x in trn_ds]
val_dl = [DataLoader(x, batch_size=100, shuffle=True, collate_fn=x.collate_fn) for x in val_ds]
log = Report(n_epochs * N_folds)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.25)

We have 260.000 patches and with a batch size of 100 this gives us 2.600 batch iterations for 1 epoch. Each epoch takes 1.700 secs to complete, so if we run 60 of them it would take 1.700 minutes = 28 hours to complete. We subdivide the 60 epochs into 5 folds of 12 epochs each.

In [12]:
for fold in range(N_folds):
    for ex in range(n_epochs):
        N = len(trn_dl[fold])
        train_epoch_losses = []
        for bx, data in enumerate(trn_dl[fold]):
            loss, acc = train_batch(alexnet, data, optimizer, criterion)
            train_epoch_losses.append(loss) 
            log.record(ex + (bx + 1)/N, trn_loss=loss, trn_acc=acc, end='\r')
            
        N = len(val_dl[fold])
        val_epoch_losses = []
        for bx, data in enumerate(val_dl[fold]):
            loss, acc = validate_batch(alexnet, data, criterion)
            val_epoch_losses.append(loss) 
            log.record(ex + (bx + 1)/N, val_loss=loss, val_acc=acc, end='\r')
  
        torch.save(alexnet, 'spotgarbage-GINI-master\spotgarbage\\alexnet_' + str(fold) + '_' + str(ex) + '.pth')
        torch.save(train_epoch_losses, 'spotgarbage-GINI-master\spotgarbage\\train_epoch_losses_' + str(fold) + '_' + str(ex) + '.pt')
        torch.save(val_epoch_losses, 'spotgarbage-GINI-master\spotgarbage\\val_epoch_losses_' + str(fold) + '_' + str(ex) + '.pt')

        scheduler.step()
        log.report_avgs(ex+1)
   
print('Training complete')

EPOCH: 1.000	trn_loss: 0.311	trn_acc: 0.867	val_loss: 0.215	val_acc: 0.911	(1656.53s - 97735.21s remaining)
EPOCH: 2.000	trn_loss: 0.218	trn_acc: 0.910	val_loss: 0.144	val_acc: 0.946	(3278.65s - 95080.75s remaining)
EPOCH: 3.000	trn_loss: 0.173	trn_acc: 0.932	val_loss: 0.121	val_acc: 0.957	(4863.11s - 92399.05s remaining)
EPOCH: 4.000	trn_loss: 0.157	trn_acc: 0.939	val_loss: 0.105	val_acc: 0.963	(6324.43s - 88541.98s remaining)
EPOCH: 5.000	trn_loss: 0.148	trn_acc: 0.943	val_loss: 0.105	val_acc: 0.962	(8881.34s - 97694.76s remaining)
EPOCH: 5.634	trn_loss: 0.205	trn_acc: 0.910	(10280.44s - 99199.82s remaining)

KeyboardInterrupt: 

In [13]:
torch.max(5,1)

TypeError: max() received an invalid combination of arguments - got (int, int), but expected one of:
 * (Tensor input)
 * (Tensor input, name dim, bool keepdim, *, tuple of Tensors out)
 * (Tensor input, Tensor other, *, Tensor out)
 * (Tensor input, int dim, bool keepdim, *, tuple of Tensors out)


In [15]:
im, garbClass = next(iter(val_dl[0]))

In [17]:
out  = alexnet(im)

In [18]:
out

tensor([[ -3.3597,   3.4526],
        [ -3.2485,   2.8520],
        [  0.3248,  -0.2680],
        [  2.7095,  -2.4543],
        [ -2.8512,   2.6342],
        [  4.7454,  -4.6222],
        [ -5.2491,   5.0491],
        [ -2.3398,   2.1222],
        [  3.3411,  -3.2198],
        [ -4.8146,   5.0951],
        [ -1.9445,   2.0668],
        [ -0.2203,   0.3922],
        [ -1.6055,   1.4649],
        [  1.0948,  -1.0070],
        [  6.0334,  -5.6466],
        [ -2.1474,   2.1200],
        [  2.5957,  -2.3841],
        [  4.3421,  -4.3331],
        [ -1.8319,   1.7649],
        [  1.6184,  -1.7473],
        [ -1.7212,   1.5752],
        [ -1.4695,   1.3732],
        [ -0.7901,   0.5961],
        [ -1.3765,   1.1080],
        [ -5.0110,   4.6685],
        [ -1.8524,   1.8293],
        [ -1.1089,   1.0997],
        [ -0.4512,   0.4505],
        [ -4.0563,   4.0085],
        [ -1.9422,   1.5386],
        [  1.0727,  -1.1284],
        [ -0.3423,   0.3862],
        [ 12.2856, -12.3254],
        [ 

In [19]:
acc = (torch.max(out,1)[1] == garbClass).float().mean()

In [20]:
acc

tensor(0.9600, device='cuda:0')

In [21]:
torch.max(out,1)[1]

tensor([1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
        1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
        1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0,
        0, 1, 1, 1], device='cuda:0')

In [22]:
torch.max(out,1)

torch.return_types.max(
values=tensor([ 3.4526,  2.8520,  0.3248,  2.7095,  2.6342,  4.7454,  5.0491,  2.1222,
         3.3411,  5.0951,  2.0668,  0.3922,  1.4649,  1.0948,  6.0334,  2.1200,
         2.5957,  4.3421,  1.7649,  1.6184,  1.5752,  1.3732,  0.5961,  1.1080,
         4.6685,  1.8293,  1.0997,  0.4505,  4.0085,  1.5386,  1.0727,  0.3862,
        12.2856,  1.3280,  1.3943,  2.4063,  0.7023,  4.4786,  1.9658,  5.3673,
         0.3716,  2.9919,  2.2898,  2.2599,  0.2966,  2.8255,  1.0197,  1.9151,
         2.8816,  2.6603,  2.5599,  0.2379,  4.7103,  3.9452,  3.9534,  2.0555,
         3.7394,  1.2321,  1.4673,  2.2293,  6.5259,  1.8104,  2.2221,  0.5999,
         0.4273,  1.9722, 12.6674,  5.3509,  0.5017,  3.6658,  2.5241,  2.3603,
         1.2751,  4.1808,  0.7436,  0.7468,  1.0394,  2.2641,  3.5449,  6.4017,
         1.3355,  2.9460,  0.4760,  1.1803,  4.5968,  1.4386,  3.5434,  0.9716,
         2.1668,  0.2228,  1.8259,  6.3194,  6.3600,  3.3500,  0.1376,  2.0991,
         