In [1]:
import os
import sys
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import pandas as pd
from torch.optim.lr_scheduler import StepLR
from Dataset import CustomDataset
import torchvision.transforms as transforms
from pathlib import Path
from metrics import F1Score

batch_size = 48
f1Metric = F1Score()



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
csvPath = Path("processedLabels.csv")
imgPath = Path("train-jpg")

data = pd.read_csv(csvPath)
data = data.sample(frac = 1)
data = data.reset_index(drop=True)
data.head()

datasetLen = len(data)
validationSetLen =  int(datasetLen*0.1)
validationSet = data[:validationSetLen]

trainSet = data[validationSetLen:]

print(len(trainSet))
print(len(validationSet))
trainSet = trainSet.reset_index(drop=True)


transformations = transforms.Compose([transforms.ToTensor()])
composed = transforms.Compose([
    transforms.RandomCrop(256, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
dataTrain = CustomDataset(trainSet, imgPath, transform=composed)
dataTest = CustomDataset(validationSet, imgPath, transform=composed)


25513
2834


In [3]:
## Helper Functions
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')


def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)


class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""

    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl:
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [4]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

def F1Compute(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return f1Metric(labels.float(), preds.float())

    

class BaseModule(nn.Module):
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)
        loss = F.cross_entropy(out.float(), labels) 
        return loss
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)   
        loss = F.cross_entropy(out, labels)   
        acc = accuracy(out, labels)
        f1score = F1Compute(out,labels)
        return {'val_loss': loss.detach(), 'val_acc': acc, "f1score": f1score}
    
    def train_val_step(self, batch):
        images, labels = batch 
        out = self(images)   
        labels=labels.to(torch.int64)

        loss = F.cross_entropy(out, labels)   
        acc = accuracy(out, labels)        
        return {'train_loss': loss.detach(), 'train_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_f1s = [x['f1score'] for x in outputs]
        epoch_f1 = torch.stack(batch_f1s).mean() 
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item(), "f1": epoch_f1.item()}
       
    def epoch_end(self, epoch, result):
        print("Epoch {}, Validation Loss: {:.4f}, Validation Accuracy: {:.4f}, F1: {:.4f}"
              .format(epoch + 1, result['val_loss'], result['val_acc'], result["f1"]))
        

@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []

    if opt_func == torch.optim.SGD:
        optimizer = opt_func(model.parameters(), lr, momentum=0.9)
    else:
        optimizer = opt_func(model.parameters(), lr)
        
    scheduler = StepLR(optimizer, step_size=10, gamma=0.2)
    for epoch in range(epochs):
        # Training Phase 
        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        scheduler.step()

        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history


def plotResults(history,name):
    losses = [entry['val_loss'] for entry in history]
    accuracy = [entry["val_acc"] for entry in history]
    train_loss = [entry["train_loss"] for entry in history]
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
    fig.suptitle('Model Results')

    ax1.plot(losses, '-o', label="Validation Loss")
    ax1.plot(train_loss, "-s", label="Training Loss")
    ax1.legend()
    ax1.set_ylim([0,5])
    ax1.set(xlabel = 'Epoch', ylabel="Loss")

    
    ax2.set(xlabel = 'Epoch', ylabel="Values")
    ax2.plot(accuracy, "-r")

    # plt.legend()
    ax1.set_title('Loss vs. Number of Epochs');
    ax2.set_title("Accuracy on Validation Set");
    plt.savefig("{}-results.png".format(name))
    plt.show()

    
histories = {}

In [5]:
device = get_default_device()


train = DataLoader(dataTrain,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0, # 1 for CUDA
                          pin_memory=True, # CUDA only
                          drop_last=True,
                  )

test = DataLoader(dataTest,
                          batch_size=batch_size,
                          num_workers=0, # 1 for CUDA
                          pin_memory=True, # CUDA only
                          drop_last=True
                         )

train_loader = DeviceDataLoader(train, device)
val_loader = DeviceDataLoader(test, device)

print(device)

cuda


In [6]:
class Net(BaseModule):
    def __init__(self):
        super(Net, self).__init__()

        self.net = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8
    
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 256 x 4 x 4
    
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 256 x 4 x 4
            nn.Flatten(), 
            nn.Linear(16384 , 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 2),
          
        )
    def forward(self, x):
        return self.net(x)

name = "ConvNet1"
model = to_device(Net(), device)
# model = model.cuda()
history = [evaluate(model, val_loader)]
num_epochs = 20
opt_func = torch.optim.Adam
lr = 6e-4
history += fit(num_epochs, lr, model, train_loader, val_loader)
histories[name] = history

Epoch 1, Validation Loss: 0.3695, Validation Accuracy: 0.8312, F1: 0.8694
Epoch 2, Validation Loss: 0.3043, Validation Accuracy: 0.8736, F1: 0.8952
Epoch 3, Validation Loss: 0.2925, Validation Accuracy: 0.8778, F1: 0.8993
Epoch 4, Validation Loss: 0.2811, Validation Accuracy: 0.8884, F1: 0.9097
Epoch 5, Validation Loss: 0.2647, Validation Accuracy: 0.8927, F1: 0.9123
Epoch 6, Validation Loss: 0.2918, Validation Accuracy: 0.8799, F1: 0.9052
Epoch 7, Validation Loss: 0.2632, Validation Accuracy: 0.8912, F1: 0.9108
Epoch 8, Validation Loss: 0.2714, Validation Accuracy: 0.8874, F1: 0.9062
Epoch 9, Validation Loss: 0.2492, Validation Accuracy: 0.8997, F1: 0.9175
Epoch 10, Validation Loss: 0.2460, Validation Accuracy: 0.9008, F1: 0.9189
Epoch 11, Validation Loss: 0.2270, Validation Accuracy: 0.9068, F1: 0.9237
Epoch 12, Validation Loss: 0.2306, Validation Accuracy: 0.9061, F1: 0.9234
Epoch 13, Validation Loss: 0.2294, Validation Accuracy: 0.9103, F1: 0.9278
Epoch 14, Validation Loss: 0.2342,

In [9]:
class Net2(BaseModule):
    def __init__(self):
        super(Net2, self).__init__()

        self.net = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU(),


            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
    
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AvgPool2d(2, 2), 
    
            nn.Flatten(), 
            nn.Linear(65536 , 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256,128),
            nn.ReLU(),
            nn.Linear(128, 2),
          
        )
    def forward(self, x):
        return self.net(x)

name = "ConvNet2"
model = to_device(Net2(), device)
# model = model.cuda()
history = [evaluate(model, val_loader)]
num_epochs = 15
opt_func = torch.optim.Adam
lr = 4e-4
history += fit(num_epochs, lr, model, train_loader, val_loader)
histories[name] = history

Epoch 1, Validation Loss: 0.3513, Validation Accuracy: 0.8535, F1: 0.8850
Epoch 2, Validation Loss: 0.2982, Validation Accuracy: 0.8757, F1: 0.8977
Epoch 3, Validation Loss: 0.2952, Validation Accuracy: 0.8796, F1: 0.9031
Epoch 4, Validation Loss: 0.2860, Validation Accuracy: 0.8799, F1: 0.9022
Epoch 5, Validation Loss: 0.2715, Validation Accuracy: 0.8870, F1: 0.9078
Epoch 6, Validation Loss: 0.2944, Validation Accuracy: 0.8845, F1: 0.9079
Epoch 7, Validation Loss: 0.2815, Validation Accuracy: 0.8842, F1: 0.9075
Epoch 8, Validation Loss: 0.2690, Validation Accuracy: 0.8877, F1: 0.9058
Epoch 9, Validation Loss: 0.2991, Validation Accuracy: 0.8796, F1: 0.9005
Epoch 10, Validation Loss: 0.2741, Validation Accuracy: 0.8877, F1: 0.9083
Epoch 11, Validation Loss: 0.2521, Validation Accuracy: 0.9025, F1: 0.9212
Epoch 12, Validation Loss: 0.2487, Validation Accuracy: 0.9011, F1: 0.9189
Epoch 13, Validation Loss: 0.2463, Validation Accuracy: 0.9004, F1: 0.9183
Epoch 14, Validation Loss: 0.2478,

In [None]:
class Net3(BaseModule):
    def __init__(self):
        super(Net3, self).__init__()

        self.net = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            nn.MaxPool2d(2, 2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU(),


            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), 
    
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.AvgPool2d(2, 2), 
    
            nn.Flatten(), 
            nn.Linear(65536 , 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256,128),
            nn.ReLU(),
            nn.Linear(128, 2),
          
        )
    def forward(self, x):
        return self.net(x)
    
name = "ConvNet3"
model = to_device(Net3(), device)
# model = model.cuda()
history = [evaluate(model, val_loader)]
num_epochs = 15
opt_func = torch.optim.Adam
lr = 4e-4
history += fit(num_epochs, lr, model, train_loader, val_loader)
histories[name] = history

In [10]:
p = model.parameters()
print(p)

<generator object Module.parameters at 0x000001E422E87120>


In [15]:
model = Net2()

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_parameters(model))

16904834


In [16]:
import json


with open('convNets.json', 'w') as outfile:
    json.dump(histories, outfile)