In [1]:
import pandas as pd
from tqdm.notebook import tqdm, trange
import torch
from torch.utils.data import DataLoader
import numpy as np
import torchvision
import random
from collections import defaultdict
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
DATA_MODES = ['train', 'val', 'test']
RESCALE_SIZE = 160
EPOCHS = 21
BATCH_SIZE = 64
LEARNING_RATE = 0.001

SEED = 69
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.random.manual_seed(SEED)
torch.cuda.random.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
device = torch.device(device)

train_type_dir = '../data/train/type'
train_nrj_dir = '../data/train/nrj'
test_dir = '../data/test/'

cuda


In [3]:
train_transforms = transforms.Compose([transforms.CenterCrop(RESCALE_SIZE), 
                                       transforms.RandomHorizontalFlip(p=0.5),
                                       transforms.RandomVerticalFlip(p=0.5),
                                       transforms.RandomRotation(45),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           [0.485, 0.456, 0.406],
                                            [0.229, 0.224, 0.225])
                                       ])

testval_transforms = transforms.Compose([transforms.CenterCrop(RESCALE_SIZE),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           [0.485, 0.456, 0.406],
                                           [0.229, 0.224, 0.225])
                                       ])

In [4]:
def load_split_train_valid(datadir, batch_size, train_transforms, valid_transforms, valid_size):

    train_data = datasets.ImageFolder(datadir, transform=train_transforms)
    val_data = datasets.ImageFolder(datadir, transform=testval_transforms)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    np.random.shuffle(indices)
    train_idx, val_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)
    trainloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
    valloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)   

    return trainloader, valloader

In [5]:
train_type_loader, val_type_loader = load_split_train_valid(datadir=train_type_dir,
                           batch_size=BATCH_SIZE,
                           train_transforms=train_transforms,
                           valid_transforms=testval_transforms,
                           valid_size = .2)

print(train_type_loader.dataset.classes)
print(val_type_loader.dataset.classes)
print(len(train_type_loader))
print(len(val_type_loader))
train_type_loader.dataset.class_to_idx

['ER', 'NR']
['ER', 'NR']
166
42


{'ER': 0, 'NR': 1}

In [6]:
train_nrj_loader, val_nrj_loader = load_split_train_valid(datadir=train_nrj_dir,
                           batch_size=BATCH_SIZE,
                           train_transforms=train_transforms,
                           valid_transforms=testval_transforms,
                           valid_size = .2)

print(train_nrj_loader.dataset.classes)
print(val_nrj_loader.dataset.classes)
print(len(train_nrj_loader))
print(len(val_nrj_loader))
train_nrj_loader.dataset.class_to_idx

['1', '10', '20', '3', '30', '6']
['1', '10', '20', '3', '30', '6']
166
42


{'1': 0, '10': 1, '20': 2, '3': 3, '30': 4, '6': 5}

In [7]:
test_dataset = torchvision.datasets.ImageFolder(test_dir, testval_transforms)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)
len(test_loader), len(test_dataset)

(259, 16560)

In [8]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [9]:
def train_model(model, loss, optimizer, scheduler, num_epochs, train_dataloader, val_dataloader):
    hystory_loss_train = []
    hystory_loss_val = []

    hystory_acc_train = []
    hystory_acc_val = []  
    for epoch in range(num_epochs):
        print('Epoch {}/{}:'.format(epoch, num_epochs - 1), flush=True)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                dataloader = train_dataloader                
                model.train()  # Set model to training mode
            else:
                dataloader = val_dataloader
                model.eval()   # Set model to evaluate mode

            running_loss = 0.
            running_acc = 0.

            # Iterate over data.
            for inputs, labels in tqdm(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                
                # forward and backward
                with torch.set_grad_enabled(phase == 'train'):
                    preds = model(inputs)
                    loss_value = loss(preds, labels)
                    preds_class = preds.argmax(dim=1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss_value.backward()
                        optimizer.step()

                # statistics
                running_loss += loss_value.item()
                running_acc += (preds_class == labels.data).float().mean()

            epoch_loss = running_loss / len(dataloader)
            epoch_acc = running_acc / len(dataloader)

            if phase == 'train':
                hystory_loss_train.append(epoch_loss)
                hystory_acc_train.append(epoch_acc)
                scheduler.step()
            else:
                hystory_loss_val.append(epoch_loss)
                hystory_acc_val.append(epoch_acc)


            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc), flush=True)

    return hystory_loss_train, hystory_loss_val, hystory_acc_train, hystory_acc_val

In [10]:
class SCNc6(nn.Module):
    def __init__(self):
        super(SCNc6, self).__init__()
        self.c1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=3, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(3),
            nn.ReLU(), 
        )

        self.c2 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=3, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(3),
            nn.ReLU(),  
        )

        self.c3 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(kernel_size=3),
            nn.ReLU(),
           
        )

        self.c4 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.MaxPool2d(kernel_size=3),
            nn.ReLU(),
            
        )

        self.fc1 = nn.Sequential(
            nn.Linear(in_features=8192, out_features=1024),
            nn.ReLU()
        )

        self.fc2 = nn.Sequential(
            nn.Linear(in_features=1024, out_features=32),
            nn.ReLU()
        )

        self.fc3 = nn.Linear(in_features=32, out_features=6)
      
    def forward(self, x):
        # First block
        residual_1 = x
        out = self.c1(x)
        out += residual_1
        #print(out.shape)
  
        # Second block
        residual_2 = out
        out = self.c2(x)
        out += residual_2
        
        out = self.c3(out)
        out = self.c4(out)
  
        out = out.view(out.size(0), -1)
        #print(out.shape)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [11]:
%%time
dict_pred = defaultdict(list)
file_names = []
for line in test_dataset.imgs:
    file_names.append(str(line).split('/')[-1].split('.')[0])

dict_pred['id'] = file_names

for i in trange(8):
    set_seed(SEED+i)
    
    net = SCNc6()
    net = net.to(device)
    
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adagrad(net.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    hystory_loss_train, hystory_loss_val, hystory_acc_train, hystory_acc_val =\
    train_model(net, loss, optimizer, scheduler, EPOCHS, train_nrj_loader, val_nrj_loader);
    
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
    ax1.plot(hystory_loss_train, label='train')
    ax1.plot(hystory_loss_val, label='val')
    ax1.legend()

    ax2.plot(hystory_acc_train, label='train')
    ax2.plot(hystory_acc_val, label='val')
    ax2.legend()
    
    net.eval()
    dict_pred[f'SCNc6-{i}'] = []
    with torch.no_grad():
        for _, (img, name) in enumerate(tqdm(test_loader)):
            img = img.to(device)
            outputs = net(img)
            _, predicted = torch.max(outputs.data, dim=1)
            predicted = list(map(int, predicted.cpu()))
            dict_pred[f'SCNc6-{i}'].extend(predicted)

    del net
    torch.cuda.empty_cache()

HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))

Epoch 0/20:


HBox(children=(FloatProgress(value=0.0, max=166.0), HTML(value='')))





KeyboardInterrupt: 

In [12]:
data_frame = pd.DataFrame.from_dict(dict_pred)
data_frame.head()

Unnamed: 0,id
0,private_test\\00013a94f1fae3e5a33f8b44c2842c85...
1,private_test\\0005564c011259402dcb6e3beb6e64d2...
2,private_test\\0007520c8d8efade8feaafbf74fc7d20...
3,private_test\\000804a189f630cd980ff1543c0ef99d...
4,private_test\\000eb12def5371089743dcfe401a8e39...


In [13]:
data_frame['SCNc6-0'] = data_frame['SCNc6-0'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-1'] = data_frame['SCNc6-1'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-2'] = data_frame['SCNc6-2'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-3'] = data_frame['SCNc6-3'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-4'] = data_frame['SCNc6-4'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-5'] = data_frame['SCNc6-5'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-6'] = data_frame['SCNc6-6'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame['SCNc6-7'] = data_frame['SCNc6-7'].replace({
    0: 1.0, 
    1: 10.0,
    2: 20.0,
    3: 3.0,
    4: 30.0, 
    5: 6.0})
data_frame.head()

KeyError: 'SCNc6-0'

In [None]:
data_frame['id'] = data_frame['id'].apply(lambda x: x.split('\\\\')[1])
data_frame.head()

In [None]:
data_frame.to_csv('SCNc6.csv', index=False, header=True)