In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models

from skimage import io, transform

import matplotlib.pyplot as plt # for plotting
import numpy as np
import pandas as pd
import glob
import os
from tqdm import tqdm
import PIL
from sklearn.model_selection import KFold
import torchvision.models as models

from IPython.display import Image

from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

device = ("cuda" if torch.cuda.is_available() else "cpu")
trainfile = "../input/col341-a3/training.csv"#sys.argv[1]
testfile = "../input/col341-a3/test.csv"#sys.argv[2]
modelfile = "model.pth"#sys.argv[3]
lossfile = "loss.txt"#sys.argv[4]
accuracyfile = "accuracy.txt"#sys.argv[5]
img_train_folder="../input/col341-a3/"
img_test_folder="../input/col341-a3/"
print(device)

cuda


In [2]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, csv_path, images_folder, transform = None, train=True):
        self.df = pd.read_csv(csv_path)
        self.is_train = train
        self.images_folder = images_folder
        self.transform = transform
        self.class2index = {
        "Virabhadrasana":0,
        "Vrikshasana":1,
        "Utkatasana":2,
        "Padahastasana":3,
        "Katichakrasana":4,
        "TriyakTadasana":5,
        "Gorakshasana":6,
        "Tadasana":7,
        "Natarajasana":8,                 
        "Pranamasana":9,
        "ParivrittaTrikonasana":10,
        "Tuladandasana":11,
        "Santolanasana":12,
        "Still":13,
        "Natavarasana":14,
        "Garudasana":15,
        "Naukasana":16,
        "Ardhachakrasana":17,
        "Trikonasana":18,

        }

    def __len__(self):
        return len(self.df)
    def __getitem__(self, index):
        filename = self.df["name"].iloc[index]
        if self.is_train:
            label = self.class2index[self.df["category"].iloc[index]]
        else:
            label = -1
        image = PIL.Image.open(os.path.join(self.images_folder, filename))
        if self.transform is not None:
            image = self.transform(image)
        sample = {"images": image, "labels": label}
        return sample

In [3]:
# Data Loader Usage

BATCH_SIZE = 200
NUM_WORKERS = 20
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))


img_transforms = transforms.Compose([transforms.RandomHorizontalFlip(),
                                     transforms.Resize(size=(64,64)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(*stats,inplace=True)])

img_test_transforms = transforms.Compose([transforms.Resize(size=(64,64)),
                                          transforms.ToTensor(),
                                          transforms.Normalize(*stats)])


train_data = trainfile 
train_dataset = CustomDataset(csv_path = train_data, images_folder = img_train_folder, transform=img_transforms, train=True)

test_data = testfile 
test_dataset = CustomDataset(csv_path = test_data, images_folder = img_test_folder, transform=img_test_transforms, train=False)

In [4]:
#architecture 1 

class Net(Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = Sequential(
            # Defining a 2D convolution layer
            Conv2d(3, 32, kernel_size=3, stride=1),
            BatchNorm2d(32),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Defining another 2D convolution layer
            Conv2d(32, 64, kernel_size=3, stride=1),
            BatchNorm2d(64),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            
            Conv2d(64, 512, kernel_size=3, stride=1),
            BatchNorm2d(512),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            
            Conv2d(512, 1024, kernel_size=2, stride=1),
            #BatchNorm2d(32),
            ReLU(inplace=True),
            #MaxPool2d(kernel_size=2, stride=2),
        )

        self.linear_layers = Sequential(
            Linear(1024 * 1 * 1 , 256),
            ReLU(inplace=True),
            Dropout(p = 0.2),
            Linear(256 * 1 * 1 , 19),
        )


In [5]:
#architecture 2

class Net_drop_1(Module):   
    def __init__(self):
        super(Net_drop_1, self).__init__()

        self.cnn_layers = Sequential(
            
            Conv2d(3, 32, kernel_size=3, stride=1,padding=1),
            BatchNorm2d(32),
            ReLU(inplace=True),
            Dropout(p = 0.2),
            
            Conv2d(32, 64, kernel_size=3, stride=1,padding=1),
            BatchNorm2d(64),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(p = 0.2),
            
            Conv2d(64, 128, kernel_size=3, stride=1,padding=1),
            BatchNorm2d(128),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(p = 0.2),
            
            Conv2d(128, 128, kernel_size=3, stride=1,padding=1),
            BatchNorm2d(128),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(p = 0.2),
            
            Conv2d(128, 256, kernel_size=3, stride=1,padding=1),
            BatchNorm2d(256),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(p = 0.2),
            
            Conv2d(256, 512, kernel_size=3, stride=1,padding=1),
            ReLU(inplace=True),
            Dropout(p = 0.2),
        )

        self.linear_layers = Sequential(
            Linear(512*4*4 , 512),
            ReLU(inplace=True),
            Dropout(p = 0.2),
            Linear(512, 64),
            ReLU(inplace=True),
            Dropout(p = 0.2),
            Linear(64 , 19),
        )

    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

In [6]:
#pretrained arch

class Pre_Net(Module):
    def __init__(self, pretrained=True):
        super(Pre_Net,self).__init__()
        
        self.m = models.googlenet(pretrained=True)
        self.m.fc = nn.Linear(self.m.fc.in_features, 19)

    def forward(self, xb):
        return self.m(xb)

In [7]:
def train(epoch, x, y, criterion, optimizer, model):
    model.train()
    
    x_train, y_train = Variable(x), Variable(y)
    
    if torch.cuda.is_available():
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        
    optimizer.zero_grad()
    
    output_train = model(x_train)
    
    loss_train = criterion(output_train, y_train)
    
    loss_train.backward()
    optimizer.step()
    tr_loss = loss_train.item()
    
    return tr_loss

In [8]:
def predict(epoch, x, y, criterion, optimizer, model):
    
    model.eval()
    x_train, y_train = Variable(x), Variable(y)
    
    if torch.cuda.is_available():
        x_train = x_train.cuda()
        y_train = y_train.cuda()

    output_train = model(x_train)
    output_train = torch.argmax(output_train, dim = 1)
    
    return (torch.sum(output_train==y_train)).item()/y_train.shape[0]

In [9]:
def predict_n(x, model):
    
    model.eval()
    x_train= Variable(x)
    
    if torch.cuda.is_available():
        x_train = x_train.cuda()
        
    output_train = model(x_train)
    output_train = torch.argmax(output_train, dim = 1)
    return output_train

In [10]:
def reset_weights(m):
    
    for layer in m.children():
        if hasattr(layer, 'reset_parameters'):
            print(f'Reset trainable parameters of layer = {layer}')
            layer.reset_parameters()

In [11]:
train_loader = torch.utils.data.DataLoader(
                  train_dataset, 
                  batch_size=BATCH_SIZE, num_workers = NUM_WORKERS, shuffle=False)

test_loader = torch.utils.data.DataLoader(
              test_dataset,
              batch_size=BATCH_SIZE,  num_workers = NUM_WORKERS, shuffle = False)

torch.manual_seed(51)
cnnmodel = Net_drop_1()
#cnnmodel.apply(reset_weights)
torch.cuda.empty_cache()
print(sum(p.numel() for p in cnnmodel.parameters()))

optimizer = SGD(cnnmodel.parameters(), lr=0.1, momentum=0.9,nesterov=True)
criterion = CrossEntropyLoss()
scheduler = optim.lr_scheduler.OneCycleLR(optimizer,  max_lr = 0.1, epochs = 20, steps_per_epoch = len(train_loader))

if torch.cuda.is_available():
    cnnmodel = cnnmodel.cuda()
    criterion = criterion.cuda()

  cpuset_checked))


5946259


In [12]:
epochs = 20

for epoch in range(epochs):
        
    loss_avg = 0
    count = 0
    for batch_idx, sample in enumerate(train_loader):
        images = sample['images']
        labels = sample['labels']
        
        if torch.cuda.is_available():
            images = images.cuda()
            labels = labels.cuda()
            
        loss = train(epoch, images, labels, criterion, optimizer, cnnmodel)
        loss_avg += loss
        count+=1
        scheduler.step()
        
    loss_avg = loss_avg/count
    print("Training loss -> Epoch" + str(epoch), loss_avg)

    torch.save(cnnmodel.state_dict(), modelfile)
    

Training loss -> Epoch0 2.8025729541909206
Training loss -> Epoch1 2.1325618167446083
Training loss -> Epoch2 1.6446180139502433
Training loss -> Epoch3 1.2959100934740615
Training loss -> Epoch4 1.0329184785281142
Training loss -> Epoch5 2.2757302473669183
Training loss -> Epoch6 1.2564059528585982
Training loss -> Epoch7 0.9581721388313869
Training loss -> Epoch8 0.8372493770024548
Training loss -> Epoch9 0.7277268623448399
Training loss -> Epoch10 0.6372433658944417
Training loss -> Epoch11 0.5919598249131686
Training loss -> Epoch12 0.5426453788803048
Training loss -> Epoch13 0.5023631251429859
Training loss -> Epoch14 0.4760413686808658
Training loss -> Epoch15 0.42449328670762987
Training loss -> Epoch16 0.3963466780132627
Training loss -> Epoch17 0.3653552824350661
Training loss -> Epoch18 0.35703242906171523
Training loss -> Epoch19 0.3347140775597973


In [13]:
test_loader = torch.utils.data.DataLoader(
              test_dataset,
              batch_size=BATCH_SIZE,  num_workers = NUM_WORKERS, shuffle = False)

predictions = torch.Tensor([])

if torch.cuda.is_available():
    predictions = predictions.cuda()
    
for batch_idx, sample in enumerate(test_loader):
    images = sample['images']
    
    temp = predict_n(images, cnnmodel)
    predictions = torch.cat((predictions,temp),0)

predictions=predictions.cpu().detach().numpy()

In [14]:
classif = {
        "Virabhadrasana":0,
        "Vrikshasana":1,
        "Utkatasana":2,
        "Padahastasana":3,
        "Katichakrasana":4,
        "TriyakTadasana":5,
        "Gorakshasana":6,
        "Tadasana":7,
        "Natarajasana":8,                 
        "Pranamasana":9,
        "ParivrittaTrikonasana":10,
        "Tuladandasana":11,
        "Santolanasana":12,
        "Still":13,
        "Natavarasana":14,
        "Garudasana":15,
        "Naukasana":16,
        "Ardhachakrasana":17,
        "Trikonasana":18,

        }
    
inv_map = {v: k for k, v in classif.items()}

pred = [inv_map[letter] for letter in predictions]
df1 = pd.read_csv(testfile)
df1["category"] = pred
df1.drop(df1.tail(1).index,inplace=True)
df1.to_csv(path_or_buf="submission.csv", columns=["name", "category"],index=False)