In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the path of every file found beneath the input directory,
# then print the paths one per line.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('input/')
    for filename in filenames
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

input/sample-submission.csv
input/test.csv
input/train.csv


In [2]:
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torchmetrics

In [3]:
# Folder that holds the competition CSV files.
DATA_DIR = pathlib.Path("input/")

# Read both tables, parsing every column as uint8.
train_df, test_df = (
    pd.read_csv(DATA_DIR / csv_name, dtype="uint8")
    for csv_name in ("train.csv", "test.csv")
)

# Split the training table into the "labels" column (target) and all
# remaining columns (features).
train_target = train_df["labels"]
train_features = train_df.drop(columns="labels")

In [4]:
class DataSetWithTransforms(Dataset):
    """Map-style dataset that pairs each sample's features with its label.

    Parameters
    ----------
    features : indexable
        Collection with one sample per entry along its first axis
        (e.g. a NumPy array of any dimensionality).
    target : array-like
        One label per sample (NumPy array, list or tensor). It is stored
        as an int64 tensor.
    feature_transforms : callable, optional
        Applied to a sample's features every time the sample is fetched.
        When omitted, the raw entry of ``features`` is returned.
    """

    def __init__(self, features, target, feature_transforms=None):
        super().__init__()
        self._features = features
        # as_tensor handles NumPy arrays exactly like from_numpy did, and
        # additionally accepts lists and tensors.
        self._target = torch.as_tensor(target).long()
        self._feature_transforms = feature_transforms

    def __getitem__(self, index):
        """Return ``(features, target)`` for the sample at ``index``."""
        features = self._features[index]
        if self._feature_transforms is not None:
            features = self._feature_transforms(features)
        return features, self._target[index]

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        # len() works for any number of feature dimensions, whereas unpacking
        # ``.shape`` into two names only worked for 2-D arrays.
        return len(self._features)

In [5]:
class DataSetTest(Dataset):
    """Map-style dataset over unlabeled samples (features only).

    Parameters
    ----------
    features : indexable
        Collection with one sample per entry along its first axis
        (e.g. a NumPy array of any dimensionality).
    feature_transforms : callable, optional
        Applied to a sample's features every time the sample is fetched.
        When omitted, the raw entry of ``features`` is returned.
    """

    def __init__(self, features, feature_transforms=None):
        super().__init__()
        self._features = features
        self._feature_transforms = feature_transforms

    def __getitem__(self, index):
        """Return the (optionally transformed) features at ``index``."""
        features = self._features[index]
        if self._feature_transforms is not None:
            features = self._feature_transforms(features)
        return features

    def __len__(self):
        """Return the number of samples (length of the first axis)."""
        # len() works for any number of feature dimensions, whereas unpacking
        # ``.shape`` into two names only worked for 2-D arrays.
        return len(self._features)

In [6]:
# Training-only pipeline (includes augmentation): reshape each flat pixel row
# into a 32x32 image, apply a random affine perturbation, convert to a tensor.
_feature_transforms = transforms.Compose([
    transforms.Lambda(lambda pixels: pixels.reshape(32, 32)),
    transforms.ToPILImage(),
    transforms.RandomAffine(
        degrees=15,
        translate=(0.1, 0.1),
        scale=(1.0, 1.1),
        shear=15,
    ),
    transforms.ToTensor(),
])

In [7]:
# Evaluation pipeline (no augmentation): reshape each flat pixel row into a
# 32x32 image and convert it to a tensor.
_feature_transforms_test = transforms.Compose([
    transforms.Lambda(lambda pixels: pixels.reshape(32, 32)),
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

In [91]:
# MNIST
def mnist(batch_sz, valid_size=0.2, shuffle=True, random_seed=2000):
    """Build the training, validation and test data loaders.

    Reads ``train.csv`` and ``test.csv`` from ``DATA_DIR``, splits the
    training rows into a training part and a validation part, and wraps
    all three sets in ``DataLoader`` objects.

    Parameters
    ----------
    batch_sz : int
        Batch size used by all three loaders.
    valid_size : float, default 0.2
        Fraction of the training rows held out for validation; must lie
        in ``[0, 1]``.
    shuffle : bool, default True
        Whether to permute the row indices before splitting.
    random_seed : int, default 2000
        Seed for the permutation used when ``shuffle`` is true.

    Returns
    -------
    tuple
        ``(train_loader, valid_loader, test_loader)``. Training batches go
        through the augmenting transform; validation and test batches go
        through the plain transform. Test batches contain features only.

    Raises
    ------
    ValueError
        If ``valid_size`` is outside ``[0, 1]``.
    """
    if not 0 <= valid_size <= 1:
        raise ValueError(f"valid_size must be within [0, 1], got {valid_size}")

    train_df = pd.read_csv(DATA_DIR / "train.csv", dtype="uint8")
    test_df = pd.read_csv(DATA_DIR / "test.csv", dtype="uint8")

    # Extract the arrays once; both datasets below share them.
    features = train_df.drop("labels", axis=1).values
    labels = train_df.loc[:, "labels"].values

    # Two views over the same rows: only the training view is augmented.
    train_data = DataSetWithTransforms(features, labels, _feature_transforms)
    valid_data = DataSetWithTransforms(features, labels, _feature_transforms_test)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    if shuffle:
        # A private legacy RandomState yields the same permutation as
        # np.random.seed(random_seed) followed by np.random.shuffle, but
        # leaves the process-wide NumPy RNG untouched.
        np.random.RandomState(random_seed).shuffle(indices)
    train_idx, valid_idx = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    train_loader = DataLoader(train_data, batch_size=batch_sz, sampler=train_sampler, pin_memory=True)
    valid_loader = DataLoader(valid_data, batch_size=batch_sz, sampler=valid_sampler, pin_memory=True)

    # Test dataset: unlabeled, kept in file order.
    test_data = DataSetTest(test_df.values, _feature_transforms_test)
    test_loader = DataLoader(test_data, batch_size=batch_sz, shuffle=False, pin_memory=True)

    return train_loader, valid_loader, test_loader

In [92]:
# Number of samples per batch.
batch_sz = 64
train_loader, valid_loader, test_loader = mnist(batch_sz)

# Peek at the first test batch (if there is one) to confirm its tensor shape.
first_test_batch = next(iter(test_loader), None)
if first_test_batch is not None:
    print(first_test_batch.shape)

torch.Size([64, 1, 32, 32])


In [114]:
# Upper bound on the number of training + validation samples: batches per
# loader times the configured batch size (the last batch of each loader may
# be smaller). Uses batch_sz so the figure stays right if the size changes.
batch_sz * (len(train_loader) + len(valid_loader))

13440

In [262]:
class ConvNet(nn.Module):
    """Small CNN: two conv/batch-norm/pool stages followed by five linear layers.

    The first linear layer is sized for 5*5*50 features, which is what the
    two convolution/pooling stages produce for single-channel 32x32 input.

    Parameters
    ----------
    num_classes : int, default 29
        Number of output logits.
    """

    def __init__(self, num_classes=29):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.norm1 = nn.BatchNorm2d(20)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.norm2 = nn.BatchNorm2d(50)
        self.l1 = nn.Linear(5 * 5 * 50, 100)
        self.l2 = nn.Linear(100, 200)
        self.l3 = nn.Linear(200, 500)
        self.l4 = nn.Linear(500, 200)
        self.l5 = nn.Linear(200, num_classes)
        self.dropout = nn.Dropout(0.25)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for input ``x``.

        Raises a ``RuntimeError`` (from the first linear layer) when the
        input's spatial size does not lead to 5*5*50 features per sample.
        """
        # (32,32) -> conv 5x5 -> (28,28) -> pool -> (14,14)
        x = F.max_pool2d(F.leaky_relu(self.norm1(self.conv1(x))), (2, 2))
        # (14,14) -> conv 5x5 -> (10,10) -> pool -> (5,5)
        x = F.max_pool2d(F.leaky_relu(self.norm2(self.conv2(x))), (2, 2))
        # Flatten per sample. Unlike view(-1, 5*5*50), this never regroups
        # values across the batch dimension when the input size is wrong.
        x = x.flatten(start_dim=1)
        # Dropout precedes every linear layer; the hidden ones use leaky ReLU.
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.leaky_relu(hidden(self.dropout(x)))
        return self.l5(self.dropout(x))

In [445]:
# Use the first CUDA GPU when one is available; otherwise fall back to the
# CPU so the notebook still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = ConvNet()
net = net.to(device)

# Summed training loss of each epoch; filled by the training loop and plotted afterwards.
ls=[]

In [446]:
from torchvision import models


# Rebind `net` to a ResNet-18 with 29 output classes, then replace its first
# convolution with one that takes a single input channel.
net = models.resnet18(num_classes=29)
net.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=5,
    stride=2,
    padding=3,
    bias=False,
)
net = net.to(device)

In [451]:
# Replace the network's first convolution once more, this time without an
# explicit stride. Every run of this cell builds a new, freshly initialised
# layer, so any weights previously learned by conv1 are discarded.
net.conv1 = nn.Conv2d(1, 64, kernel_size=(5, 5), padding=(3, 3), bias=False)
# Earlier experiment, kept for reference (targets a model exposing `.features`):
#net.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
net = net.to(device)
# Initial learning rate handed to the optimizer.
lr = 0.1
# NOTE(review): `momentum` is only referenced by the commented-out optimizer
# line below; nothing else in the visible notebook reads it.
momentum = 0.2
optimizer = optim.Adam(net.parameters(), lr=lr)
# Earlier experiment, kept for reference:
#optimizer = optim.Adam(net.parameters(), lr=lr, momentum=momentum)
# Decay the learning rate by a factor of 0.99 on every scheduler step;
# verbose=True makes the scheduler print each adjustment.
lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99, verbose=True)


Adjusting learning rate of group 0 to 1.0000e-01.


In [324]:
# Best validation accuracy seen so far; the training loop compares against
# this value and overwrites it when an epoch does better.
highest = 0

In [328]:
# Show the best validation accuracy recorded so far.
print(highest)

0.9825148809523809


In [None]:
# Train for at most 3000 epochs. Each epoch runs one pass over the training
# loader (with parameter updates) and one pass over the validation loader
# (without gradients), then steps the learning-rate scheduler.
#
# NOTE(review): the loop stops at the first epoch whose validation accuracy
# beats `highest`. With `highest` freshly reset to 0 that is the very first
# epoch - confirm this "run until a new record" workflow is intended.
for i in range(3000):
    loss_total = 0
    loss_val = 0
    acc_train = 0
    total_train = 0
    net.train()
    for data, label in train_loader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        # logits has shape (batch size, number of classes)
        logits = net(data)
        loss = F.cross_entropy(logits, label)
        loss_total += loss.item()

        loss.backward()
        optimizer.step()

        out = torch.argmax(logits, dim=1)
        # Accumulate plain Python ints so the counter has one type throughout.
        acc_train += torch.sum(out == label).item()
        total_train += logits.shape[0]

    acc_val = 0
    total_val = 0
    net.eval()
    with torch.no_grad():
        for data, label in valid_loader:
            data, label = data.to(device), label.to(device)
            logits = net(data)
            loss = F.cross_entropy(logits, label)
            loss_val += loss.item()
            out = torch.argmax(logits, dim=1)
            acc_val += torch.sum(out == label).item()
            total_val += logits.shape[0]

    ls.append(loss_total)
    lr_scheduler.step()

    # Average over the number of batches. The previous divisor was the last
    # batch *index*, which is one too small and is zero for a single batch.
    mean_train_loss = loss_total / len(train_loader)
    mean_val_loss = loss_val / len(valid_loader)
    train_accuracy = acc_train / total_train
    val_accuracy = acc_val / total_val
    print(f"Iterataion {i}: Training Loss: {mean_train_loss}, Validation Loss: {mean_val_loss}")
    print(f"Iteataion {i}: Training Accuracy: {train_accuracy}, Validation Accuracy  {val_accuracy}")
    if val_accuracy > highest:
        highest = val_accuracy
        break

# Summed training loss per epoch.
plt.plot(ls)

In [354]:
# Re-evaluate the current network on the validation loader.
# All accumulators are initialised here so the cell does not depend on
# `loss_val` left behind by the training cell.
loss_val = 0
acc_val = 0
total_val = 0
# Make inference mode explicit (dropout off, batch-norm running statistics)
# instead of relying on whatever mode an earlier cell left the network in.
net.eval()
with torch.no_grad():
    for data, label in valid_loader:
        data, label = data.to(device), label.to(device)
        logits = net(data)
        loss = F.cross_entropy(logits, label)
        loss_val += loss.item()
        out = torch.argmax(logits, dim=1)
        acc_val += torch.sum(out == label).item()
        total_val += logits.shape[0]

# The figure is computed on valid_loader, i.e. the validation split.
print(f"Testing Accuracy: {acc_val/total_val}")

Testing Accuracy: 0.9840029761904762


In [355]:
# Predict a class for every test image and write the submission file.
batch_predictions = []
# Inference only: switch off dropout / batch-norm updates and skip autograd
# bookkeeping so no computation graph is built for the forward passes.
net.eval()
with torch.no_grad():
    for images in test_loader:
        # Move the batch to the same device the network lives on, rather
        # than assuming a CUDA GPU is present.
        batch_predictions.append(net(images.to(device)).argmax(1))

prediction_list = torch.cat(batch_predictions)
test_features = pd.read_csv(DATA_DIR / "test.csv")

# One row per test sample: its row index as "Id", predicted class as "Category".
(pd.DataFrame
   .from_dict({"Id": test_features.index, "Category": prediction_list.cpu().numpy()})
   .to_csv("submission.csv", index=False))