In [38]:
# Install the Kaggle command-line client; later cells call it through `!kaggle ...`.
!pip install kaggle



In [39]:
from google.colab import files

# Open the Colab upload widget; the shell line below expects a file named kaggle.json
# to have been uploaded into the current working directory.
uploaded = files.upload()

# Create ~/.kaggle/, move the uploaded file there and restrict its permissions to the
# owner (chmod 600).
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [40]:
# Ask the Kaggle CLI for the list of competitions.
!kaggle competitions list

ref                                            deadline             category            reward  teamCount  userHasEntered  
---------------------------------------------  -------------------  ---------------  ---------  ---------  --------------  
digit-recognizer                               2030-01-01 00:00:00  Getting Started  Knowledge       2582            True  
titanic                                        2030-01-01 00:00:00  Getting Started  Knowledge      10477            True  
house-prices-advanced-regression-techniques    2030-01-01 00:00:00  Getting Started  Knowledge       4129            True  
imagenet-object-localization-challenge         2029-12-31 07:00:00  Research         Knowledge         36           False  
competitive-data-science-predict-future-sales  2019-12-31 23:59:00  Playground           Kudos       2612           False  
two-sigma-financial-news                       2019-07-15 23:59:00  Featured          $100,000       2927           False  
aerial-c

In [41]:
# Download the files of the `digit-recognizer` competition with the Kaggle CLI.
!kaggle competitions download -c digit-recognizer

train.csv: Skipping, found more recently modified local copy (use --force to force download)
test.csv: Skipping, found more recently modified local copy (use --force to force download)
sample_submission.csv: Skipping, found more recently modified local copy (use --force to force download)


In [42]:
# List the working directory to check which files are present.
!ls

drive  sample_data  sample_submission.csv  test.csv  train.csv


In [43]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem under /content/drive.
drive.mount('/content/drive')

# Project folder relative to the mount point, followed by its absolute path.
drive_path = 'My Drive/Colab Notebooks/MNIST_competition'
drive_full_path = '/content/drive/{}'.format(drive_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
# Bare expression: displays the resolved Drive folder path as the cell output.
drive_full_path

'/content/drive/My Drive/Colab Notebooks/MNIST_competition'

In [0]:
# Imports here
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset


In [198]:
# True when PyTorch reports an available CUDA device.
train_on_gpu = torch.cuda.is_available()
# Bare expression: displays the flag as the cell output.
train_on_gpu

True

In [0]:
from PIL import Image

class CSVDataset(Dataset):
    """Dataset over a labelled pixel table.

    Each row of ``data`` is one sample: column 0 holds the label and the remaining
    columns hold the flattened pixel values.

    Args:
        data: pandas DataFrame with one sample per row.
        height: image height in pixels.
        width: image width in pixels.
        channels: stored on the instance; not read by this class.
        transform: optional callable applied to the PIL image before it is returned.
    """

    def __init__(self, data, height, width, channels,transform=None):
        self.data, self.transform = data, transform
        self.height, self.width, self.channels = height, width, channels

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at positional ``index``."""
        shape = (self.height, self.width)

        # Pixel columns -> uint8 (0..255) -> 2-D (height, width) array.
        pixels = self.data.iloc[index, 1:].values.astype(np.uint8).reshape(shape)
        image = Image.fromarray(pixels)

        # First column is the class label.
        label = int(self.data.iloc[index, 0])

        if self.transform is None:
            return image, label

        # With ToTensor in the pipeline the pixel values are rescaled by 1/255 here.
        return self.transform(image), label

In [0]:
# Per-split preprocessing pipelines, keyed by 'train' and 'val'.
data_transforms = {
    'train': transforms.Compose([
        # No horizontal flip: a mirrored digit is not a valid example of its class,
        # so flipping would feed wrongly-labelled images to the network.
        transforms.RandomRotation(10),  # random rotation of up to 10 degrees either way
        transforms.ToTensor(),          # PIL image -> float tensor (pixel values / 255)
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
    ]),
}


In [201]:
# Fraction of the labelled rows used for training; the rest is held out for validation.
train_ratio = 0.8

# Load the labelled table, reading every column as float32.
df = pd.read_csv('train.csv', dtype=np.float32)

# Row position at which the table is cut.
middle = int(len(df) * train_ratio)
print(middle)

# Sequential split: leading rows for training, trailing rows for validation.
train, val = df.iloc[:middle], df.iloc[middle:]

33600


In [0]:
# Wrap both splits as 28x28, single-channel datasets, each with its own transform pipeline.
train_dataset = CSVDataset(
    data=train, height=28, width=28, channels=1,
    transform=data_transforms['train'],
)
val_dataset = CSVDataset(
    data=val, height=28, width=28, channels=1,
    transform=data_transforms['val'],
)

In [0]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 128

# Training batches are reshuffled every epoch so the optimizer does not see the same
# batches in file order each time. Validation order does not affect the averaged
# metrics, so it stays fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [0]:
class MNISTNetwork(nn.Module):
  """Convolutional classifier for single-channel 28x28 images with 10 classes.

  Two convolutional stages (conv-ReLU-conv-ReLU-maxpool each) are followed by two
  fully connected layers. Dropout is applied after each stage and after the first
  fully connected layer, but only while the module is in training mode.

  ``forward`` returns raw class scores (logits), which is what
  ``nn.CrossEntropyLoss`` expects. The arg-max over dim 1 is unchanged compared to
  softmax probabilities, so predicted labels are unaffected.
  """

  def __init__(self):
    super().__init__()
    self.layer1 = nn.Sequential(
      torch.nn.Conv2d(1, 32, 5, padding=2),
      nn.ReLU(),
      torch.nn.Conv2d(32, 32, 5, padding=2),
      nn.ReLU(),
      torch.nn.MaxPool2d(kernel_size=2)
    ) #32,14,14
    self.layer2 = nn.Sequential(
      torch.nn.Conv2d(32, 64, 3, padding=1),
      nn.ReLU(),
      torch.nn.Conv2d(64, 64, 3, padding=1),
      nn.ReLU(),
      torch.nn.MaxPool2d(kernel_size=2, stride=2)
    ) #64, 7, 7
    self.fc1 = torch.nn.Linear(7*7*64, 256)
    self.fc2 = torch.nn.Linear(256, 10)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: float tensor of shape (batch, 1, 28, 28).

    Returns:
      Tensor of shape (batch, 10) holding unnormalised class scores (logits).
    """
    x = self.layer1(x)
    # F.dropout defaults to training=True; tie it to the module mode so that
    # model.eval() really disables dropout.
    x = F.dropout(x, p=0.25, training=self.training)
    x = self.layer2(x)
    x = F.dropout(x, p=0.25, training=self.training)
    x = x.reshape(x.size(0), -1)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, p=0.5, training=self.training)
    # No softmax here: CrossEntropyLoss applies log-softmax itself, so normalising
    # in forward would apply it twice and flatten the gradients.
    return self.fc2(x)
    

In [0]:
# Freshly initialised network.
model = MNISTNetwork()

# Cross-entropy objective used for both training and validation.
criterion = nn.CrossEntropyLoss()

# Adam over all parameters of `model`, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Stays False until a checkpoint has been restored.
load = False

In [0]:
def save_model(model, epochs=0, val_loss=-1):
  """Write a training checkpoint to ``checkpoint.pt`` inside ``drive_full_path``.

  The checkpoint bundles the weights of ``model``, the state of the notebook-level
  ``optimizer``, the epoch counter and the validation loss.

  Args:
    model: network whose ``state_dict()`` is stored.
    epochs: epoch number recorded in the checkpoint.
    val_loss: validation loss recorded in the checkpoint.
  """
  checkpoint = dict(
      model_state_dict=model.state_dict(),
      optimizer_state_dict=optimizer.state_dict(),
      epochs=epochs,
      val_loss=val_loss,
  )
  destination = drive_full_path + '/checkpoint.pt'
  torch.save(checkpoint, destination)
  print("Model Saved")

In [0]:
def load_model(checkpoint):
    """Rebuild an ``MNISTNetwork`` from a checkpoint dictionary.

    Args:
        checkpoint: mapping with a 'model_state_dict' entry (network weights) and a
            'val_loss' entry.

    Returns:
        Tuple ``(model, val_loss)``: a new network carrying the stored weights, and
        the stored validation loss.
    """
    restored = MNISTNetwork()
    restored.load_state_dict(checkpoint['model_state_dict'])
    return restored, checkpoint['val_loss']

import os

# Restore the last checkpoint if one exists; on a first run there is nothing to load
# and training simply starts from the freshly initialised model.
checkpoint_path = drive_full_path + '/checkpoint.pt'
if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model, val_loss = load_model(checkpoint)

    # `model` now refers to a new network object, while the existing optimizer still
    # holds the parameters of the previous one. Rebuild it over the restored
    # parameters (same learning rate), otherwise optimizer.step() would never touch
    # the weights that are actually being trained.
    optimizer = optim.Adam(model.parameters(), lr=optimizer.defaults['lr'])
    if 'optimizer_state_dict' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    load = True
else:
    print("No checkpoint found at {}; starting from scratch".format(checkpoint_path))


In [0]:
# number of epochs to train the model
n_epochs = 10

# number of consecutive epochs without improvement that triggers early stopping
patience = 10
early_stop = 0

# Best validation loss so far: resume from the checkpoint value when one was loaded.
# np.inf is the spelling available in every NumPy release (np.Inf was removed in 2.0).
valid_loss_min = val_loss if load else np.inf # track change in validation loss

# Batches are moved to whatever device the model's parameters live on.
device = next(model.parameters()).device

for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    correct = 0

    ###################
    # train the model #
    ###################
    model.train()
    for batch_number, (data, target) in enumerate(train_loader):
        # move tensors to the model's device
        data, target = data.to(device), target.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss (batch mean * batch size = batch sum)
        train_loss += loss.item()*data.size(0)

        if batch_number%50 == 0:
            print("batch number: {}".format(batch_number))

    ######################
    # validate the model #
    ######################
    model.eval()
    # no gradients are needed for validation; skipping them saves memory and time
    with torch.no_grad():
        for data, target in val_loader:
            # move tensors to the model's device
            data, target = data.to(device), target.to(device)
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update running validation loss
            valid_loss += loss.item()*data.size(0)
            # count correct predictions as a plain Python int
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

    # calculate average losses
    train_loss = train_loss/len(train_loader.dataset)
    valid_loss = valid_loss/len(val_loader.dataset)
    accuracy = correct/len(val_loader.dataset)

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}\tAccuracy: {:.6f}'.format(
        epoch, train_loss, valid_loss,accuracy))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min, valid_loss))
        save_model(model, epoch, valid_loss)
        valid_loss_min = valid_loss
        early_stop = 0
    else:
        early_stop+=1

    if early_stop >= patience:
        print("\n\nEarly stop")
        break

In [0]:
from PIL import Image

class CSVDatasetTest(Dataset):
    """Dataset over an unlabelled pixel table: every column of a row is a pixel value.

    Args:
        data: pandas DataFrame with one flattened image per row.
        height: image height in pixels.
        width: image width in pixels.
    """

    def __init__(self, data, height, width):
        self.data = data
        self.height, self.width = height, width
        # Fixed conversion step for this dataset.
        self.transform = transforms.ToTensor()

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the transformed image stored in the row at positional ``index``."""
        shape = (self.height, self.width)
        # Whole row -> uint8 (0..255) -> 2-D (height, width) array.
        pixels = self.data.iloc[index].values.astype(np.uint8).reshape(shape)
        # With ToTensor the pixel values are rescaled by 1/255 here.
        return self.transform(pixels)

In [0]:
# Load the test table, reading every column as float32.
df = pd.read_csv('test.csv', dtype=np.float32)

# Wrap it as a dataset of 28x28 images.
test_dataset = CSVDatasetTest(data=df, height=28, width=28)

# One image per batch, iterated in file order.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [0]:
# Empty frame with the two submission columns: 'ImageId' and 'Label'.
predictions = pd.DataFrame(columns = ['ImageId', 'Label'])

In [0]:
# Run inference over the test loader and write the submission file.
model.eval()
device = next(model.parameters()).device

# Collect predicted labels in a plain list and build the frame once at the end.
# Growing a DataFrame row by row is quadratic, and DataFrame.append no longer exists
# in pandas >= 2.0. Rebuilding the frame also makes this cell safe to re-run.
labels = []
with torch.no_grad():
  for data in test_loader:
    output = model(data.to(device))
    # arg-max over the class dimension; works for any batch size
    labels.extend(output.argmax(dim=1).tolist())

# ImageId is the 1-based position of the image in the test file.
predictions = pd.DataFrame(
    {'ImageId': list(range(1, len(labels) + 1)), 'Label': labels},
    columns=['ImageId', 'Label'],
)

predictions.to_csv('submission.csv', index=False)
files.download('submission.csv')


In [0]:
# Write the current `predictions` frame to submission.csv (without the index column)
# and trigger a browser download of that file through the Colab helper.
predictions.to_csv('submission.csv', index=False)
files.download('submission.csv')