In [11]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv
/kaggle/input/digit-recognizer/sample_submission.csv


In [12]:
from sklearn.model_selection import train_test_split

# Import PyTorch Modules

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader

In [13]:
# Read the training, test and sample-submission CSVs in one pass; the
# three resulting DataFrames are bound in the same order as the paths.
train, test, sample_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
        '/kaggle/input/digit-recognizer/sample_submission.csv',
    )
)

In [14]:
batch_size = 128
seed = 42  # fixed so the train/validation split and shuffling are repeatable

# Seed PyTorch's global RNG, which the shuffling training loader draws from.
torch.manual_seed(seed)


def _scaled_tensor(values):
    """Return `values` as a float32 tensor with every entry divided by 255."""
    return torch.from_numpy(values.astype(np.float32) / 255)


def _label_tensor(values):
    """Return `values` as an int64 tensor (the dtype CrossEntropyLoss expects for class indices)."""
    return torch.from_numpy(values.astype(np.int64))


# Column 0 of the training frame is used as the target; every other column is an input.
features = train.values[:, 1:]
labels = train.values[:, 0]

# Hold out 20% of the training rows for validation; random_state pins the split.
X_train, X_val, y_train, y_val = train_test_split(
    features, labels, test_size=0.2, random_state=seed
)

# Wrap the arrays as tensor datasets (the test set has inputs only, no labels)
train_dataset = TensorDataset(_scaled_tensor(X_train), _label_tensor(y_train))
val_dataset = TensorDataset(_scaled_tensor(X_val), _label_tensor(y_val))
test_dataset = TensorDataset(_scaled_tensor(test.values))

# DataLoaders: only the training data is shuffled; validation and test keep
# their row order (the test order must line up with the submission rows).
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [15]:
# Fully connected classifier operating on flattened 28*28 inputs
class Neural_Net(nn.Module):
    """Feed-forward network: 784 -> 512 -> 256 -> 10, with dropout (p=0.2)."""

    def __init__(self):
        super().__init__()

        # Attribute names double as state_dict keys, so they are kept as-is.
        self.input = nn.Linear(in_features=28 * 28, out_features=512)
        self.hidden = nn.Linear(in_features=512, out_features=256)
        self.output = nn.Linear(in_features=256, out_features=10)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Flatten `x` to rows of 784 values and return 10 raw scores per row."""
        flat = x.view(-1, 28 * 28)
        # Dropout is applied to each linear output before the ReLU
        first = F.relu(self.dropout(self.input(flat)))
        second = F.relu(self.dropout(self.hidden(first)))
        # No activation on the last layer: raw scores are returned
        return self.output(second)


model = Neural_Net()
print(model)
        

Neural_Net(
  (input): Linear(in_features=784, out_features=512, bias=True)
  (hidden): Linear(in_features=512, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [22]:
# Loss function and optimiser used by the training loop
learning_rate = 0.1

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [23]:
# Train the Model

epochs = 30
for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch. Other cells
    # call model.eval(), and that mode persists on the shared `model` object,
    # so re-running this cell without this call would train with dropout off.
    model.train()
    train_loss = 0.0
    for data, target in train_loader:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass
        output = model(data)
        # Loss for this batch
        loss = criterion(output, target)
        # Backward pass
        loss.backward()
        # Update the weights
        optimizer.step()
        # Accumulate the loss weighted by batch size, so the division below
        # yields a per-sample average even when the last batch is smaller
        train_loss += loss.item() * data.size(0)

    train_loss = train_loss / len(train_loader.dataset)
    print(f"Epoch: {epoch}, Train Loss: {train_loss}")

Epoch: 0, Train Loss: 0.02908407331793569
Epoch: 1, Train Loss: 0.026168279388941645
Epoch: 2, Train Loss: 0.02383734275491969
Epoch: 3, Train Loss: 0.022443662846205935
Epoch: 4, Train Loss: 0.020913395857482794
Epoch: 5, Train Loss: 0.019961652491349794
Epoch: 6, Train Loss: 0.018959988898852663
Epoch: 7, Train Loss: 0.01798465642442655
Epoch: 8, Train Loss: 0.01706515481289465
Epoch: 9, Train Loss: 0.01628724265534185
Epoch: 10, Train Loss: 0.015670407589388473
Epoch: 11, Train Loss: 0.01507632825764068
Epoch: 12, Train Loss: 0.014495246197212899
Epoch: 13, Train Loss: 0.013965190483671277
Epoch: 14, Train Loss: 0.013520454526470055
Epoch: 15, Train Loss: 0.01313481977006545
Epoch: 16, Train Loss: 0.012749494020118575
Epoch: 17, Train Loss: 0.012383833343978516
Epoch: 18, Train Loss: 0.011962811265108203
Epoch: 19, Train Loss: 0.01165948455879559
Epoch: 20, Train Loss: 0.011348466620196458
Epoch: 21, Train Loss: 0.011035453853980925
Epoch: 22, Train Loss: 0.010746113926278278
Epoch:

In [24]:
# Model Validation

num_classes = 10
val_loss = 0.0
# Per-class tallies, indexed by the true label
class_correct = [0] * num_classes
class_total = [0] * num_classes

# Switch to evaluation mode (dropout is disabled)
model.eval()
# Gradients are not needed for evaluation, so skip building the autograd graph
with torch.no_grad():
    for data, target in val_loader:
        # Forward pass
        output = model(data)
        # Batch loss, weighted by batch size for a per-sample average below
        loss = criterion(output, target)
        val_loss += loss.item() * data.size(0)
        # Predicted class is the index of the highest raw score
        _, pred = torch.max(output, 1)
        # Element-wise comparison with the true labels; kept 1-D so a final
        # batch containing a single sample is handled like any other
        correct = pred.eq(target)
        for label, hit in zip(target.tolist(), correct.tolist()):
            class_total[label] += 1
            class_correct[label] += int(hit)

# Average validation loss per sample
val_loss = val_loss / len(val_loader.dataset)
print('Validation Loss: {:.6f}\n'.format(val_loss))

for i in range(num_classes):
    if class_total[i] > 0:
        print("Validation Accuracy of %5s: %2d%% (%2d/%2d)" % (
            str(i),
            100 * class_correct[i] / class_total[i],
            class_correct[i],
            class_total[i],
        ))
    else:
        # The digit itself is the class name; there is no separate `classes` list
        print("Validation Accuracy of %5s: N/A (no validation examples)" % str(i))

print("\nValidation Accuracy (Overall): %2d%% (%2d/%2d)" % (
    100 * sum(class_correct) / sum(class_total),
    sum(class_correct),
    sum(class_total),
))

Validation Loss: 0.248454

Validation Accuracy of     0: 98% (809/821)
Validation Accuracy of     1: 98% (904/915)
Validation Accuracy of     2: 96% (782/811)
Validation Accuracy of     3: 96% (835/861)
Validation Accuracy of     4: 97% (793/814)
Validation Accuracy of     5: 96% (773/799)
Validation Accuracy of     6: 98% (820/836)
Validation Accuracy of     7: 96% (864/891)
Validation Accuracy of     8: 95% (792/832)
Validation Accuracy of     9: 96% (790/820)

Validation Accuracy (Overall): 97% (8162/8400)


In [9]:
# Predict for the Test Dataset

# Switch to evaluation mode (dropout is disabled)
model.eval()
predictions = []

# Inference only: no gradients are needed, so skip building the autograd graph
with torch.no_grad():
    # The test dataset wraps a single tensor, so each batch is a
    # one-element sequence; unpack it to get the input tensor
    for (data,) in test_loader:
        # Forward pass
        output = model(data)
        # Predicted class is the index of the highest raw score
        _, pred = torch.max(output, 1)
        predictions.extend(pred.tolist())

In [10]:
# Overwrite the sample submission's Label column with the model's
# predictions and save it as a CSV without the index column
submission_path = 'Sub_file2.csv'
sample_sub['Label'] = predictions
sample_sub.to_csv(submission_path, index=False)