In [193]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

import os
# Echo the full path of every file found under the Kaggle input directory.
# The generator is lazy, so paths are printed in the same order os.walk yields them.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

In [194]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torch.optim as optim
import torch.nn.functional as F
from torch import tensor
import pandas as pd
from sklearn.model_selection import train_test_split

# Run on the GPU when PyTorch reports a usable CUDA device, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [195]:
# data processing block (pandas columns -> numpy arrays -> pytorch tensors -> datasets/dataloaders)

# One seed drives both the train/val split and the per-epoch batch shuffling.
SEED = 42

# Read everything as float32 so the pixel columns are already in the dtype the model consumes.
# Kept under its own name so `train` below can hold the Dataset without clobbering the
# DataFrame (re-running this cell then stays idempotent).
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv', dtype=np.float32)

# organize pandas columns into numpy arrays
labels = train_df.label.values

# normalize, normalize, normalize!
# Pixels are 8-bit grayscale intensities, so dividing by 255 (not 225) maps them onto [0, 1].
features = train_df.loc[:, train_df.columns != 'label'].values / 255

# train/val split (80% / 20%)
features_train, features_val, labels_train, labels_val = train_test_split(
    features, labels, test_size=0.2, random_state=SEED
)

# change numpy arrays into tensors; labels were read as float32, but
# nn.CrossEntropyLoss needs integer class indices, hence the cast to long
featuresTrain = tensor(features_train)
featuresVal = tensor(features_val)
labelsTrain = tensor(labels_train, dtype=torch.long)
labelsVal = tensor(labels_val, dtype=torch.long)

# set batch_size, epoch, and num_iterations
batch_size = 100
n_iters = 10000
# number of whole passes over the training set needed to reach roughly n_iters optimizer steps
num_epochs = int(n_iters / (len(features_train) / batch_size))

# load tensor data into Datasets
train = TensorDataset(featuresTrain, labelsTrain)
val = TensorDataset(featuresVal, labelsVal)

# create Dataloaders from Datasets
# Training batches are reshuffled every epoch so SGD does not see the same batch sequence
# each pass; the seeded generator keeps runs reproducible. Validation order is irrelevant.
train_loader = DataLoader(
    train,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False)

In [205]:
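# model block
# The LogisticRegressionModel and ANNModel code below is commented out (earlier experiments);
# CNNModel is the only active model definition in this cell.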
# class LogisticRegressionModel(nn.Module):
#     def __init__(self, input_dim, output_dim):
#         super().__init__()
        
#         self.linear = nn.Linear(input_dim, output_dim)
#     def forward(self, x):
#         out = self.linear(x)
#         return out
    
# input_dim = 28 * 28
# output_dim = 10

# model = LogisticRegressionModel(input_dim, output_dim)
# model = model.to(device)

# class ANNModel(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim):
#         super().__init__()
#         self.fc1 = nn.Linear(input_dim, hidden_dim)
#         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
#         self.fc3 = nn.Linear(hidden_dim, hidden_dim)
#         self.fc4 = nn.Linear(hidden_dim, output_dim)
#     def forward(self, x):
#         x = self.fc1(x)
#         x = F.relu(x)
        
#         x = self.fc2(x)
#         x = torch.tanh(x)
        
#         x = self.fc3(x)
#         x = F.elu(x)
        
#         x = self.fc4(x)
#         return x
    
# input_dim = 28 * 28
# hidden_dim = 150
# output_dim = 10

# model = ANNModel(input_dim, hidden_dim, output_dim)
# model = model.to(device)

class CNNModel(nn.Module):
    """Small convolutional classifier for 28x28 single-channel digit images.

    Per-sample shapes through the network:
    1x28x28 -> conv(5) -> 16x24x24 -> pool(2) -> 16x12x12
            -> conv(5) -> 32x8x8   -> pool(2) -> 32x4x4
            -> flatten (512) -> linear -> 10 class logits.
    """

    def __init__(self):
        super().__init__()

        self.cnn1 = nn.Conv2d(1, 16, 5)
        self.pool = nn.MaxPool2d(2)  # stateless, so the same module is reused after both convs
        self.cnn2 = nn.Conv2d(16, 32, 5)
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10).

        Args:
            x: a batch of images, either already shaped (batch, 1, 28, 28) or
               flattened to (batch, 784); both layouts are accepted.

        Returns:
            Tensor of shape (batch, 10) holding unnormalised scores (no softmax
            is applied, which is what nn.CrossEntropyLoss expects).
        """
        # Conv2d needs (batch, channel, height, width); reshaping here lets callers
        # pass flattened rows straight through. Already-shaped input is unchanged.
        x = x.reshape(-1, 1, 28, 28)

        x = self.pool(F.relu(self.cnn1(x)))
        x = self.pool(F.relu(self.cnn2(x)))

        # keep the batch dimension, collapse channels and spatial dims into one vector
        x = torch.flatten(x, 1)

        return self.fc1(x)


In [202]:
# loss function and optimizer block

# Every other `model = ...` line in this notebook is commented out, so the model is
# instantiated here; otherwise `model.parameters()` below raises NameError on a fresh
# kernel. The model must live on `device` before the optimizer captures its parameters.
model = CNNModel().to(device)

# CrossEntropyLoss takes raw logits plus integer class labels (softmax is applied internally).
loss_fn = nn.CrossEntropyLoss()
lr = 0.1
optimizer = optim.SGD(model.parameters(), lr=lr)

In [203]:
# training set-up block
count = 0
loss_list = []
iteration_list = []


# get training step
def get_step(model, loss_fn, optimizer):
    """Build a closure that performs one optimisation step on a single batch.

    Args:
        model: the nn.Module being trained.
        loss_fn: callable taking (predictions, targets) and returning a scalar loss tensor.
        optimizer: optimizer already bound to the model's parameters.

    Returns:
        A function `train_step(x, y)` that runs forward, backward and one optimizer
        update on the batch and returns the loss as a Python float.
    """
    def train_step(x, y):
        # Operate on the arguments (not on loop globals) and keep the result of `.to()`:
        # Tensor.to returns a new tensor rather than moving the tensor in place.
        # The target device is read from the model so data always lands where the weights are.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            x = x.to(first_param.device)
            y = y.to(first_param.device)

        model.train()  # make sure train-mode behaviour is active for the update
        optimizer.zero_grad()  # gradients accumulate by default, so reset them every step
        yhat = model(x)
        loss = loss_fn(yhat, y)
        loss.backward()
        optimizer.step()
        return loss.item()
    return train_step

In [204]:
# Training the model
count = 0
loss_list = []
iteration_list = []

train_step = get_step(model, loss_fn, optimizer)


def _prepare_batch(inputs, targets):
    """Shape a batch for the CNN and move it to the device the model lives on."""
    model_device = next(model.parameters()).device
    # Rows arrive as flattened 28*28 pixel vectors; Conv2d wants (batch, channel, height, width).
    return inputs.view(-1, 1, 28, 28).to(model_device), targets.to(model_device)


def _validation_accuracy():
    """Return the model's accuracy, in percent, over the whole validation loader."""
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = _prepare_batch(images, targets)
            # The predicted class is the index of the largest logit.
            predicted = model(images).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    model.train(was_training)  # restore whichever mode the model was in
    return 100 * correct / float(total)


for epoch in range(num_epochs):
    for xbatch, ybatch in train_loader:
        xbatch, ybatch = _prepare_batch(xbatch, ybatch)

        # Keep the returned loss: it is what gets logged and printed below.
        loss = train_step(xbatch, ybatch)
        count += 1

        # Every 50 iterations: evaluate on the validation set and record the loss.
        if count % 50 == 0:
            accuracy = _validation_accuracy()

            # store loss and iteration
            loss_list.append(loss)
            iteration_list.append(count)
        # 500 is a multiple of 50, so `accuracy` was refreshed just above.
        if count % 500 == 0:
            # Print Loss
            print(f'Iteration: {count}  Loss: {loss}  Accuracy: {accuracy}%')