In [None]:
# Classifying Digits using Simple ML models
# Necessary Imports
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gc
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Exploring files in the Input Directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Step 1: Convert the data into usable format
# pandas is used to read the contents of csv into a dataframe
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

# Retrieve the features and label columns into a separate numpy arrays
features = train[train.columns[1:]].values
label = train.label.values

print(type(features))
print(type(label))

print(features.shape)
print(label.shape)


**Model 1: **

Logistic Regression

In [None]:
import warnings
warnings.simplefilter("ignore")
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=137)
acc = 0.0
global best_model

for jj, (train_index, val_index) in enumerate(kf.split(features)):
    print("Fitting fold", jj+1)
    train_features = features[train_index]
    train_target = label[train_index]
    
    val_features = features[val_index]
    val_target = label[val_index]
    
    model = LogisticRegression(C=20, solver='lbfgs', multi_class='multinomial')
    model.fit(train_features, train_target)
    val_pred = model.predict_proba(val_features)
    fold_acc=accuracy_score(val_target, np.argmax(val_pred, axis=1))
    print("Fold accuracy:", accuracy_score(val_target, np.argmax(val_pred, axis=1)))
    #test_preds += model.predict_proba(test)/n_splits
    if(fold_acc>acc):
        acc = fold_acc
        best_model = model
    del train_features, train_target, val_features, val_target
    gc.collect()

    
    

In [None]:
print(acc)

In [None]:
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
print(test.columns)
#Retrieve the features and label columns into a separate numpy arrays
test_features = test[test.columns[0:]].values
test_pred = model.predict_proba(test_features)
predict = np.argmax(test_pred, axis=1)

In [None]:
submission = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
submission['Label'] = predict
submission.to_csv('submission.csv', index=False)

**Model 2: **

Logistic Regression with STOCHASTIC GRADIENT DESCENT 

In [None]:
from sklearn.linear_model import SGDClassifier
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=137)
acc = 0.0
global best_model

for jj, (train_index, val_index) in enumerate(kf.split(features)):
    print("Fitting fold", jj+1)
    train_features = features[train_index]
    train_target = label[train_index]
    
    val_features = features[val_index]
    val_target = label[val_index]
    
    # loss = 'hinge' represents linear regression
    # log loss implement logistic regression
    model = SGDClassifier(loss='log')
    model.fit(train_features, train_target)
    val_pred = model.predict_proba(val_features)
    fold_acc=accuracy_score(val_target, np.argmax(val_pred, axis=1))
    print("Fold accuracy:", accuracy_score(val_target, np.argmax(val_pred, axis=1)))
    #test_preds += model.predict_proba(test)/n_splits
    if(fold_acc>acc):
        acc = fold_acc
        best_model = model
    del train_features, train_target, val_features, val_target
    gc.collect()


**Model 3:**
RandomForest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=137)
acc = 0.0
global best_model

for jj, (train_index, val_index) in enumerate(kf.split(features)):
    print("Fitting fold", jj+1)
    train_features = features[train_index]
    train_target = label[train_index]
    
    val_features = features[val_index]
    val_target = label[val_index]
    
    # max depth serves as an important hyperparameter
    # When the depth was set with a value of 2, accuracy was down by 60%
    model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=0)
    model.fit(train_features, train_target)
    val_pred = model.predict_proba(val_features)
    fold_acc=accuracy_score(val_target, np.argmax(val_pred, axis=1))
    print("Fold accuracy:", accuracy_score(val_target, np.argmax(val_pred, axis=1)))
    #test_preds += model.predict_proba(test)/n_splits
    if(fold_acc>acc):
        acc = fold_acc
        best_model = model
    del train_features, train_target, val_features, val_target
    gc.collect()


In [None]:
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
print(test.columns)
#Retrieve the features and label columns into a separate numpy arrays
test_features = test[test.columns[0:]].values
test_pred = model.predict_proba(test_features)
predict = np.argmax(test_pred, axis=1)

In [None]:
submission = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
submission['Label'] = predict
submission.to_csv('submission.csv', index=False)

**Model 3:**
Convolutional Neural Network

**Defining a simple convolutional neural network**

In [1]:
import torch
import torch.nn.functional as F
from torch import nn
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        return x.view(x.size(0), -1)

class NN(torch.nn.Module):
    def __init__(self):
        super(NN,self).__init__()
        self.conv1 = torch.nn.Conv2d(1,6,3,padding=1)
        self.pool1 = torch.nn.MaxPool2d(2)
        self.conv2 = torch.nn.Conv2d(6,16,5,padding=0)
        self.pool2 = torch.nn.MaxPool2d(2)
        self.linear1 = torch.nn.Linear(400,120)
        self.linear2 = torch.nn.Linear(120,84)
        self.linear3 = torch.nn.Linear(84,10)
    def forward(self,x):
        c1= F.relu(self.conv1(x))
        s1 = self.pool1(c1)
        c2 = F.relu(self.conv2(s1))
        s2 = self.pool2(c2)
        f  = (Flatten()(s2))
        f1 = F.relu(self.linear1(f))
        f2 = F.relu(self.linear2(f1))
        f3 = self.linear3(f2)
        return f3

In [3]:
# Install Necessary Packages
!pip install torchsummary

Collecting torchsummary
  Downloading https://files.pythonhosted.org/packages/7d/18/1474d06f721b86e6a9b9d7392ad68bed711a02f3b61ac43f13c719db50a6/torchsummary-1.5.1-py3-none-any.whl
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


**Selecting the device and initiating the model**
torch summary provides a detailed picture on the arrangement of layers in the resulting neural network

In [4]:
import torch
from torchsummary import summary
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NN().to(device)
summary(model,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]              60
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
Total params: 61,610
Trainable params: 61,610
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.30
----------------------------------------------------------------


**Reading the contents of the csv file 
Reshaping the input from a single dimensional vector to a image of size (1,28,28)**

In [7]:
import pandas as pd
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

# Retrieve the features and label columns into a separate numpy arrays
features = train[train.columns[1:]].values
label = train.label.values

[rows, columns] = features.shape
print(type(features))
print(type(label))

print(features.shape)
print(label.shape)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(42000, 784)
(42000,)


In [17]:
features = features /255.0
features = features.reshape(rows,1,28,28)
print(features.shape)

(42000, 1, 28, 28)


**Defining the dataset.** A Class which accepts the input images and the labels; can be used by loaders to feed the images to the network in batches

In [33]:
from torch.utils.data import Dataset, DataLoader
class mnistDataset(Dataset):
    def __init__(self, images, labels):
        self.image =  torch.from_numpy(images)
        self.gt = torch.from_numpy(labels)

    def __len__(self):
        #print(self.image.shape)
        #print(self.gt.shape)
        return self.image.shape[0]

    def __getitem__(self,index):
        return self.image[index], self.gt[index]

In [34]:
dataset = mnistDataset(features, label)
print(features.shape)
print(label.shape)
trainLoader = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=False)


(42000, 1, 28, 28)
(42000,)


**Define the Criterion and Optimizer**

In [35]:
import torch.optim
Criterion = torch.nn.CrossEntropyLoss(size_average=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

**Train the model **
1. Obtain the model prediction for batch from the dataset
2. Compare target against prediction
3. Obtain gradients and adjust weights based on the loss
4. Repeat for all the batches in the dataset

In [39]:
def train(model, Criterion, optimizer, trainLoader):
    model.train()
    epoch_list=[]
    loss_list=[]
    for epoch in range(100):
        running_loss = 0
        for data,target in trainLoader:
            x = data.to(device)
            x = x.type(torch.cuda.FloatTensor)
            y = target.to(device)
            #Compute model ouput
            pred = model(x)
            #Compute loss
            loss = Criterion(pred,y)
            running_loss +=loss.item()
            #Optimizer to adjust weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('epoch:',epoch,'loss:',running_loss/len(trainLoader))
        epoch_list.append(epoch)
        loss_list.append(running_loss/len(trainLoader))
    return epoch_list,loss_list, model


In [40]:
epoch_list, loss_list, model = train(model, Criterion, optimizer, trainLoader)



epoch: 0 loss: 2.301833299710637
epoch: 1 loss: 2.3018463186706817
epoch: 2 loss: 2.3018342753251395
epoch: 3 loss: 2.3018237073648544
epoch: 4 loss: 2.301814973127274
epoch: 5 loss: 2.301808540934608
epoch: 6 loss: 2.3018051237435566
epoch: 7 loss: 2.3018019312903997
epoch: 8 loss: 2.3017970759244193
epoch: 9 loss: 2.3017941764422827
epoch: 10 loss: 2.301790985862414
epoch: 11 loss: 2.3017870062305814
epoch: 12 loss: 2.3017845088811146
epoch: 13 loss: 2.3017825939825602
epoch: 14 loss: 2.3017814167056767
epoch: 15 loss: 2.301777136666434
epoch: 16 loss: 2.3017764546473822
epoch: 17 loss: 2.301770636041959
epoch: 18 loss: 2.3017661517574672
epoch: 19 loss: 2.3017659149283456
epoch: 20 loss: 2.3017635577065603
epoch: 21 loss: 2.301762164746012
epoch: 22 loss: 2.301760265287899
epoch: 23 loss: 2.301757753548168
epoch: 24 loss: 2.3017538617622284
epoch: 25 loss: 2.301752048702467
epoch: 26 loss: 2.301752816410292
epoch: 27 loss: 2.3017521229528244
epoch: 28 loss: 2.3017512489784333
epoch:

In [41]:
from torch.utils.data import Dataset, DataLoader
class mnisttestDataset(Dataset):
    def __init__(self, images):
        self.image =  torch.from_numpy(images)

    def __len__(self):
        #print(self.image.shape)
        #print(self.gt.shape)
        return self.image.shape[0]

    def __getitem__(self,index):
        return self.image[index]

In [69]:
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
#Retrieve the features and label columns into a separate numpy arrays
features = test[test.columns[0:]].values
print(features.shape)
features = features /255.0
features = features.reshape(28000,1,28,28)

dataset = mnisttestDataset(features)
print(features.shape)
print(label.shape)
testLoader = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=False)


(28000, 784)
(28000, 1, 28, 28)
(42000,)


In [70]:
import numpy as np
from torch.autograd import Variable
def validation(model, optimizer, testLoader, device, Criterion):
    model.eval()
    predictions = []
    with torch.no_grad():
        for vinput in testLoader:
            vinput = Variable(vinput)
            vinput = vinput.to(device)
            vinput = vinput.type(torch.cuda.FloatTensor)
            optimizer.zero_grad()
            vpredict = model(vinput)
            predict = F.softmax(vpredict, dim=1)
            predict = predict.to('cpu')
            predict = predict.numpy()
            print(predict)
            predictions.append(np.argmax(predict, axis=1))
            print(predictions)
            break
    return predictions

In [71]:
predict = validation(model, optimizer, testLoader, device, Criterion)

#submission = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
#submission['Label'] = predict
#submission.to_csv('submission.csv', index=False)

[[0.10169941 0.11342679 0.09564821 0.10364237 0.0992491  0.08757892
  0.09941325 0.10394844 0.09560012 0.0997934 ]
 [0.10225735 0.11366615 0.09532756 0.10376731 0.09912973 0.08764957
  0.0990767  0.10424156 0.0952206  0.0996635 ]
 [0.10206757 0.11360858 0.09570484 0.10362783 0.09908223 0.08729158
  0.09934831 0.10392742 0.09578578 0.0995558 ]
 [0.10204396 0.11347844 0.09571691 0.10368015 0.09910908 0.087291
  0.09938284 0.10386305 0.09571311 0.09972136]
 [0.10220698 0.11372564 0.09543299 0.10359783 0.09907059 0.08760928
  0.09937488 0.10405271 0.09531747 0.09961168]]
[41]


In [66]:
print(predict)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

**Reference:**
https://www.kaggle.com/tunguz/mnist-logistic-regression-baseline