# PyTorch

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 

In [2]:
# Read the labelled training set and the unlabelled test set from the working directory.
raw_train, raw_test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [3]:
# Sanity-check the (rows, columns) shape of both frames.
raw_train.shape, raw_test.shape

((42000, 785), (28000, 784))

In [19]:
# Separate the target column from the features (every column except "label").
y = raw_train["label"]
X = raw_train.drop(columns=["label"])

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
import torch 
import torch.nn as nn

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two conv -> ReLU -> max-pool stages feed two fully connected layers that
    emit 10 raw class scores (logits) per image. Attribute names are part of
    the checkpoint format (state_dict keys), so they must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1x28x28 -> conv 3x3 -> 16x26x26 -> pool 2x2 -> 16x13x13
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Stage 2: 16x13x13 -> conv 3x3 -> 32x11x11 -> pool 2x2 -> 32x5x5
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Classifier head: 800 flattened features -> 128 hidden units -> 10 logits
        self.fc1 = nn.Linear(in_features=32 * 5 * 5, out_features=128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return logits of shape (batch, 10) for input of shape (batch, 1, 28, 28)."""
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))
        flat = features.view(features.size(0), -1)  # (batch, 32*5*5)
        hidden = self.relu3(self.fc1(flat))
        return self.fc2(hidden)


In [13]:
batch_size = 100 # 100 images per batch
epochs = 10 # number of full passes over the training loader

In [21]:
# change to tensor 
X_train = torch.tensor(X_train.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
X_val = torch.tensor(X_val.values, dtype=torch.float32)
y_val = torch.tensor(y_val.values, dtype=torch.long)

In [22]:
from torch.utils.data import DataLoader, TensorDataset

# Pair each feature row with its label so the loaders yield (images, labels).
train = TensorDataset(X_train, y_train)
test = TensorDataset(X_val, y_val)  # built from the validation split

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=False)

# Model, loss and optimizer
model = CNN()
criterion = nn.CrossEntropyLoss()  # takes raw logits and integer class labels
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [23]:
# Train the CNN and, every `eval_every` optimizer steps, measure accuracy on
# the validation loader. The per-checkpoint loss / step / accuracy are kept in
# loss_list / iter_list / acc_list as plain Python numbers.
iteration = 0  # optimizer steps so far (named to avoid shadowing builtin `iter`)
eval_every = 100
loss_list = []
iter_list = []
acc_list = []
for epoch in range(epochs):
    for images, labels in train_loader:

        # Forward pass: flat 784-value rows -> (batch, 1, 28, 28) images
        outputs = model(images.view(-1, 1, 28, 28))
        loss = criterion(outputs, labels)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # backpropagation
        optimizer.step()       # update the weights

        iteration += 1
        if iteration % eval_every == 0:
            correct = 0
            total = 0
            model.eval()
            # no_grad: evaluation needs no autograd graph, which saves memory
            # and time. Separate val_* names keep the training batch intact.
            with torch.no_grad():
                for val_images, val_labels in test_loader:
                    val_outputs = model(val_images.view(-1, 1, 28, 28))  # (batch, 10)
                    predicted = val_outputs.argmax(dim=1)  # index of the largest logit
                    total += val_labels.size(0)
                    correct += (predicted == val_labels).sum().item()
            model.train()
            accuracy = 100 * correct / total  # plain float, in percent

            loss_list.append(loss.item())
            iter_list.append(iteration)
            acc_list.append(accuracy)
            print("Iteration: {}. Loss: {}. Accuracy: {}".format(iteration, loss.item(), accuracy))

Iteration: 100. Loss: 0.18412011861801147. Accuracy: 93.92857360839844
Iteration: 200. Loss: 0.15688851475715637. Accuracy: 95.96428680419922
Iteration: 300. Loss: 0.09548826515674591. Accuracy: 96.97618865966797
Iteration: 400. Loss: 0.18761688470840454. Accuracy: 97.55952453613281
Iteration: 500. Loss: 0.04079797863960266. Accuracy: 97.3452377319336
Iteration: 600. Loss: 0.10470384359359741. Accuracy: 97.55952453613281
Iteration: 700. Loss: 0.02339797280728817. Accuracy: 98.07142639160156
Iteration: 800. Loss: 0.1691151112318039. Accuracy: 97.88095092773438
Iteration: 900. Loss: 0.0388951450586319. Accuracy: 98.33333587646484
Iteration: 1000. Loss: 0.1226687878370285. Accuracy: 98.10713958740234
Iteration: 1100. Loss: 0.014458319172263145. Accuracy: 98.28571319580078
Iteration: 1200. Loss: 0.07091951370239258. Accuracy: 98.16666412353516
Iteration: 1300. Loss: 0.023702062666416168. Accuracy: 98.11904907226562
Iteration: 1400. Loss: 0.007712169084697962. Accuracy: 98.55952453613281
It

*I can't train for more epochs locally because my CPU is too slow.*

Instead, I load the weights file (`cnn_model.pth`) from a model I trained for 100 epochs on Colab (thanks, Google!).

In [27]:
# Restore the saved weights from cnn_model.pth, mapping all tensors to the CPU.
state_dict = torch.load("cnn_model.pth", map_location="cpu")
model.load_state_dict(state_dict)

<All keys matched successfully>

In [28]:
# Test features as a float32 tensor, one flat row per image.
test_X = torch.tensor(raw_test.to_numpy(), dtype=torch.float32)

In [31]:
# Inference only: disable gradient tracking while scoring the whole test set
# in a single forward pass.
with torch.no_grad():
    test_images = test_X.view(-1, 1, 28, 28)
    output = model(test_images)


In [37]:
# Predicted class = index of the largest logit in each row.
# argmax replaces torch.max(output.data, dim=1): it avoids the legacy `.data`
# attribute and no longer rebinds `_`, which IPython uses for the last output.
predicted = output.argmax(dim=1)
predicted.shape

torch.Size([28000])

In [38]:
# Peek at the predicted labels (the tensor repr is truncated automatically).
predicted

tensor([2, 0, 9,  ..., 3, 9, 2])

In [39]:
# Build the submission table: 1-based ImageId alongside the predicted label.
predicted = predicted.numpy()
submission = pd.DataFrame(
    {
        "ImageId": np.arange(1, len(predicted) + 1),
        "Label": predicted,
    }
)
submission.to_csv("Pytorch_submission.csv", index=False)