In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F

class DigitClassifier(nn.Module):
    def __init__(self, numChannels, numClasses):
        super().__init__()
        self.cov1 = nn.Conv2d(in_channels=numChannels, out_channels=20, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=(2, 2))
        
        self.cov2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        
        self.fc1 = nn.Linear(in_features=800, out_features=500)
        self.relu3 = nn.ReLU()
        
        self.fc2 = nn.Linear(in_features=500, out_features=numClasses)
        self.logsoftmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.cov1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        
        x = self.cov2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu3(x)
        
        x = self.fc2(x)
        output = self.logsoftmax(x)
        
        return output
        

In [4]:
train, label = torch.tensor(np.array(data.drop(['label'], axis=1)), dtype=torch.float32).to("cuda"), torch.tensor(np.array(data["label"]), dtype=torch.long).to("cuda")
train = train / 255.0
train = train.reshape(-1,1,28,28)
print("features shape: ",train.shape)

features shape:  torch.Size([42000, 1, 28, 28])


In [5]:
# x_train, x_valid, y_train, y_valid = train_test_split(train, label, test_size=0.25, shuffle=True)

In [6]:
train_dataset = torch.utils.data.TensorDataset(train, label)
train_loader = DataLoader(dataset=train_dataset, batch_size=40, shuffle=True)

In [7]:
# x1, y1 = get_batch(x_train, y_train, batch_size=8)
# x2, y2 = get_batch(x_valid, y_valid, batch_size=8)
# print(x1.shape, y1.shape)
# print(x2.shape, y2.shape)

In [16]:
class AlexNet(nn.Module):
    def __init__(self, numChannel, numClass):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=numChannel, out_channels=64, padding='same', kernel_size=3)
        self.norm1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, padding='same', kernel_size=3)
        self.norm2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, padding='same', kernel_size=3)
        self.norm3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, padding='same', kernel_size=3)
        self.norm4 = nn.BatchNorm2d(128)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=192, padding='same', kernel_size=3)
        self.norm5 = nn.BatchNorm2d(192)
        
        self.fc1 = nn.Linear(in_features=1728, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=numClass)
        
        self.logsoftmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = F.relu(self.norm1(self.conv1(x)))
        x = F.max_pool2d(x, kernel_size=2)
        x = F.relu(self.norm2(self.conv2(x)))
        x = F.max_pool2d(x, kernel_size=2)
        x = F.relu(self.norm3(self.conv3(x)))
        x = F.relu(self.norm4(self.conv4(x)))
        x = F.relu(self.norm5(self.conv5(x)))
        x = F.max_pool2d(x, kernel_size=2)
        
        x = torch.flatten(x, 1)
        
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.5)
        x = self.fc3(x)
        output = self.logsoftmax(x)
        
        return output

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(1, 4, 3)
        self.bn1 = nn.BatchNorm2d(4)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(4, 8, 3)
        self.bn2 = nn.BatchNorm2d(8)
        self.fc = nn.Linear(8 * 11 * 11, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.bn1(x)
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.bn2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
model=Network().to('cuda')
model=AlexNet(1, 10).to('cuda')
# model=DigitClassifier(1, 10).to('cuda')
input_size=(1,28,28)

learning_rate=3e-4
num_epochs=15
# loss_fn=nn.NLLLoss()
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate)

In [17]:
for epoch in range(num_epochs):
    for batch_idx, (d, targets) in enumerate(tqdm(train_loader)):
        # batch, channel, (feature, feature)
        d = d.view(-1, 1, 28, 28)

        # forward
        scores = model(d)
        loss = loss_fn(scores, targets)
        # print((scores.argmax(1) == targets).sum()/len(targets))
        # print(scores.argmax(1))
        # print(targets)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

    model.train()
    return num_correct / num_samples


print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")

100%|██████████| 1050/1050 [00:04<00:00, 230.48it/s]
100%|██████████| 1050/1050 [00:04<00:00, 231.17it/s]
100%|██████████| 1050/1050 [00:04<00:00, 239.51it/s]
100%|██████████| 1050/1050 [00:04<00:00, 233.89it/s]
100%|██████████| 1050/1050 [00:04<00:00, 241.98it/s]
100%|██████████| 1050/1050 [00:04<00:00, 229.52it/s]
100%|██████████| 1050/1050 [00:04<00:00, 222.79it/s]
100%|██████████| 1050/1050 [00:04<00:00, 228.87it/s]
100%|██████████| 1050/1050 [00:04<00:00, 235.86it/s]
100%|██████████| 1050/1050 [00:04<00:00, 233.67it/s]
100%|██████████| 1050/1050 [00:04<00:00, 236.73it/s]
100%|██████████| 1050/1050 [00:04<00:00, 235.93it/s]
100%|██████████| 1050/1050 [00:04<00:00, 217.19it/s]
100%|██████████| 1050/1050 [00:04<00:00, 229.35it/s]
100%|██████████| 1050/1050 [00:04<00:00, 245.89it/s]


Accuracy on training set: 98.85


In [15]:
features_test = test.values.reshape(-1, 1, 28, 28)

test_tensor = torch.tensor(features_test, dtype=torch.float32) / 255.0
test_tensor = test_tensor.to('cuda')
model.eval()  
with torch.no_grad():
    test_predictions = model(test_tensor)

model.train() 

predicted_labels = torch.argmax(test_predictions, dim=1).to('cpu').numpy()

submission = pd.DataFrame({"ImageId": range(1, len(predicted_labels) + 1), "Label": predicted_labels})
submission.to_csv("submission0.csv", index=False)