In [1]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, Dataset

In [2]:
class Net(nn.Module):
    """Fully connected classifier with layer widths 784 -> 60 -> 30 -> 10.

    ReLU is applied after each of the two hidden layers; the output layer
    returns its 10 raw scores with no activation applied.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer 1: 784 input features -> 60 units
        self.fc1 = nn.Linear(in_features=784, out_features=60)
        # Hidden layer 2: 60 -> 30 units
        self.fc2 = nn.Linear(in_features=60, out_features=30)
        # Output layer: 30 -> 10 scores, one per label
        self.fc3 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Map a batch whose last dimension is 784 to a batch of 10 scores."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [3]:
# Use the GPU only when CUDA is actually usable. `is_available` has to be
# *called*: the bare function object is always truthy, which would select
# "cuda" even on CPU-only machines and fail later when tensors are moved.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Build the network, then place its parameters on the selected device.
model = Net()
model = model.to(device)

In [5]:
# Optimisation setup: cross-entropy loss on the network's raw scores,
# minimised with Adam at a fixed step size.
learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [6]:
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode for the duration of the call and
    is put back into whatever mode it was in before, even if a batch raises.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: ``nn.Module`` producing one score per class along dimension 1.

    Returns:
        Accuracy as a percentage (``float``); ``0.0`` when ``loader`` yields
        no samples.
    """
    # Move batches to the device the model actually lives on instead of relying
    # on a notebook-level global. A parameter-free model leaves batches as-is.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                if target_device is not None:
                    x = x.to(device=target_device)
                    y = y.to(device=target_device)

                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the running count a plain Python int
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode (the original `model.train` without
        # parentheses never did anything, leaving the model in eval mode).
        model.train(was_training)

    # Guard against an empty loader instead of dividing by zero
    accuracy = float(num_correct) / float(num_samples) * 100 if num_samples else 0.0
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')
    return accuracy

In [7]:
class TorchDataset(Dataset):
    """In-memory dataset read from a comma-separated numeric file.

    The first line of the file is skipped as a header. In every remaining row
    the first column is used as the label and all other columns as features.
    """

    def __init__(self, csv_path):
        """Load the whole file at ``csv_path`` into a 2-D float64 tensor."""
        # ndmin=2 keeps the table two-dimensional even when the file holds a
        # single data row; without it loadtxt returns a 1-D array, so __len__
        # would report the column count and __getitem__ would fail.
        table = np.loadtxt(csv_path, delimiter=',', skiprows=1, ndmin=2)
        self.df = torch.from_numpy(table)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as float32 / int64 tensors."""
        y = self.df[idx, 0].long()
        x = self.df[idx, 1:].float()
        return x, y

    def __len__(self):
        """Number of data rows (the header line is not counted)."""
        return self.df.shape[0]

In [8]:
# Labelled training CSV. NOTE(review): this is an absolute path on one
# machine; the notebook will not run elsewhere without editing it.
csv_path = "C:/Users/nikit/Desktop/Учеба/rezume/train.csv"
# Parse the file into the dataset wrapper defined in this notebook.
data = TorchDataset(csv_path=csv_path)

In [9]:
# Random 60% / 40% split of the labelled rows; the second part takes whatever
# remains after truncating the 60% share to an integer.
train_size = int(len(data) * 0.6)
test_size = len(data) - train_size
train_dataset, test_dataset = random_split(
    dataset=data,
    lengths=[train_size, test_size],
)

In [10]:
# Training batches: reshuffled on every pass, last incomplete batch dropped.
train_loader = DataLoader(dataset = train_dataset, drop_last = True, batch_size=64, shuffle=True)
# Evaluation batches: keep the final partial batch so accuracy is computed over
# every held-out sample (drop_last=True silently excluded up to 63 of them).
# Shuffling is switched off because sample order does not affect accuracy.
test_loader = DataLoader(dataset = test_dataset, drop_last = False, batch_size=64, shuffle=False)

In [17]:
# Number of full passes over the training loader.
num_epochs = 30


def fit(train_loader, num_epochs, model, criterion, optimizer):
    """Train ``model`` in place for ``num_epochs`` passes over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(inputs, targets)`` tensor batches.
        num_epochs: Number of passes over ``train_loader``.
        model: ``nn.Module`` to optimise.
        criterion: Callable ``criterion(scores, targets)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.

    Returns:
        None. The model is left in training mode.
    """
    # Move batches to the device the model actually lives on instead of relying
    # on a notebook-level global. A parameter-free model leaves batches as-is.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # Make sure training-only behaviour is active even if an earlier evaluation
    # left the model in eval mode.
    model.train()

    for _ in range(num_epochs):
        for data, targets in train_loader:
            if target_device is not None:
                data = data.to(target_device)
                targets = targets.to(target_device)

            # Clear gradients left over from the previous step
            optimizer.zero_grad()
            scores = model(data)
            loss = criterion(scores, targets)

            loss.backward()

            optimizer.step()

In [12]:
# fit(train_loader, num_epochs, model, criterion, optimizer)
# check_accuracy(train_loader, model)
# check_accuracy(test_loader, model)

In [40]:
# Submission run: train a fresh network on every labelled row, and load the
# unlabelled test rows onto the device as float32.
model_final = Net().to(device)
optimizer_final = optim.Adam(params=model_final.parameters(), lr=learning_rate)
test_path = "C:/Users/nikit/Desktop/Учеба/rezume/test.csv"
full_data = TorchDataset(csv_path)
test_data = (
    torch.from_numpy(np.loadtxt(test_path, delimiter=',', skiprows=1))
    .to(device)
    .float()
)
train_loader_final = DataLoader(full_data, batch_size=64, shuffle=True, drop_last=True)
# NOTE(review): test_loader_final is not used by any of the visible cells.
test_loader_final = DataLoader(test_data, batch_size=64, shuffle=False, drop_last=False)
fit(
    train_loader=train_loader_final,
    num_epochs=num_epochs,
    model=model_final,
    criterion=criterion,
    optimizer=optimizer_final,
)

In [38]:
# Sanity check: dimensions of the loaded test tensor.
print(test_data.size())

torch.Size([28000, 784])


In [21]:
def get_predictions(model, x):
    """Return ``model(x)`` computed in evaluation mode without gradient tracking.

    The model is put back into whatever mode it was in before the call, even
    if the forward pass raises.

    Args:
        model: ``nn.Module`` to run.
        x: Input passed unchanged to ``model``; the caller is responsible for
            placing it on the same device as the model.

    Returns:
        The model's output for ``x``.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            scores = model(x)
    finally:
        # The original bare `model.train` (no parentheses) was a no-op and left
        # the model stuck in eval mode.
        model.train(was_training)
    return scores

In [49]:
# Predicted label for each test row = index of its highest score.
scores, indicies = get_predictions(model_final, test_data).max(1)
# ImageId is 1-based and sized from the predictions themselves rather than a
# hard-coded 28000, so the frame stays consistent for any test-set size.
# The labels are converted to a NumPy array so pandas stores a plain integer column.
submission = pd.DataFrame({
    "ImageId": np.arange(1, len(indicies) + 1),
    "Label": indicies.cpu().numpy(),
})

In [51]:
# Preview the first five rows of the submission table.
submission.head(n=5)

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,2
4,5,3


In [52]:
# Write the submission to the working directory without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)