In [None]:
# imports
import torch
import torchvision
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from torchvision import io, datasets, transforms
import os
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Training hyperparameters.
n_epochs = 10
batch_size_train = 50
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
log_interval = 50

# Reproducibility settings. The cuDNN switch is spelled `enabled`; assigning
# to `enable` merely creates an unused attribute and leaves cuDNN turned on.
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# annotations = pd.read_csv("../input/soml-hackathon/SoML-50/annotations.csv")
# annotations['Value'].value_counts().sort_index().plot(kind='bar', rot=0)

# Input image size in pixels.
# NOTE(review): presumably three 28-pixel-wide symbols side by side — confirm.
img_height = 28
img_width = 3 * 28

In [None]:
# Pick the compute device and the DataLoader options that go with it.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
# A worker process and pinned host memory are only requested when a GPU is used.
loader_kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

print(device)

In [None]:
# train_loader = torch.utils.data.DataLoader(
#     torchvision.datasets.MNIST('./input', train = True, download = True,
#                                transform = torchvision.transforms.ToTensor()),
#                                batch_size = batch_size_train, shuffle = True
# )
# test_loader = torch.utils.data.DataLoader(
#     torchvision.datasets.MNIST('./input', train = False, download = True,
#                                transform = torchvision.transforms.ToTensor()),
#                                batch_size = batch_size_test, shuffle = True
# )

class ExpressionDataset(Dataset):
    """Image dataset driven by an annotations CSV.

    Column 0 of the CSV holds the image file name (relative to ``root_dir``);
    the remaining two columns hold an expression-type string and an answer.
    """

    # Integer codes for the expression-type strings; any other value maps to 0.
    _EXPR_TYPE_CODES = {"infix": 1, "postfix": 2}

    def __init__(self, csv_file, root_dir):
        """Read the annotation table and remember the image directory.

        Args:
            csv_file: Path to the annotations CSV.
            root_dir: Directory that the image file names are relative to.
        """
        self.labels = pd.read_csv(csv_file)
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Row position; a tensor index is converted with ``tolist()``.

        Returns:
            A list ``[image, type_code, answer + 9]`` where ``image`` is the
            decoded file converted to float, and ``type_code`` is 1 for
            "infix", 2 for "postfix" and 0 otherwise.
        """
        position = idx.tolist() if torch.is_tensor(idx) else idx

        file_name = self.labels.iloc[position, 0]
        image = io.read_image(os.path.join(self.root_dir, file_name)).float()

        expr_type, answer = self.labels.iloc[position, 1:]
        type_code = self._EXPR_TYPE_CODES.get(expr_type, 0)
        # NOTE(review): the +9 offset presumably shifts answers into a
        # non-negative label range — confirm against the annotation values.
        return [image, type_code, answer + 9]

# Build the full dataset from the annotation file and the resized images.
dataset = ExpressionDataset(
    csv_file='../input/soml-hackathon/SoML/SoML-50/annotations.csv',
    root_dir='../input/soml-hackathon/SoML/SoML-50/data-resized',
)
# Preview a single sample image.
plt.imshow(dataset[10][0].squeeze())

# Fixed-size random partition of the dataset into four subsets.
split_sizes = [20000, 5000, 5000, 20000]
train_set, test_set, validation_set, remainder = torch.utils.data.random_split(
    dataset, split_sizes
)
# plt.imshow(train_set[10][0].permute(1,2,0))

train_loader = DataLoader(
    train_set,
    batch_size=batch_size_train,
    shuffle=True,
    **loader_kwargs,
)
test_loader = DataLoader(
    test_set,
    batch_size=batch_size_test,
    shuffle=True,
    **loader_kwargs,
)
# validation_loader = DataLoader(validation_set, batch_size=50, shuffle=True)
# validation_loader = DataLoader(validation_set, batch_size=50, shuffle=True)

In [None]:
# look at data

# examples = enumerate(test_loader)
# batch_idx, (example_data, example_targets) = next(examples)

# fig = plt.figure()
# for i in range(6):
#     plt.subplot(2,3,i+1)
#     plt.tight_layout()
#     plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
#     plt.title("Ground Truth: {}".format(example_targets[i]), color='white')
#     plt.xticks([])
#     plt.yticks([])
# fig

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    """Two-convolution CNN that outputs log-probabilities over 3 classes."""

    def __init__(self):
        """Create the convolutional, dropout and fully connected layers."""
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        # 1440 = 20 channels x 4 x 18, the feature-map size that a 1x28x84
        # input reaches after both conv + 2x2 pooling stages.
        self.fc1 = nn.Linear(in_features=1440, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=3)

    def forward(self, x):
        """Map a batch of single-channel images to per-class log-probabilities.

        Args:
            x: Input batch fed to ``conv1`` (one input channel).

        Returns:
            Tensor with 3 log-softmax scores along the last dimension.
        """
        features = F.relu(F.max_pool2d(self.conv1(x), 2))
        features = self.conv2_drop(self.conv2(features))
        features = F.relu(F.max_pool2d(features, 2))

        flat = features.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        # Functional dropout must be told explicitly whether we are training.
        hidden = F.dropout(hidden, training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=-1)

In [None]:
# Instantiate the model on the selected device and attach an SGD optimizer.
network = Net().to(device)
optimizer = optim.SGD(
    network.parameters(),
    lr=learning_rate,
    momentum=momentum,
)

In [None]:
# train_losses = []
# train_counter = []
# test_losses = []
# test_counter = [i * len(train_loader.dataset) for i in range(n_epochs + 1)]

In [None]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``network``, ``optimizer``, ``train_loader``,
    ``device`` and ``log_interval``. Every batch yields
    ``(images, expression types, answers)``; only the expression types are
    used as targets for the NLL loss.

    Args:
        epoch: Epoch number, shown as the progress-bar label.
    """
    network.train()
    progress = tqdm(train_loader, desc='Epoch {}'.format(epoch))
    for batch_idx, (data, types, _answers) in enumerate(progress):
        data, types = data.to(device), types.to(device)

        optimizer.zero_grad()
        output = network(data)
        loss = F.nll_loss(output, types)
        loss.backward()
        optimizer.step()

        # Show the loss on the progress bar every `log_interval` batches
        # instead of printing the raw output tensor for each batch.
        if batch_idx % log_interval == 0:
            progress.set_postfix(loss=loss.item())

In [None]:
def test():
    """Evaluate ``network`` on ``test_loader`` and print loss and accuracy.

    Uses the module-level ``network``, ``test_loader`` and ``device``. Every
    batch yields ``(images, expression types, answers)``; predictions are
    compared against the expression types. Prints the average NLL loss per
    sample and the number and percentage of correct predictions.
    """
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target, _answers in tqdm(test_loader):
            data, target = data.to(device), target.to(device)
            output = network(data)
            # Sum (not mean) per batch so the final division yields a
            # per-sample average regardless of batch sizes.
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            # .item() keeps the running count a plain Python int rather than
            # a tensor living on the compute device.
            correct += pred.eq(target.view_as(pred)).sum().item()
    n_samples = len(test_loader.dataset)
    test_loss /= n_samples
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples,
        100. * correct / n_samples))

In [None]:
# Evaluate once before any training, then alternate one training pass
# with one evaluation for each of the n_epochs epochs.
test()
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()

In [None]:
# Call the function so the cell shows True/False rather than the function object.
torch.cuda.is_available()