In [2]:
import os

import matplotlib.pyplot as plt
import ndjson
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Presumably the side length of the (square) input images. ImageDataLoader
# splits each AllData.npy row at column IMAGESIZE*IMAGESIZE: the columns before
# it are used as the image values and that column holds the metadata record.
IMAGESIZE = 28



In [4]:
def parseSimplifiedDrawings(fileName, num):
    """Read at most ``num`` records from a newline-delimited JSON file.

    Records are decoded one line at a time and reading stops as soon as
    ``num`` records have been collected, so the rest of a large file is never
    parsed.

    Args:
        fileName: Path of the ndjson file to read.
        num: Maximum number of records to return. Values <= 0 give an empty
            list (the file is still opened, so a missing file still raises).

    Returns:
        list: The first ``min(num, number of records)`` decoded records, in
        file order.
    """
    drawings = []
    with open(fileName, 'r') as fileStream:
        if num > 0:
            # ndjson.reader yields one decoded record per line lazily.
            for record in ndjson.reader(fileStream):
                drawings.append(record)
                if len(drawings) >= num:
                    break
    return drawings

def prepare_datafiles(length, per_class=300, img_dir="images", data_dir="data",
                      base_dir="/Users/arthurjakobsson/Documents/10617/Project/"):
    """Merge per-class image arrays and drawing records into ``AllData.npy``.

    Every file in ``base_dir/img_dir`` is loaded with ``np.load`` and its first
    ``per_class`` rows are stacked; every file in ``base_dir/data_dir`` is read
    with ``parseSimplifiedDrawings`` for its first ``per_class`` records. Both
    directories are walked in sorted file-name order and the n-th image row is
    stored next to the n-th record.
    NOTE(review): this pairing presumably relies on image and data files
    sharing class names so that both sort orders line up -- confirm.

    Args:
        length: Unused. Kept only so existing calls keep working.
        per_class: Maximum number of rows/records taken from each file.
        img_dir: Sub-directory of ``base_dir`` containing the image arrays.
        data_dir: Sub-directory of ``base_dir`` containing the ndjson files.
        base_dir: Project directory that ``img_dir`` and ``data_dir`` are
            resolved against. Defaults to the originally hard-coded location;
            pass your own path on any other machine.

    Raises:
        ValueError: If the number of image rows differs from the number of
            drawing records, since they could not be paired row by row.

    Side effects:
        Prints the file lists and array shapes, and writes ``AllData.npy``
        into the current working directory. Returns nothing.
    """
    img_path = os.path.join(base_dir, img_dir)
    data_path = os.path.join(base_dir, data_dir)

    # Skip hidden entries (e.g. .DS_Store, .ipynb_checkpoints): they are not
    # data files and would make np.load / the ndjson parser fail.
    img_list = sorted(f for f in os.listdir(img_path) if not f.startswith("."))
    data_list = sorted(f for f in os.listdir(data_path) if not f.startswith("."))

    print(img_list)
    print(data_list)
    img_npy = np.vstack([np.load(os.path.join(img_path, fname))[0:per_class] for fname in img_list])
    print(img_npy.shape)

    allDrawings = []
    for fname in data_list:
        # Resolve against data_path so images and records come from the same project tree.
        allDrawings += parseSimplifiedDrawings(os.path.join(data_path, fname), per_class)
        print(fname)

    if img_npy.shape[0] != len(allDrawings):
        raise ValueError(
            "Cannot pair images with drawings: {} image rows vs {} drawing records".format(
                img_npy.shape[0], len(allDrawings)))

    # One object column holding the record dict of each row.
    dicts = np.array([allDrawings]).T

    finalData = np.hstack([img_npy, dicts])
    print(finalData.shape)
    np.save("AllData.npy", finalData)

# Build AllData.npy from at most the first 4 entries of every class file.
# The first argument (length) is never read by prepare_datafiles, so the 0
# passed here has no effect.
prepare_datafiles(0, per_class=4)


In [22]:
class ImageDataLoader(Dataset):
    """Quick Draw and Image dataset.

    Despite the name this is a map-style ``Dataset`` (it subclasses
    ``torch.utils.data.Dataset``), not a ``DataLoader``.

    Attributes set by the constructor:
        length: Number of samples (rows of ``AllData.npy``).
        data: Tuple ``(xData, countryCode)`` -- the first
            ``IMAGESIZE*IMAGESIZE`` columns of every row, and the
            ``"countrycode"`` value of every row's metadata record.
        outputSize: Number of distinct country codes.
    """

    def __init__(self):
        """Load ``AllData.npy`` from the current working directory.

        Takes no arguments; the file name is fixed.
        """
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load an AllData.npy that was produced locally by this project.
        raw = np.load("AllData.npy", allow_pickle=True)
        self.length = raw.shape[0]

        numPixels = IMAGESIZE * IMAGESIZE
        # NOTE(review): xData keeps the dtype of the loaded array (presumably
        # object, because records share the array) -- cast before feeding torch.
        xData = raw[:, :numPixels]
        dataInfo = raw[:, numPixels]
        print(dataInfo.shape)
        countryCode = np.array([i["countrycode"] for i in dataInfo])
        print(countryCode)
        self.data = (xData, countryCode)

        # Computed after self.data is final so the helper reads the same
        # structure here as it does on any later call.
        self.outputSize = self.getUniqueCountryCount()

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(image_values, country_code)`` for sample ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
                Raising (rather than returning None) is what the sequence
                protocol expects for an out-of-range index.
        """
        if not -self.length <= idx < self.length:
            raise IndexError(
                "Index {} out of range for {}-sample dataset".format(idx, self.__len__()))

        xData, countryCode = self.data
        return xData[idx], countryCode[idx]

    def getUniqueCountryCount(self):
        """Return how many distinct country codes occur in the dataset."""
        return len(set(self.data[1]))

    def getData(self):
        """Return the full ``(xData, countryCode)`` tuple."""
        return self.data

# Instantiate the dataset; the constructor reads AllData.npy from the current
# working directory.
dataloader = ImageDataLoader()


(1380,)
['GB', 'FR', 'GB', 'US', 'US', 'US', 'US', 'SA', 'US', 'US', 'NZ', 'US', 'MY', 'GB', 'GB', 'FI', 'US', 'CA', 'US', 'US', 'US', 'US', 'US', 'US', 'US', 'US', 'CH', 'US', 'US', 'US', 'JP', 'SE', 'HR', 'US', 'US', 'SK', 'US', 'US', 'US', 'US', 'PL', 'US', 'US', 'BD', 'US', 'RU', 'GB', 'US', 'GB', 'US', 'US', 'US', 'US', 'GB', 'RU', 'US', 'KR', 'SE', 'US', 'DE', 'LT', 'RU', 'US', 'CA', 'US', 'GB', 'US', 'US', 'GB', 'US', 'US', 'GB', 'BM', 'US', 'US', 'US', 'US', 'US', 'US', 'BA', 'US', 'US', 'US', 'CZ', 'US', 'DE', 'PH', 'TH', 'US', 'FR', 'VN', 'US', 'RO', 'US', 'BR', 'US', 'BR', 'GB', 'RU', 'RU', 'US', 'PT', 'PL', 'RU', 'SE', 'GB', 'BG', 'CA', 'US', 'US', 'US', 'GB', 'US', 'RU', 'AU', 'US', 'US', 'GB', 'CZ', 'PL', 'RU', 'US', 'US', 'US', 'US', 'AU', 'DK', 'GB', 'DE', 'US', 'HK', 'US', 'IS', 'ES', 'US', 'GB', 'US', 'US', 'US', 'DE', 'BR', 'RO', 'CZ', 'AU', 'GB', 'GB', 'ME', 'GB', 'TH', 'IT', 'US', 'US', 'US', 'FI', 'US', 'US', 'DE', 'GB', 'IT', 'NL', 'US', 'BE', 'US', 'AT', 'UA', '

In [None]:

class OurCNN(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    Args:
        in_channels: Number of channels of the input images (default 1).
        image_size: Side length of the square input images (default 32, which
            gives the original 32 * 16 * 16 = 8192 flattened features).
        num_classes: Number of output logits (default 10). Pass the number of
            unique countries when training on the country-code labels.

    The defaults reproduce the originally hard-coded layer sizes.
    """

    def __init__(self, in_channels=1, image_size=32, num_classes=10):
        super().__init__()
        # NOTE(review): LogSoftmax over the channel/feature dimension is an
        # unusual hidden activation (ReLU is the common choice). Kept as-is to
        # preserve the model's behavior -- confirm this is intended.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=(3,3), stride=1, padding=1)
        self.act1 = nn.LogSoftmax(dim=1)
        self.drop1 = nn.Dropout(0.3)

        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=1, padding=1)
        self.act2 = nn.LogSoftmax(dim=1)
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))

        self.flat = nn.Flatten()

        # The padded 3x3 convolutions keep H and W; the 2x2 pool halves them
        # (rounding down), leaving 32 channels of pooled x pooled values.
        pooled = image_size // 2
        self.fc3 = nn.Linear(32 * pooled * pooled, 512)
        self.act3 = nn.LogSoftmax(dim=1)
        self.drop3 = nn.Dropout(0.5)

        self.fc4 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch of shape (N, in_channels, S, S) to logits of shape (N, num_classes)."""
        # (N, in_channels, S, S) -> (N, 32, S, S)
        x = self.act1(self.conv1(x))
        x = self.drop1(x)
        # (N, 32, S, S) -> (N, 32, S, S)
        x = self.act2(self.conv2(x))
        # (N, 32, S, S) -> (N, 32, S//2, S//2)
        x = self.pool2(x)
        # (N, 32, S//2, S//2) -> (N, 32 * (S//2)**2)
        x = self.flat(x)
        # -> (N, 512)
        x = self.act3(self.fc3(x))
        x = self.drop3(x)
        # -> (N, num_classes)
        x = self.fc4(x)
        return x


# CIFAR-10 images are RGB; convert them to one channel so they match the
# single input channel of OurCNN's first convolution (32x32 input -> 8192
# flattened features, 10 classes).
transform = torchvision.transforms.Compose([
    torchvision.transforms.Grayscale(num_output_channels=1),
    torchvision.transforms.ToTensor(),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

batch_size = 32
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
# Evaluate on the held-out split (not the training set); order is irrelevant
# for accuracy, so no shuffling.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

model = OurCNN()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
class trainLoop:
    """Mini-batch SGD training driver around a model and an array-backed dataset.

    Args:
        train_data_loader: Source of the training arrays. Either an object
            exposing ``getData() -> (X, y)`` directly, or a ``DataLoader``
            whose ``.dataset`` exposes it.
        eval_data_loader: Evaluation data source; stored, not used here.
        my_model: The ``nn.Module`` to train.
        loss_function: Callable ``(predictions, labels) -> scalar loss tensor``.
        outputFolder: Output location; stored, not used here.
        batch_size: Number of samples per optimisation step.
    """

    def __init__(self, train_data_loader, eval_data_loader,
                        my_model, loss_function,
                        outputFolder, batch_size):
        # Note: We can access dataset from dataloader by doing dataloader.dataset
        self.train_data_loader = train_data_loader
        self.eval_data_loader = eval_data_loader
        self.my_model = my_model

        self.loss_function = loss_function
        self.outputFolder = outputFolder
        self.batch_size = batch_size
        self.optimizer = torch.optim.SGD(self.my_model.parameters(), lr=1e-3)

    def shuffleData(self, X, y, epoch):
        """
        DO NOT modify this function.

        Permute the training data for SGD.
        :param X: The original input data in the order of the file.
        :param y: The original labels in the order of the file.
        :param epoch: The epoch number (0-indexed).
        :return: Permuted X and y training data for the epoch.
        """
        np.random.seed(epoch)
        N = len(y)
        ordering = np.random.permutation(N)
        return X[ordering], y[ordering]

    def runEpoch(self, epoch):
        """Run one training epoch over the shuffled data in mini-batches.

        The arrays returned by ``getData()`` must be numeric NumPy arrays that
        ``torch.from_numpy`` accepts and that already have the shape and dtype
        the model and loss function expect.

        :param epoch: The epoch number (0-indexed); also seeds the shuffle.
        :return: Mean training loss per sample over the epoch (0.0 if empty).
        """
        # Accept a DataLoader (use its .dataset) or the dataset object itself.
        source = getattr(self.train_data_loader, "dataset", self.train_data_loader)
        xDataStart, yDataStart = source.getData()
        xData, yData = self.shuffleData(xDataStart, yDataStart, epoch)
        xData, yData = torch.from_numpy(xData),  torch.from_numpy(yData)

        self.my_model.train()
        numSamples = len(yData)
        totalLoss = 0.0
        for start in range(0, numSamples, self.batch_size):
            inputs = xData[start:start + self.batch_size]
            labels = yData[start:start + self.batch_size]

            y_pred = self.my_model(inputs)
            loss = self.loss_function(y_pred, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Weight by batch size so a short final batch is averaged correctly.
            totalLoss += loss.item() * len(labels)

        return totalLoss / max(numSamples, 1)

n_epochs = 1
for epoch in range(n_epochs):
    # Training mode enables dropout.
    model.train()
    for inputs, labels in trainloader:
        # forward, backward, and then weight update
        y_pred = model(inputs)
        loss = loss_fn(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation mode disables dropout so the reported accuracy is
    # deterministic; no_grad avoids building autograd graphs for inference.
    model.eval()
    correct = 0
    count = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            y_pred = model(inputs)
            correct += (torch.argmax(y_pred, 1) == labels).sum().item()
            count += len(labels)
    acc = correct / count if count else 0.0
    print("Epoch %d: model accuracy %.2f%%" % (epoch, acc*100))

torch.save(model.state_dict(), "cifar10model.pth")
