In [43]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import timeit

In [27]:
## shuffle two arrays, keeping rows in correspondence
def unison_shuffled_copies(a, b):
    """Return shuffled versions of ``a`` and ``b`` that share one random ordering.

    A single permutation of the row indices is drawn from NumPy's global
    random state and used to index both inputs, so row ``i`` of the first
    result still corresponds to row ``i`` of the second.

    Both arguments must have the same length (enforced with ``assert``) and
    must support indexing with an integer index array.
    """
    n_rows = len(a)
    assert n_rows == len(b)
    order = np.random.permutation(n_rows)
    shuffled_a, shuffled_b = a[order], b[order]
    return shuffled_a, shuffled_b

def linear_svm(train, valid, c_vals, verbose=False):
    """Sweep ``C`` for an l2-penalised ``LinearSVC`` and plot validation accuracy.

    Only the l2 penalty is swept; the ``"l1"`` slot in the results table is
    never filled by this function.

    Args:
        train: ``(X, y)`` pair used to fit each classifier.
        valid: ``(X, y)`` pair scored with ``accuracy_score`` after each fit.
        c_vals: ``C`` values to try; also used as the x-axis of the plot.
        verbose: when true, print one progress line per fitted model.

    Returns:
        ``(classifier, {"penalty": ..., "c": ...}, accuracy)`` for the candidate
        with the highest validation accuracy (ties keep the earliest one).
        When ``c_vals`` is empty nothing is fitted and the placeholder
        ``(None, {"penalty": "l1", "c": 0}, 0)`` is returned.
    """
    train_x, train_y = train[0], train[1]
    valid_x, valid_y = valid[0], valid[1]

    # Placeholder layout: (classifier, c, penalty, accuracy).
    best_classifier = (None, 0, "l1", 0)

    res = {"l1": [], "l2": []}

    for penalty in ["l2"]:
        for c in c_vals:
            clf = LinearSVC(C=c, penalty=penalty)
            clf.fit(train_x, train_y)
            prediction = clf.predict(valid_x)
            accuracy = accuracy_score(valid_y, prediction)
            res[penalty].append(accuracy)
            # Always accept the first candidate: otherwise a sweep in which
            # every model scores 0 would report None as the "best" classifier.
            if best_classifier[0] is None or accuracy > best_classifier[3]:
                best_classifier = (clf, c, penalty, accuracy)
            if verbose:
                print("Tried c = " + str(c) + " with " + penalty + " penalty")

    plt.plot(c_vals, res["l2"])
    plt.xlabel("C parameter")
    plt.ylabel("Accuracy")
    # A log scale is base 10 by default; the old ``basex`` keyword is no longer
    # accepted by current Matplotlib, so it is simply omitted.
    plt.xscale("log")
    plt.title("Results from linear SVM classifier")
    plt.show()
    return (best_classifier[0], {"penalty": best_classifier[2], "c": best_classifier[1]}, best_classifier[3])

# The training and validation sets are each passed as a tuple: index 0 holds X (the features) and index 1 holds Y (the labels)
def logistic_regression(train, valid, c_vals, verbose=False):
    """Sweep ``C`` for l1- and l2-penalised logistic regression and plot accuracy.

    Args:
        train: ``(X, y)`` pair used to fit each classifier.
        valid: ``(X, y)`` pair scored with ``accuracy_score`` after each fit.
        c_vals: ``C`` values to try; also used as the x-axis of the plot.
        verbose: when true, print one progress line per fitted model.

    Returns:
        ``(classifier, {"penalty": ..., "c": ...}, accuracy)`` for the candidate
        with the highest validation accuracy.  Candidates are tried l1 first,
        then l2, each in ``c_vals`` order, and ties keep the earliest one.
        When ``c_vals`` is empty nothing is fitted and the placeholder
        ``(None, {"penalty": "l1", "c": 0}, 0)`` is returned.
    """
    train_x, train_y = train[0], train[1]
    valid_x, valid_y = valid[0], valid[1]

    # Placeholder layout: (classifier, c, penalty, accuracy).
    best_classifier = (None, 0, "l1", 0)

    res = {"l1": [], "l2": []}

    for penalty in ["l1", "l2"]:
        for c in c_vals:
            # The solver is pinned because the l1 penalty is not supported by
            # every solver (the current library default, lbfgs, rejects it);
            # liblinear handles both penalties swept here.
            # NOTE(review): recent scikit-learn releases appear to be phasing out
            # liblinear for multiclass targets - confirm against the installed version.
            clf = LogisticRegression(C=c, penalty=penalty, solver="liblinear")
            clf.fit(train_x, train_y)
            prediction = clf.predict(valid_x)
            accuracy = accuracy_score(valid_y, prediction)
            res[penalty].append(accuracy)
            # Always accept the first candidate: otherwise a sweep in which
            # every model scores 0 would report None as the "best" classifier.
            if best_classifier[0] is None or accuracy > best_classifier[3]:
                best_classifier = (clf, c, penalty, accuracy)
            if verbose:
                print("Tried c = " + str(c) + " with " + penalty + " penalty")

    plt.plot(c_vals, res["l1"])
    plt.plot(c_vals, res["l2"])
    plt.legend(["l1 penalty", "l2 penalty"])
    plt.xlabel("C parameter")
    plt.ylabel("Accuracy")
    # A log scale is base 10 by default; the old ``basex`` keyword is no longer
    # accepted by current Matplotlib, so it is simply omitted.
    plt.xscale("log")
    plt.title("Results from logistic regression classifier")
    plt.show()
    return (best_classifier[0], {"penalty": best_classifier[2], "c": best_classifier[1]}, best_classifier[3])

In [32]:
%%time
# Parse the comma-separated train_x.csv into a NumPy array.
# The recorded cell output shows this taking about two minutes of wall time.
train_x = np.loadtxt("./Datasets/train_x.csv", delimiter=',')

CPU times: user 1min 30s, sys: 8.33 s, total: 1min 38s
Wall time: 1min 59s


In [33]:
%%time
# Parse the comma-separated train_y.csv into a NumPy array.
# Later cells pair it row-for-row with train_x when shuffling and splitting.
train_y = np.loadtxt("./Datasets/train_y.csv", delimiter=',')

CPU times: user 118 ms, sys: 1.91 ms, total: 120 ms
Wall time: 146 ms


In [34]:
%%time
# Parse the comma-separated test_x.csv into a NumPy array.
# NOTE(review): no matching test_y file is loaded in the cells visible here.
test_x = np.loadtxt("./Datasets/test_x.csv", delimiter=',')

CPU times: user 17.2 s, sys: 680 ms, total: 17.8 s
Wall time: 18.4 s


In [35]:
# Parse the "mini" train_x / train_y CSV files into NumPy arrays.
# NOTE(review): neither array is referenced by any other cell visible here.
mini_train_x = np.loadtxt("./Datasets/mini_train_x.csv", delimiter=',')
mini_train_y = np.loadtxt("./Datasets/mini_train_y.csv", delimiter=",")

In [36]:
# %%time

# train_avg = np.mean(train_x, axis=0)
# for i in range(len(train_x)):
#     train_x[i] -= train_avg
# for i in range(len(test_x)):
#     test_x[i] -= train_avg
# train_x /= 255.0
# test_x /= 255.0

# X_train, y_train = unison_shuffled_copies(train_x, train_y)

# HYP_TUNE_SIZE = 5000
# train = (X_train[:int(HYP_TUNE_SIZE * 0.7)], y_train[:int(HYP_TUNE_SIZE*0.7)])
# valid = (X_train[int(HYP_TUNE_SIZE*0.7):], y_train[int(HYP_TUNE_SIZE*0.7):])
# results_logistic = logistic_regression(train, valid, np.logspace(-6,6,20), verbose=True)
# results_svm = linear_svm(train, valid, np.logspace(-6,6,20), verbose = True)

## PyTorch

In [37]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
import torch.utils.data as data

from PIL import Image

import torchvision
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split

import time
from __future__ import print_function


In [38]:
# Shuffle features and labels together, carve out a small development split,
# and convert each piece of that split into a torch tensor.
# NOTE(review): unison_shuffled_copies draws from NumPy's global random state and
# no seed is set in the visible cells, so the rows selected below can differ
# between runs even though train_test_split itself is given random_state=42.
x_train, y_train = unison_shuffled_copies(train_x, train_y)

# train_size=0.1 / test_size=0.05: only a fraction of the rows is kept for
# development; the remainder is not used by this cell.
x_train_dev, x_test_dev, y_train_dev, y_test_dev = train_test_split(x_train, y_train, test_size=0.05, train_size=0.1, random_state=42)

# Tensor views of the development split.
# NOTE(review): the Dataset objects built later are fed the NumPy arrays
# directly, not these tensors - confirm whether the tensors are still needed.
ttrain_x = torch.from_numpy(x_train_dev) # currently has x_train_dev as dataset for dev purposes
ttrain_y = torch.from_numpy(y_train_dev)
ttest_x = torch.from_numpy(x_test_dev)
ttest_y = torch.from_numpy(y_test_dev)

In [44]:
class CNN(nn.Module):
    """Two-block convolutional classifier with a linear 10-way output layer.

    Each block is Conv2d(3x3, padding=1) -> BatchNorm2d -> Dropout(p=0.1) -> ReLU,
    taking the input from 1 -> 16 -> 32 channels while leaving the spatial size
    unchanged.  The resulting feature map is flattened per sample and passed to
    a single ``nn.Linear`` layer that emits 10 scores.

    Args:
        scale: multiplier for the linear layer's input width, which is
            ``scale * 64 * 64``.  The second block emits 32 channels, so for
            single-channel 64x64 inputs ``scale`` must be 32 for the flattened
            features to match the linear layer.
    """

    def __init__(self, scale):
        # Initialise nn.Module first so its parameter/submodule registries
        # exist before any attribute is set on the instance.
        super(CNN, self).__init__()
        self.print = True  # report the output size once, on the first forward pass
        self.scale = scale

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(16),
            nn.Dropout(p=0.1),  # randomly zero 10% of activations for regularization
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1),
            nn.BatchNorm2d(32),
            nn.Dropout(p=0.1),  # randomly zero 10% of activations for regularization
            nn.ReLU(),
        )
        # Final linear layer over the flattened feature map.
        self.fc1 = nn.Linear(self.scale * 64 * 64, 10)

    def forward(self, x):
        """Run both conv blocks, flatten per sample, and return ``(batch, 10)`` scores."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch axis.  Taking the batch size from
        # the tensor itself keeps forward() independent of notebook-level
        # globals and correct for a short final batch.
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        if self.print:
            self.print = False
            print("output size: ", out.size())
        return out

In [40]:
import torch.utils.data as data
class DatasetKaggle(data.Dataset):
    """Dataset over flat image rows and their labels.

    Each item is one row of ``np_data_x`` reshaped to ``(64, 64, 1)`` and passed
    through ``transform``, paired with the matching label cast to ``int``.

    Args:
        np_data_x: indexable collection of rows; each row must be reshapeable
            to 64x64x1.
        np_data_y: indexable collection of labels, aligned with ``np_data_x``.
        transform: callable applied to every reshaped sample.  Defaults to
            ``transforms.ToTensor()``.

    NOTE(review): how ``ToTensor`` treats non-uint8 arrays (output dtype and
    scaling) appears to vary between torchvision versions - confirm the
    samples reach the model as float32.
    """

    def __init__(self, np_data_x, np_data_y, transform=None):
        self.data = np_data_x
        self.labels = np_data_y
        # Fall back to ToTensor only when the caller supplies no transform.
        self.transform = transforms.ToTensor() if transform is None else transform

    def __len__(self):
        # The label collection defines the dataset size.
        return len(self.labels)

    def __getitem__(self, index):
        image = self.data[index].reshape(64, 64, 1)
        return self.transform(image), int(self.labels[index])

In [41]:
# Build the Dataset / DataLoader objects used by the training cell.
cuda_available = torch.cuda.is_available()

# Wrap the development split so the loaders can index it sample by sample.
train = DatasetKaggle(x_train_dev, y_train_dev)
test = DatasetKaggle(x_test_dev, y_test_dev)

# NOTE(review): not used by the loaders below; kept in case another cell relies on it.
mnist_transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

batch_size = 64

trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
# The held-out loader reuses batch_size instead of repeating the literal, and is
# not shuffled: accuracy over the whole set does not depend on sample order.
testloader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=2)

In [45]:
# Train the CNN for 50 epochs, reporting the mean training loss and the
# held-out accuracy after every epoch.
# Written against the merged Tensor/Variable API (PyTorch >= 0.4): scalar values
# are read with .item() and evaluation runs under torch.no_grad().
clf = CNN(32)
if cuda_available:
    clf = clf.cuda()

optimizer = torch.optim.Adam(clf.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

for epoch in range(50):

    start_time = timeit.default_timer()

    # Train
    clf.train()
    losses = []
    # Unpack the batch directly; binding it to the name `data` would clobber
    # the notebook's `torch.utils.data as data` alias.
    for inputs, labels in trainloader:
        if cuda_available:
            inputs, labels = inputs.cuda(), labels.cuda()

        # Reset gradients, run the forward pass, then back-propagate and update.
        optimizer.zero_grad()
        outputs = clf(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain Python float (indexing a 0-dim loss tensor fails).
        losses.append(loss.item())

    print('Epoch : %d Loss : %.3f ' % (epoch, np.mean(losses)))

    # Evaluate
    clf.eval()
    total = 0
    correct = 0
    # no_grad() disables autograd bookkeeping for inference.
    with torch.no_grad():
        for inputs, labels in testloader:
            if cuda_available:
                inputs, labels = inputs.cuda(), labels.cuda()

            outputs = clf(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # Accumulate a Python int so the percentage below is ordinary float arithmetic.
            correct += predicted.eq(labels).sum().item()
    # The reported time covers both the training and the evaluation pass.
    elapsed = timeit.default_timer() - start_time
    print('Epoch : %d Test Acc : %.3f' % (epoch, 100.*correct/total))
    print('Time for epoch: ', elapsed)
    print('--------------------------------------------------------------')

output size:  torch.Size([64, 10])
Epoch : 0 Loss : 2.524 
Epoch : 0 Test Acc : 12.160
Time for epoch:  302.8651117779955
--------------------------------------------------------------


Process Process-16:
Process Process-15:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 50, in _worker_loop
    r = ind

KeyboardInterrupt: 