In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import numpy as np
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

## Hyperparameters and Model Architecture

In [3]:
WEIGHT_DECAY = 0.0
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001
BATCH_SIZE = 10
CRITERION = F.nll_loss
EPOCHS = 3

In [47]:
class CNN(nn.Module):
    def __init__(self, y: int, z: int, output: int):
        super(CNN, self).__init__() 
        self.dropout = nn.Dropout(DROPOUT_RATE)
        self.conv1 = nn.Conv2d(z, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(z, y, y).view(-1, z, y, y)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, output)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        if x.shape[2] > 1 and x.shape[3] > 1:
            x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### MNIST

In [48]:
train = datasets.MNIST("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)

In [52]:
MNIST_net = CNN(28, 1, 10).cuda()
optimizer = optim.Adam(MNIST_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        MNIST_net.zero_grad()
        output = MNIST_net(X.cuda())
        loss = CRITERION(output, y.cuda())
        loss.backward()
        optimizer.step()
    print(loss)

  0%|          | 18/5400 [00:00<00:30, 174.36it/s]

torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])


  1%|          | 60/5400 [00:00<00:28, 188.10it/s]

torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])


  2%|▏         | 101/5400 [00:00<00:27, 193.17it/s]

torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])


  3%|▎         | 147/5400 [00:00<00:26, 201.54it/s]

torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])





KeyboardInterrupt: 

In [33]:
output = []
true = []
MNIST_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in MNIST_net(X.cuda()):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
MNIST_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.9845
Validation F1 Score: 0.9845070912248806
Validation AUC Score: 0.9914172027862561


### CIFAR-10

In [26]:
train = datasets.CIFAR10("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.CIFAR10("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
print(train[0][0].shape)
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified
torch.Size([3, 32, 32])


In [11]:
CIFAR10_net = CNN(32, 3, 10).cuda()
optimizer = optim.Adam(CIFAR10_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        CIFAR10_net.zero_grad()
        output = CIFAR10_net(X.cuda())
        loss = CRITERION(output, y.cuda())
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 5000/5000 [00:31<00:00, 158.24it/s]
  0%|          | 15/5000 [00:00<00:35, 141.43it/s]

tensor(1.6725, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:30<00:00, 165.40it/s]
  0%|          | 16/5000 [00:00<00:31, 156.23it/s]

tensor(0.9463, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:31<00:00, 159.27it/s]

tensor(0.9590, device='cuda:0', grad_fn=<NllLossBackward0>)





In [12]:
output = []
true = []
CIFAR10_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in CIFAR10_net(X.cuda()):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
CIFAR10_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.6664
Validation F1 Score: 0.6557049535746451
Validation AUC Score: 0.815043988147967


### Iyer

In [84]:
iyer = np.loadtxt(open("347data/iyer.txt", "rb"), delimiter="\t")
features = iyer[:, 2:]
labels = iyer[:, 1]

# X = np.column_stack([np.roll(X[0,], i, axis=0) for i in range(X.shape[1])])
# X = torch.Tensor(np.array([torch.Tensor(np.roll(X[j,], i, axis=0)) for j in range(X.shape[0]) for i in range(X.shape[1])]))
print(features[0].shape)
X = []
data = []
for i in range(features.shape[0]):
    element = []
    element.append(np.column_stack([np.roll(features[i,], j, axis=0) for j in range(features.shape[1])]))
    element.append(labels[i])
    data.append(element)
# X = torch.Tensor(np.array(X))
# print(X.shape)
print(data)
data = np.array(data)
print(data.shape)
data = data[torch.randperm(len(data))]
print(data.shape)
print(data)
# print(X.shape)
# one_hot_y = torch.Tensor(np.array([np.eye(12)[int(i)] for i in labels]))
test_set_size = int(len(data) * 0.1)
training_set_size = len(data) - test_set_size
validation_set_size = int(training_set_size * 0.1)
training_set_size -= validation_set_size
# data = torch.Tensor(np.array([[X[i], one_hot_y[i]] for i in range(len(X))]))
# data = [X, one_hot_y]
# training = []  
# test = []
# validation = []
# for i in range(len(data[0])):
#     if i < training_set_size:
#         training.append(data[0])
#     elif i < training_set_size + validation_set_size:
#         validation.append(data[0])
#     else:
#         test.append(data[0])
# training_set = torch.Tensor(np.array(training))
# print(training_set.shape)
# print(training_set[0])
# validation_set = torch.Tensor(np.array(validation))
# test_set = torch.Tensor(np.array(test))
# train_set = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
# validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
# test_set = torch.utils.data.DataLoader(tensor_test, batch_size=BATCH_SIZE, shuffle=True)

(12,)
[[array([[1.  , 0.52, 0.66, 0.5 , 0.28, 0.49, 0.39, 0.66, 1.08, 0.57, 0.1 ,
        0.72],
       [0.72, 1.  , 0.52, 0.66, 0.5 , 0.28, 0.49, 0.39, 0.66, 1.08, 0.57,
        0.1 ],
       [0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 , 0.28, 0.49, 0.39, 0.66, 1.08,
        0.57],
       [0.57, 0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 , 0.28, 0.49, 0.39, 0.66,
        1.08],
       [1.08, 0.57, 0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 , 0.28, 0.49, 0.39,
        0.66],
       [0.66, 1.08, 0.57, 0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 , 0.28, 0.49,
        0.39],
       [0.39, 0.66, 1.08, 0.57, 0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 , 0.28,
        0.49],
       [0.49, 0.39, 0.66, 1.08, 0.57, 0.1 , 0.72, 1.  , 0.52, 0.66, 0.5 ,
        0.28],
       [0.28, 0.49, 0.39, 0.66, 1.08, 0.57, 0.1 , 0.72, 1.  , 0.52, 0.66,
        0.5 ],
       [0.5 , 0.28, 0.49, 0.39, 0.66, 1.08, 0.57, 0.1 , 0.72, 1.  , 0.52,
        0.66],
       [0.66, 0.5 , 0.28, 0.49, 0.39, 0.66, 1.08, 0.57, 0.1 , 0.72, 1.  ,
        0.52],
       [0.52,

In [59]:
Iyer_net = CNN(12, 1, 12).cuda()
optimizer = optim.Adam(Iyer_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for i in tqdm(range(0, training_set_size, BATCH_SIZE)):
        # print(test_set.shape)
        train_X = X[i:i+BATCH_SIZE]
        print(train_X.shape)
        train_y = torch.Tensor(labels[i:i+BATCH_SIZE])
        print(train_y.shape)
        # print(X.shape, y.shape)
        Iyer_net.zero_grad()
        output = Iyer_net(train_X.cuda())
        print("output", output.shape)
        print("train_y", train_y.shape)
        print("train_y", train_y)
        loss = CRITERION(output, train_y.cuda())
        loss.backward()
        optimizer.step()
    print(loss)
    

  0%|          | 0/42 [00:00<?, ?it/s]

torch.Size([10, 1, 12, 12])
torch.Size([10])
output torch.Size([10, 12])
train_y torch.Size([10])
train_y tensor([-1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])





RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'