In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import numpy as np
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

## Hyperparameters and Model Architecture

In [6]:
WEIGHT_DECAY = 0.0
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001
BATCH_SIZE = 10
CRITERION = F.nll_loss
EPOCHS = 3

device = "cpu" # torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class CNN(nn.Module):
    def __init__(self, y: int, z: int, output: int):
        super(CNN, self).__init__() 
        self.dropout = nn.Dropout(DROPOUT_RATE)
        self.conv1 = nn.Conv2d(z, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(z, y, y).view(-1, z, y, y)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, output)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        if x.shape[2] > 1 and x.shape[3] > 1:
            x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### MNIST

In [12]:
train = datasets.MNIST("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)

In [8]:
MNIST_net = CNN(28, 1, 10).to(device)
optimizer = optim.Adam(MNIST_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        MNIST_net.zero_grad()
        output = MNIST_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 5400/5400 [01:00<00:00, 89.60it/s] 
  0%|          | 15/5400 [00:00<00:37, 142.33it/s]

tensor(0.0699, grad_fn=<NllLossBackward0>)


100%|██████████| 5400/5400 [01:02<00:00, 86.71it/s] 
  0%|          | 15/5400 [00:00<00:36, 145.54it/s]

tensor(0.0188, grad_fn=<NllLossBackward0>)


100%|██████████| 5400/5400 [00:58<00:00, 93.03it/s] 

tensor(0.0015, grad_fn=<NllLossBackward0>)





In [13]:
output = []
true = []
MNIST_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in MNIST_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
MNIST_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.9828333333333333
Validation F1 Score: 0.982945776735584
Validation AUC Score: 0.9906334510264463


### CIFAR-10

In [10]:
train = datasets.CIFAR10("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.CIFAR10("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
CIFAR10_net = CNN(32, 3, 10).to(device)
optimizer = optim.Adam(CIFAR10_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        CIFAR10_net.zero_grad()
        output = CIFAR10_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 5000/5000 [00:31<00:00, 158.24it/s]
  0%|          | 15/5000 [00:00<00:35, 141.43it/s]

tensor(1.6725, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:30<00:00, 165.40it/s]
  0%|          | 16/5000 [00:00<00:31, 156.23it/s]

tensor(0.9463, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:31<00:00, 159.27it/s]

tensor(0.9590, device='cuda:0', grad_fn=<NllLossBackward0>)





In [12]:
output = []
true = []
CIFAR10_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in CIFAR10_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
CIFAR10_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.6664
Validation F1 Score: 0.6557049535746451
Validation AUC Score: 0.815043988147967


### Iyer

In [14]:
iyer = np.loadtxt(open("347data/iyer.txt", "rb"), delimiter="\t")
features = iyer[:, 2:].astype(float)
labels = iyer[:, 1].astype(int)

data = [] #data0
for i in range(features.shape[0]):
    stack = np.array([])
    stack = np.column_stack([np.roll(features[i,], j, axis=0) for j in range(features.shape[1])]).astype(np.float32)
    data.append([stack, np.array(labels[i] + 1, dtype=int)])

data = np.array(data)
np.random.shuffle(data)
X = torch.tensor(np.array([i[0] for i in data])).view(-1, 1, 12, 12)
y = torch.tensor(np.array([i[1] for i in data]))

test_set_size = int(X.shape[0] * 0.1)
training_set_size = X.shape[0] - test_set_size
validation_set_size = int(training_set_size * 0.1)
training_set_size -= validation_set_size
print(training_set_size, validation_set_size, test_set_size)
train_X = X[:training_set_size]
print(train_X.shape)
train_y = y[:training_set_size]
validation_X = X[training_set_size:training_set_size + validation_set_size]
validation_y = y[training_set_size:training_set_size + validation_set_size]
test_X = X[training_set_size + validation_set_size:]
test_y = y[training_set_size + validation_set_size:]


420 46 51
torch.Size([420, 1, 12, 12])


In [20]:
Iyer_net = CNN(12, 1, 12).to(device)
optimizer = optim.Adam(Iyer_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for i in tqdm(range(0, training_set_size, BATCH_SIZE)):
        batch_X = train_X[i:i+BATCH_SIZE]
        batch_y = train_y[i:i + BATCH_SIZE]
        
        Iyer_net.zero_grad()
        output = Iyer_net(batch_X.to(device))
        loss = CRITERION(output, batch_y.to(device))
        loss.backward()
        optimizer.step()
    print(loss)
    

100%|██████████| 42/42 [00:00<00:00, 99.31it/s]
 19%|█▉        | 8/42 [00:00<00:00, 63.45it/s]

tensor(2.0682, grad_fn=<NllLossBackward0>)


100%|██████████| 42/42 [00:00<00:00, 69.65it/s]
 31%|███       | 13/42 [00:00<00:00, 81.96it/s]

tensor(1.7472, grad_fn=<NllLossBackward0>)


100%|██████████| 42/42 [00:00<00:00, 76.45it/s]

tensor(1.8556, grad_fn=<NllLossBackward0>)





In [26]:
output = []
true = []
Iyer_net.eval()
with torch.no_grad():
    for i in range(validation_X.shape[0]):
        output.append(torch.argmax(Iyer_net(validation_X[i].view(-1, 1, 12, 12))).cpu())
        true.append(validation_y[i])
Iyer_net.train()
print(output)
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(12)[true]
output = np.eye(12)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))

[tensor(3), tensor(7), tensor(6), tensor(5), tensor(3), tensor(11), tensor(9), tensor(7), tensor(2), tensor(3), tensor(3), tensor(4), tensor(3), tensor(3), tensor(2), tensor(9), tensor(3), tensor(7), tensor(3), tensor(7), tensor(3), tensor(4), tensor(4), tensor(3), tensor(3), tensor(2), tensor(3), tensor(9), tensor(2), tensor(7), tensor(7), tensor(3), tensor(3), tensor(2), tensor(3), tensor(5), tensor(3), tensor(3), tensor(3), tensor(4), tensor(3), tensor(2), tensor(4), tensor(6), tensor(6), tensor(3)]
Validation Accuracy: 0.7608695652173914
Validation F1 Score: 0.4928320802005013


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.