In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import numpy as np
import sklearn.metrics as metrics

## Hyperparameters and Model Architecture

In [39]:
WEIGHT_DECAY = 0.0
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001
BATCH_SIZE = 10
CRITERION = F.nll_loss
EPOCHS = 3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [40]:
class CNN(nn.Module):
    def __init__(self, y: int, z: int, output: int):
        super(CNN, self).__init__() 
        self.dropout = nn.Dropout(DROPOUT_RATE)
        self.conv1 = nn.Conv2d(z, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(z, y, y).view(-1, z, y, y)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, output)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        if x.shape[2] > 1 and x.shape[3] > 1:
            x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### MNIST

In [50]:
train = datasets.MNIST("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)

In [51]:
MNIST_net = CNN(28, 1, 10).to(device)
optimizer = optim.Adam(MNIST_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for epoch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        MNIST_net.zero_grad()
        output = MNIST_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1} Loss: {loss.item()}")

100%|██████████| 5400/5400 [00:52<00:00, 102.80it/s]
  0%|          | 12/5400 [00:00<00:45, 118.36it/s]

tensor(0.4466, grad_fn=<NllLossBackward0>)


100%|██████████| 5400/5400 [00:50<00:00, 106.62it/s]
  0%|          | 14/5400 [00:00<00:40, 132.71it/s]

tensor(0.0001, grad_fn=<NllLossBackward0>)


100%|██████████| 5400/5400 [00:51<00:00, 105.34it/s]

tensor(0.2072, grad_fn=<NllLossBackward0>)





In [62]:
output = []
true = []
MNIST_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in MNIST_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
MNIST_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.984
Validation F1 Score: 0.9839195689358654
(6000, 10) (6000, 10)
Validation AUC Score: 0.9910829480538382


### CIFAR-10

In [40]:
train = datasets.CIFAR10("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.CIFAR10("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [34]:
CIFAR10_net = CNN(32, 3, 10).to(device)
optimizer = optim.Adam(CIFAR10_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for epoch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        CIFAR10_net.zero_grad()
        output = CIFAR10_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1} Loss: {loss.item()}")

100%|██████████| 5000/5000 [01:26<00:00, 58.07it/s]
  0%|          | 7/5000 [00:00<01:32, 54.01it/s]

tensor(1.3031, grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [01:35<00:00, 52.48it/s]
  0%|          | 8/5000 [00:00<01:04, 77.37it/s]

tensor(0.9414, grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [01:32<00:00, 53.96it/s]

tensor(1.0281, grad_fn=<NllLossBackward0>)





In [42]:
output = []
true = []
CIFAR10_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in CIFAR10_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
CIFAR10_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.6732
Validation F1 Score: 0.6775597070102387
Validation AUC Score: 0.8184548189215338


### Iyer

In [48]:
iyer = np.loadtxt(open("347data/iyer.txt", "rb"), delimiter="\t")
features = iyer[:, 2:].astype(float)
labels = iyer[:, 1].astype(int)

data = [] #data0
for i in range(features.shape[0]):
    stack = np.array([])
    stack = np.column_stack([np.roll(features[i,], j, axis=0) for j in range(features.shape[1])]).astype(np.float32)
    data.append([stack, np.array(labels[i] + 1, dtype=int)])

data = np.array(data)
np.random.shuffle(data)
X = torch.tensor(np.array([i[0] for i in data])).view(-1, 1, 12, 12)
y = torch.tensor(np.array([i[1] for i in data]))

test_set_size = int(X.shape[0] * 0.1)
training_set_size = X.shape[0] - test_set_size
validation_set_size = int(training_set_size * 0.1)
training_set_size -= validation_set_size
print(training_set_size, validation_set_size, test_set_size)
train_X = X[:training_set_size]
train_y = y[:training_set_size]
validation_X = X[training_set_size:training_set_size + validation_set_size]
validation_y = y[training_set_size:training_set_size + validation_set_size]
test_X = X[training_set_size + validation_set_size:]
test_y = y[training_set_size + validation_set_size:]

420 46 51


In [49]:
Iyer_net = CNN(12, 1, 12).to(device)
optimizer = optim.Adam(Iyer_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for epoch in range(EPOCHS):
    for i in tqdm(range(0, training_set_size, BATCH_SIZE)):
        batch_X = train_X[i:i+BATCH_SIZE]
        batch_y = train_y[i:i + BATCH_SIZE]
        
        Iyer_net.zero_grad()
        output = Iyer_net(batch_X.to(device))
        loss = CRITERION(output, batch_y.to(device))
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1} Loss: {loss.item()}")
        

100%|██████████| 42/42 [00:00<00:00, 117.18it/s]
 50%|█████     | 21/42 [00:00<00:00, 202.43it/s]

tensor(1.7292, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 42/42 [00:00<00:00, 174.14it/s]
 29%|██▊       | 12/42 [00:00<00:00, 116.92it/s]

tensor(1.0667, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 42/42 [00:00<00:00, 116.99it/s]

tensor(0.9123, device='cuda:0', grad_fn=<NllLossBackward0>)





In [53]:
output = []
true = []
Iyer_net.eval()
with torch.no_grad():
    for i in range(validation_set_size):
        output.append(torch.argmax(Iyer_net(validation_X[i].view(-1, 1, 12, 12).to(device))).cpu())
        true.append(validation_y[i])
Iyer_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
labels = [0 for _ in range(0, 12)]
for i in true:
    labels[i] += 1
for i, l in enumerate(labels):
    if l == 0:
        true.append(i) 
        output.append(i)
labels = [0 for _ in range(0, 12)]
for i in output:
    labels[i] += 1
for i, l in enumerate(labels):
    if l == 0:
        true.append(i) 
        output.append(i)
true = np.eye(12)[true]
output = np.eye(12)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))

Validation Accuracy: 0.7608695652173914
Validation F1 Score: 0.48860106025881433
Validation AUC Score: 0.851966943727425
