In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import numpy as np
import sklearn.metrics as metrics
import matplotlib.pyplot as plt

## Hyperparameters and Model Architecture

In [51]:
WEIGHT_DECAY = 0.0
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001
BATCH_SIZE = 10
CRITERION = F.nll_loss
EPOCHS = 3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [43]:
class CNN(nn.Module):
    def __init__(self, y: int, z: int, output: int):
        super(CNN, self).__init__() 
        self.dropout = nn.Dropout(DROPOUT_RATE)
        self.conv1 = nn.Conv2d(z, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(z, y, y).view(-1, z, y, y)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, output)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        if x.shape[2] > 1 and x.shape[3] > 1:
            x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

### MNIST

In [7]:
train = datasets.MNIST("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)

In [8]:
MNIST_net = CNN(28, 1, 10).to(device)
optimizer = optim.Adam(MNIST_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        MNIST_net.zero_grad()
        output = MNIST_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(loss)

In [33]:
output = []
true = []
MNIST_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in MNIST_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
MNIST_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.9845
Validation F1 Score: 0.9845070912248806
Validation AUC Score: 0.9914172027862561


### CIFAR-10

In [5]:
train = datasets.CIFAR10("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.CIFAR10("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
print(train[0][0].shape)
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=True)

Files already downloaded and verified


KeyboardInterrupt: 

In [11]:
CIFAR10_net = CNN(32, 3, 10).to(device)
optimizer = optim.Adam(CIFAR10_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        CIFAR10_net.zero_grad()
        output = CIFAR10_net(X.to(device))
        loss = CRITERION(output, y.to(device))
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 5000/5000 [00:31<00:00, 158.24it/s]
  0%|          | 15/5000 [00:00<00:35, 141.43it/s]

tensor(1.6725, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:30<00:00, 165.40it/s]
  0%|          | 16/5000 [00:00<00:31, 156.23it/s]

tensor(0.9463, device='cuda:0', grad_fn=<NllLossBackward0>)


100%|██████████| 5000/5000 [00:31<00:00, 159.27it/s]

tensor(0.9590, device='cuda:0', grad_fn=<NllLossBackward0>)





In [12]:
output = []
true = []
CIFAR10_net.eval()
with torch.no_grad():
    for data in validation_set:
        X, y = data
        for i in CIFAR10_net(X.to(device)):
            output.append(torch.argmax(i).cpu())
        for i in y:
            true.append(i)
CIFAR10_net.train()
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))
            

Validation Accuracy: 0.6664
Validation F1 Score: 0.6557049535746451
Validation AUC Score: 0.815043988147967


### Iyer

In [62]:
iyer = np.loadtxt(open("347data/iyer.txt", "rb"), delimiter="\t")
features = iyer[:, 2:].astype(float)
labels = iyer[:, 1].astype(int)

data = [] #data0
for i in range(features.shape[0]):
    stack = np.array([])
    stack = np.column_stack([np.roll(features[i,], j, axis=0) for j in range(features.shape[1])]).astype(np.float32)
    data.append([stack, np.array(labels[i] + 1, dtype=int)])

data = np.array(data)
np.random.shuffle(data)
X = torch.tensor(np.array([i[0] for i in data])).view(-1, 1, 12, 12)
y = torch.tensor(np.array([i[1] for i in data]))

test_set_size = int(X.shape[0] * 0.1)
training_set_size = X.shape[0] - test_set_size
validation_set_size = int(training_set_size * 0.1)
training_set_size -= validation_set_size
print(training_set_size, validation_set_size, test_set_size)
train_X = X[:training_set_size]
print(train_X.shape)
train_y = y[:training_set_size]
validation_X = X[training_set_size:training_set_size + validation_set_size]
validation_y = y[training_set_size:training_set_size + validation_set_size]
test_X = X[training_set_size + validation_set_size:]
test_y = y[training_set_size + validation_set_size:]


420 46 51
torch.Size([420, 1, 12, 12])


In [61]:
Iyer_net = CNN(12, 1, 12)
optimizer = optim.Adam(Iyer_net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
for batch in range(EPOCHS):
    for i in tqdm(range(0, training_set_size, BATCH_SIZE)):
        batch_X = train_X[i:i+BATCH_SIZE]
        batch_y = train_y[i:i + BATCH_SIZE]
        
        Iyer_net.zero_grad()
        output = Iyer_net(batch_X)
        print(output)
        loss = CRITERION(output, batch_y)
        loss.backward()
        optimizer.step()
    print(loss)
    

  2%|▏         | 1/42 [00:00<00:07,  5.17it/s]

tensor([[-2.4536, -2.5824, -2.4665, -2.5453, -2.5097, -2.4186, -2.5284, -2.4805,
         -2.4965, -2.4658, -2.4399, -2.4445],
        [-2.4322, -2.5026, -2.5133, -2.5691, -2.4722, -2.5082, -2.5298, -2.5141,
         -2.4829, -2.4897, -2.3864, -2.4320],
        [-2.3922, -2.4655, -2.5493, -2.6359, -2.5715, -2.4023, -2.5976, -2.5111,
         -2.4809, -2.4460, -2.4128, -2.3928],
        [-2.4215, -2.4393, -2.4785, -2.5370, -2.4982, -2.5083, -2.5847, -2.5334,
         -2.4663, -2.4057, -2.4032, -2.5634],
        [-2.4735, -2.5136, -2.5017, -2.5335, -2.4588, -2.4591, -2.5187, -2.4947,
         -2.5052, -2.4486, -2.4374, -2.4792],
        [-2.2489, -2.4547, -2.8261, -2.4858, -2.4436, -2.4134, -2.7700, -2.5208,
         -2.5967, -2.3655, -2.3935, -2.4425],
        [-1.9859, -2.4564, -2.5754, -2.5413, -2.5480, -2.4266, -2.8398, -2.6723,
         -2.7104, -2.4417, -2.3437, -2.5443],
        [-2.4308, -2.5143, -2.4944, -2.5447, -2.4592, -2.4555, -2.5282, -2.4900,
         -2.5057, -2.4434, -2.

  5%|▍         | 2/42 [00:00<00:08,  4.67it/s]

tensor([[-2.4878, -2.5922, -2.5439, -2.4807, -2.4775, -2.4495, -2.6142, -2.5497,
         -2.6442, -2.4316, -2.3173, -2.2956],
        [-2.3102, -2.4775, -2.5230, -2.5057, -2.4903, -2.5295, -2.8761, -2.4760,
         -2.6313, -2.6747, -2.2636, -2.2344],
        [-2.4942, -2.5604, -2.5586, -2.4989, -2.4139, -2.4947, -2.6066, -2.4789,
         -2.5651, -2.4927, -2.3424, -2.3511],
        [-2.4975, -2.5334, -2.4835, -2.4907, -2.4412, -2.4858, -2.5878, -2.4698,
         -2.5139, -2.4536, -2.4502, -2.4224],
        [-2.4643, -2.5950, -2.5561, -2.5336, -2.4291, -2.4995, -2.6237, -2.3634,
         -2.5223, -2.5334, -2.3757, -2.3664],
        [-2.5025, -2.5345, -2.5133, -2.5062, -2.4042, -2.4979, -2.6084, -2.4619,
         -2.5644, -2.4702, -2.3901, -2.3907],
        [-2.5015, -2.5881, -2.5179, -2.5211, -2.4217, -2.4588, -2.5785, -2.4796,
         -2.5608, -2.4861, -2.3599, -2.3751],
        [-2.4844, -2.6443, -2.5829, -2.4167, -2.3463, -2.5431, -2.7267, -2.4225,
         -2.6808, -2.4403, -2.

 17%|█▋        | 7/42 [00:00<00:05,  6.31it/s]

tensor([[-2.5862, -2.6871, -2.4516, -2.3079, -2.4416, -2.3895, -2.7013, -2.3140,
         -2.6830, -2.5504, -2.3938, -2.4234],
        [-2.5462, -2.7149, -2.4397, -2.2925, -2.4444, -2.3763, -2.6862, -2.3038,
         -2.7506, -2.5752, -2.4061, -2.4157],
        [-2.5387, -2.6456, -2.4601, -2.2925, -2.4272, -2.4156, -2.6647, -2.3530,
         -2.6214, -2.6044, -2.4438, -2.4319],
        [-2.5470, -2.7272, -2.4321, -2.2335, -2.4753, -2.3655, -2.8038, -2.3327,
         -2.5753, -2.6425, -2.4279, -2.4070],
        [-2.5517, -2.6318, -2.4761, -2.3461, -2.4244, -2.3847, -2.6622, -2.3630,
         -2.6633, -2.5337, -2.4495, -2.4048],
        [-2.5175, -2.6613, -2.4577, -2.2537, -2.4410, -2.4317, -2.6973, -2.3336,
         -2.6564, -2.6075, -2.4205, -2.4448],
        [-2.5327, -2.7077, -2.4281, -2.2816, -2.4710, -2.3801, -2.6836, -2.3535,
         -2.6615, -2.6046, -2.4295, -2.3936],
        [-2.6007, -2.8859, -2.6171, -2.1197, -2.3814, -2.2695, -2.7578, -2.3218,
         -2.8914, -2.6909, -2.

 48%|████▊     | 20/42 [00:01<00:02,  9.28it/s]

tensor([[-2.7677, -3.1375, -2.5170, -1.8865, -2.5354, -2.3605, -2.9968, -2.1558,
         -2.9179, -2.7867, -2.4036, -2.1535],
        [-2.7590, -3.4756, -2.5300, -1.5044, -2.6348, -2.4770, -3.1377, -2.0996,
         -3.4796, -3.0416, -2.3014, -2.2674],
        [-2.7807, -3.1517, -2.5495, -1.6543, -2.6740, -2.3233, -3.1303, -2.1174,
         -3.1277, -3.0625, -2.3174, -2.2463],
        [-2.6219, -2.7647, -2.3775, -2.1203, -2.4770, -2.4110, -2.7293, -2.3472,
         -2.7169, -2.6331, -2.4313, -2.3959],
        [-2.5898, -2.6861, -2.3708, -2.1788, -2.4429, -2.4511, -2.6742, -2.3515,
         -2.7423, -2.6219, -2.4200, -2.4426],
        [-2.6585, -3.4738, -2.5459, -1.4879, -2.6967, -2.5693, -3.0633, -2.1835,
         -3.2167, -3.0574, -2.5914, -2.0458],
        [-2.7230, -3.0699, -2.4341, -1.8189, -2.5110, -2.3606, -2.8867, -2.3131,
         -2.9562, -2.6863, -2.4291, -2.3077],
        [-2.5944, -2.7847, -2.3475, -2.1298, -2.4725, -2.4573, -2.7019, -2.3404,
         -2.7468, -2.6110, -2.

 67%|██████▋   | 28/42 [00:01<00:01, 13.05it/s]

tensor([[-14.5389, -34.0313, -10.0790,  -9.6197, -21.2236,  -4.3655, -25.3395,
          -7.4880, -17.4130,  -0.2523, -18.7266,  -1.5625],
        [ -2.5998,  -3.5321,  -2.1630,  -1.9150,  -2.8530,  -2.2050,  -3.1559,
          -2.4523,  -2.7918,  -2.0603,  -2.8823,  -2.3773],
        [ -3.5748,  -6.4196,  -2.7670,  -1.9511,  -4.3971,  -1.8177,  -4.9130,
          -3.0457,  -3.7610,  -0.8525,  -4.2351,  -2.6357],
        [ -2.9635,  -5.1749,  -2.3461,  -1.9179,  -3.6967,  -2.1321,  -4.0304,
          -2.4543,  -3.4661,  -1.3248,  -3.3476,  -2.1144],
        [ -3.0477,  -5.4281,  -2.3157,  -1.9604,  -3.8218,  -2.1696,  -4.0074,
          -2.4868,  -3.4557,  -1.2265,  -3.6183,  -2.1249],
        [ -2.8972,  -4.3929,  -2.3042,  -1.8620,  -3.1844,  -2.0112,  -3.5512,
          -2.5626,  -2.9218,  -1.7297,  -3.1977,  -2.0857],
        [ -2.5720,  -3.4010,  -1.9915,  -1.8752,  -2.8084,  -2.3248,  -3.0791,
          -2.5284,  -2.8910,  -2.1329,  -2.8164,  -2.4923],
        [ -2.5805,  -3.5985

 67%|██████▋   | 28/42 [00:01<00:00, 16.73it/s]


KeyboardInterrupt: 

In [60]:
output = []
true = []
Iyer_net.eval()
with torch.no_grad():
    for i in range(0, validation_X.shape[0], BATCH_SIZE):
        print(torch.argmax(Iyer_net(validation_X[i:i+BATCH_SIZE])).cpu())
        true.append(validation_y[i:i:BATCH_SIZE])
Iyer_net.train()
print(output)
print("Validation Accuracy:", metrics.accuracy_score(true, output))
print("Validation F1 Score:", metrics.f1_score(true, output, average="macro"))
true = np.eye(10)[true]
output = np.eye(10)[output]
print("Validation AUC Score:", metrics.roc_auc_score(true, output, multi_class="ovo", average="macro"))

tensor(105)
tensor(45)
tensor(45)
tensor(41)
tensor(9)
[]


ValueError: Found input variables with inconsistent numbers of samples: [5, 0]