In [1]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.utils.data as data_utils
import gzip, os, pickle
from torch.autograd import Variable

In [2]:
TRAIN_SAMPLES = 60000
TRAIN_PATH = "s2_mnist_train_dwr_" + str(TRAIN_SAMPLES) + ".gz"
TEST_PATH = "s2_mnist.gz"
ROOT_PATH = "models"
MODEL_NAME = "baseline.pt"

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

NUM_EPOCHS = 20
BATCH_SIZE = 32
LEARNING_RATE = 5e-4


class ConvNet(nn.Module):

    def __init__(self):
        super().__init__()

        f1 = 32
        f2 = 64

        self.feature_layer = nn.Sequential(
            torch.nn.Conv2d(1, f1, kernel_size=5, stride=3),
            torch.nn.ReLU(),
            torch.nn.Conv2d(f1, f2, kernel_size=5, stride=3),
            torch.nn.ReLU()
        )
        self.out_layer = torch.nn.Linear(f2 * 5**2, 10)

    def forward(self, x):
        x = self.feature_layer(x)
        x = x.view(x.shape[0], -1)
        x = self.out_layer(x)
        return x


def load_data(path, batch_size):

    with gzip.open(path, 'rb') as f:
        dataset = pickle.load(f)

    train_data = torch.from_numpy(
        dataset["train"]["images"][:, None, :, :].astype(np.float32))
    train_labels = torch.from_numpy(
        dataset["train"]["labels"].astype(np.int64))

    # TODO normalize dataset
    # mean = train_data.mean()
    # stdv = train_data.std()

    train_dataset = data_utils.TensorDataset(train_data, train_labels)
    train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_data = torch.from_numpy(
        dataset["test"]["images"][:, None, :, :].astype(np.float32))
    test_labels = torch.from_numpy(
        dataset["test"]["labels"].astype(np.int64))

    test_dataset = data_utils.TensorDataset(test_data, test_labels)
    test_loader = data_utils.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, test_loader, train_dataset, test_dataset




def load_train_data(path, batch_size):
    
    with gzip.open(path, 'rb') as f:
        dataset = pickle.load(f)
        
    train_data = torch.from_numpy(
        dataset["images"][:, None, :, :].astype(np.float32))
    train_labels = torch.from_numpy(
        dataset["labels"].astype(np.int64))

    train_dataset = data_utils.TensorDataset(train_data, train_labels)
    train_loader = data_utils.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    return train_loader, train_dataset
    
    
def load_test_data(path, batch_size):
    
    with gzip.open(path, 'rb') as f:
        dataset = pickle.load(f)
        
    test_data = torch.from_numpy(
        dataset["test"]["images"][:, None, :, :].astype(np.float32))
    test_labels = torch.from_numpy(
        dataset["test"]["labels"].astype(np.int64))

    test_dataset = data_utils.TensorDataset(test_data, test_labels)
    test_loader = data_utils.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    
    return test_loader, test_dataset

In [3]:
train_loader, train_dataset = load_train_data(TRAIN_PATH, BATCH_SIZE)
test_loader, _ = load_test_data(TEST_PATH, BATCH_SIZE)

classifier = ConvNet()
classifier.to(DEVICE)

print("#params", sum([x.numel() for x in classifier.parameters()]))


criterion = nn.CrossEntropyLoss()
criterion = criterion.to(DEVICE)

optimizer = torch.optim.Adam(classifier.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        classifier.train()

        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        outputs = classifier(images)
        loss = criterion(outputs, labels)
        print(outputs.shape)
        print(labels.shape)
        print(loss)
        raise RunTimeError()
        loss.backward()

        optimizer.step()

        print('\rEpoch [{0}/{1}], Iter [{2}/{3}] Loss: {4:.4f}'.format(
            epoch+1, NUM_EPOCHS, i+1, len(train_dataset)//BATCH_SIZE,
            loss.item()), end="")
    print("")
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(test_loader):
        classifier.eval()

        with torch.no_grad():
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = classifier(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).long().sum().item()

    print('Test Accuracy: {0}'.format(100 * correct / total))

#params 68106
torch.Size([32, 10])
torch.Size([32])
tensor(11.5136, device='cuda:0', grad_fn=<NllLossBackward>)


NameError: name 'RunTimeError' is not defined

In [None]:
torch.save(classifier.state_dict(), MODEL_NAME)

In [4]:
the_model = ConvNet()
the_model.load_state_dict(torch.load(MODEL_NAME))
the_model.to(DEVICE)

ConvNet(
  (feature_layer): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(3, 3))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(5, 5), stride=(3, 3))
    (3): ReLU()
  )
  (out_layer): Linear(in_features=1600, out_features=10, bias=True)
)

In [None]:
classifier is the_model

In [None]:
classifier == the_model

In [5]:
def test(model, test_loader):
    correct = 0
    total = 0
    
    for i, (images, labels) in enumerate(test_loader):
        model.eval()

        with torch.no_grad():
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)

            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).long().sum().item()
            
    print('Test Accuracy: {0}'.format(100 * correct / total))

In [6]:
test(classifier, test_loader)
test(the_model, test_loader)

Test Accuracy: 9.88
Test Accuracy: 62.74


In [9]:
for i, (images, labels) in enumerate(train_loader):
    the_model.eval()

    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad()
    outputs = the_model(images)
    loss = criterion(outputs, labels)
    print(outputs.shape)
    print(labels.shape)
    print(loss)
    _, predicted = torch.max(outputs, 1)
    total = labels.size(0)
    correct = (predicted == labels).long().sum().item()
    print(100 * correct / total)
    raise RuntimeError()

torch.Size([32, 10])
torch.Size([32])
tensor(0.8878, device='cuda:0', grad_fn=<NllLossBackward>)
68.75


RuntimeError: 

In [10]:
for i, (images, labels) in enumerate(test_loader):
    the_model.eval()

    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    optimizer.zero_grad()
    outputs = the_model(images)
    loss = criterion(outputs, labels)
    print(outputs.shape)
    print(labels.shape)
    print(loss)
    _, predicted = torch.max(outputs, 1)
    total = labels.size(0)
    correct = (predicted == labels).long().sum().item()
    print(100 * correct / total)
    raise RuntimeError()

torch.Size([32, 10])
torch.Size([32])
tensor(1.5239, device='cuda:0', grad_fn=<NllLossBackward>)
40.625


RuntimeError: 

In [None]:
print(classifier.feature_layer)

In [None]:
# TODO: convert this into pytorch lightning model