In [2]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

# MNIST

In [3]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from copy import deepcopy
from torch.utils.data import TensorDataset, DataLoader
import torchvision.datasets as dsets

In [4]:
# Load train set
train = dsets.MNIST('../data', train=True, download=True)
imgs = train.data.reshape(-1, 784) / 255.0
labels = train.targets

# Shuffle and split into train and val
inds = torch.randperm(len(train))
imgs = imgs[inds]
labels = labels[inds]
val, Y_val = imgs[:6000], labels[:6000]
train, Y_train = imgs[6000:], labels[6000:]

# Load test set
test = dsets.MNIST('../data', train=False, download=True)
test, Y_test = test.data.reshape(-1, 784) / 255.0, test.targets

# Move test data to numpy
test_np = test.cpu().data.numpy()
Y_test_np = Y_test.cpu().data.numpy()

In [5]:
def train_model(train, Y_train, val, Y_val):
    # Create model
    device = torch.device('cuda', 0)
    model = nn.Sequential(
        nn.Linear(train.shape[1], 32),
        nn.ELU(),
        nn.Linear(32, 10)).to(device)

    # Training parameters
    lr = 1e-3
    mbsize = 64
    max_nepochs = 250
    loss_fn = nn.CrossEntropyLoss()
    lookback = 5
    verbose = False

    # Move to GPU
    train = train.to(device)
    val = val.to(device)
    # test = test.to(device)
    Y_train = Y_train.to(device)
    Y_val = Y_val.to(device)
    # Y_test = Y_test.to(device)

    # Data loader
    train_set = TensorDataset(train, Y_train)
    train_loader = DataLoader(train_set, batch_size=mbsize, shuffle=True)

    # Setup
    optimizer = optim.Adam(model.parameters(), lr=lr)
    min_criterion = np.inf
    min_epoch = 0

    # Train
    for epoch in range(max_nepochs):
        for x, y in train_loader:
            # Move to device.
            x = x.to(device=device)
            y = y.to(device=device)

            # Take gradient step.
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
            model.zero_grad()

        # Check progress.
        with torch.no_grad():
            # Calculate validation loss.
            val_loss = loss_fn(model(val), Y_val).item()
            if verbose:
                print('{}Epoch = {}{}'.format('-' * 10, epoch + 1, '-' * 10))
                print('Val loss = {:.4f}'.format(val_loss))

            # Check convergence criterion.
            if val_loss < min_criterion:
                min_criterion = val_loss
                min_epoch = epoch
                best_model = deepcopy(model)
            elif (epoch - min_epoch) == lookback:
                if verbose:
                    print('Stopping early')
                break

    # Keep best model
    model = best_model
    return model


In [6]:
p = [np.mean(Y_train.data.numpy() == i) for i in range(10)]
pred = np.array(p)[np.newaxis].repeat(len(Y_test), 0)
base_loss = log_loss(Y_test_np, pred)

In [7]:
device = torch.device('cuda', 0)
scores = np.zeros(train.shape[1])

for i in range(715,train.shape[1]):
    # Subsample data
    train_small = train[:, i:i+1]
    val_small = val[:, i:i+1]
    test_small = test[:, i:i+1]
    
    # Train model
    model = train_model(train_small, Y_train, val_small, Y_val)
    
    # Loss
    loss = log_loss(
        Y_test_np,
        model(test_small.to(device)).softmax(dim=1).cpu().data.numpy())
    scores[i] = base_loss - loss
    print('Done with {} (score = {:.4f})'.format(i, scores[i]))

Done with 715 (score = 0.0940)
Done with 716 (score = 0.0758)
Done with 717 (score = 0.0553)
Done with 718 (score = 0.0350)
Done with 719 (score = 0.0190)
Done with 720 (score = 0.0085)
Done with 721 (score = 0.0043)
Done with 722 (score = 0.0013)
Done with 723 (score = 0.0001)
Done with 724 (score = -0.0011)
Done with 725 (score = -0.0015)
Done with 726 (score = -0.0005)
Done with 727 (score = -0.0015)
Done with 728 (score = -0.0010)
Done with 729 (score = -0.0008)
Done with 730 (score = -0.0003)
Done with 731 (score = -0.0003)
Done with 732 (score = -0.0003)
Done with 733 (score = 0.0014)
Done with 734 (score = 0.0040)
Done with 735 (score = 0.0113)
Done with 736 (score = 0.0177)
Done with 737 (score = 0.0330)
Done with 738 (score = 0.0434)
Done with 739 (score = 0.0570)
Done with 740 (score = 0.0661)
Done with 741 (score = 0.0712)
Done with 742 (score = 0.0651)
Done with 743 (score = 0.0484)
Done with 744 (score = 0.0327)
Done with 745 (score = 0.0244)
Done with 746 (score = 0.0145)

In [23]:
with open('results/mnist univariate.pkl', 'wb') as f:
    pickle.dump(scores, f)