In [2]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import log_loss
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

# MNIST

In [3]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from copy import deepcopy
from torch.utils.data import TensorDataset, DataLoader
import torchvision.datasets as dsets

In [4]:
# Load the MNIST training images, flattened to 784-vectors and divided by 255
mnist_train = dsets.MNIST('../data', train=True, download=True)
imgs = mnist_train.data.reshape(-1, 784) / 255.0
labels = mnist_train.targets

# Shuffle and split into train and val.
# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching the global RNG state.
SPLIT_SEED = 0
N_VAL = 6000
inds = torch.randperm(
    len(mnist_train), generator=torch.Generator().manual_seed(SPLIT_SEED))
imgs = imgs[inds]
labels = labels[inds]
val, Y_val = imgs[:N_VAL], labels[:N_VAL]
train, Y_train = imgs[N_VAL:], labels[N_VAL:]

# Load test set (same flattening and scaling as the training images)
mnist_test = dsets.MNIST('../data', train=False, download=True)
test, Y_test = mnist_test.data.reshape(-1, 784) / 255.0, mnist_test.targets

# NumPy versions of the test data (Y_test_np is what log_loss consumes below)
test_np = test.cpu().numpy()
Y_test_np = Y_test.cpu().numpy()

In [5]:
def train_model(train, Y_train, val, Y_val, device=None, lr=1e-3, mbsize=64,
                max_nepochs=250, lookback=5, verbose=False):
    """Train a small MLP classifier with early stopping on validation loss.

    The network is Linear(d, 256) -> ELU -> Linear(256, 256) -> ELU ->
    Linear(256, 10), where d = train.shape[1]. It is optimised with Adam on
    cross-entropy loss.

    Args:
        train: 2-D tensor of training inputs; its second dimension sets the
            input width of the network.
        Y_train: class-index targets for `train`, as consumed by
            nn.CrossEntropyLoss.
        val: validation inputs with the same number of columns as `train`.
        Y_val: class-index targets for `val`.
        device: device to train on (anything torch.device accepts). Defaults
            to cuda:0 when CUDA is available, otherwise the CPU.
        lr: Adam learning rate.
        mbsize: minibatch size.
        max_nepochs: maximum number of passes over the training set.
        lookback: stop once this many epochs have passed since the best
            validation loss.
        verbose: print the validation loss after every epoch.

    Returns:
        A deep copy of the model taken at the epoch with the lowest
        validation loss. If the validation loss never improves (for example
        it is NaN from the start, or max_nepochs is 0) the untrained initial
        model is returned instead of raising UnboundLocalError.
    """
    if device is None:
        # Same default as before when a GPU exists; CPU fallback otherwise.
        if torch.cuda.is_available():
            device = torch.device('cuda', 0)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(device)

    # Create model
    model = nn.Sequential(
        nn.Linear(train.shape[1], 256),
        nn.ELU(),
        nn.Linear(256, 256),
        nn.ELU(),
        nn.Linear(256, 10)).to(device)
    loss_fn = nn.CrossEntropyLoss()

    # Move all data to the device once, so batches need no per-step transfer.
    train = train.to(device)
    val = val.to(device)
    Y_train = Y_train.to(device)
    Y_val = Y_val.to(device)

    # Data loader
    train_loader = DataLoader(
        TensorDataset(train, Y_train), batch_size=mbsize, shuffle=True)

    # Setup
    optimizer = optim.Adam(model.parameters(), lr=lr)
    min_criterion = np.inf
    min_epoch = 0
    # Start from a snapshot so there is always a model to return.
    best_model = deepcopy(model)

    # Train
    for epoch in range(max_nepochs):
        for x, y in train_loader:
            # Take gradient step.
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()

        # Calculate validation loss.
        with torch.no_grad():
            val_loss = loss_fn(model(val), Y_val).item()
        if verbose:
            print('{}Epoch = {}{}'.format('-' * 10, epoch + 1, '-' * 10))
            print('Val loss = {:.4f}'.format(val_loss))

        # Check convergence criterion.
        if val_loss < min_criterion:
            min_criterion = val_loss
            min_epoch = epoch
            best_model = deepcopy(model)
        elif (epoch - min_epoch) == lookback:
            if verbose:
                print('Stopping early')
            break

    # Keep best model
    return best_model


In [6]:
# Use the first GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda', 0) if torch.cuda.is_available() else torch.device('cpu')

# Load the previously trained full-feature model.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which
# rejects fully pickled modules; if this load fails, pass weights_only=False
# after confirming the installed version supports it.
model = torch.load('trained_models/mnist mlp.pt', map_location=device).to(device)

In [7]:
# Baseline: test-set log-loss of the loaded model using every input feature.
# This is inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    base_probs = model(test.to(device)).softmax(dim=1).cpu().numpy()
base_loss = log_loss(Y_test_np, base_probs)

In [8]:
# Display the baseline test log-loss computed in the previous cell.
base_loss

0.08093452205247045

In [9]:
# Leave-one-feature-out ablation: retrain without input column i and record
# how much the test log-loss changes relative to the full-feature baseline.
# NOTE(review): the sweep starts at 765 rather than 0 -- presumably the resume
# point of an interrupted run. scores[:765] stay at zero after this cell;
# confirm before using them.
FIRST_FEATURE = 765
n_features = train.shape[1]
scores = np.zeros(n_features)
print(n_features)
for i in range(FIRST_FEATURE, n_features):
    # Boolean mask selecting every column except i
    keep_mask = np.ones(n_features, dtype=bool)
    keep_mask[i] = False
    train_small = train[:, keep_mask]
    val_small = val[:, keep_mask]
    test_small = test[:, keep_mask]

    # Train on the reduced inputs. A separate name keeps the loaded baseline
    # `model` intact, so the base_loss cell can be re-run safely afterwards.
    ablated_model = train_model(train_small, Y_train, val_small, Y_val)

    # Test log-loss of the ablated model (inference only, no autograd)
    with torch.no_grad():
        ablated_probs = ablated_model(
            test_small.to(device)).softmax(dim=1).cpu().numpy()
    scores[i] = log_loss(Y_test_np, ablated_probs) - base_loss
    print('Done with {} (score = {:.4f})'.format(i, scores[i]))

784
Done with 765 (score = 0.0049)
Done with 766 (score = -0.0090)
Done with 767 (score = -0.0097)
Done with 768 (score = 0.0014)
Done with 769 (score = 0.0028)
Done with 770 (score = -0.0017)
Done with 771 (score = 0.0060)
Done with 772 (score = 0.0025)
Done with 773 (score = -0.0051)
Done with 774 (score = 0.0039)
Done with 775 (score = -0.0017)
Done with 776 (score = 0.0002)
Done with 777 (score = -0.0046)
Done with 778 (score = -0.0021)
Done with 779 (score = -0.0071)
Done with 780 (score = -0.0094)
Done with 781 (score = -0.0111)
Done with 782 (score = 0.0036)
Done with 783 (score = 0.0004)


In [None]:
# Fill `scores` from a text log of ablation results, one score per line.
# NOTE(review): absolute, machine-specific path kept from the original; point
# it at wherever the ablation log actually lives before re-running.
ABLATION_LOG_PATH = 'C:/Users/hp/Desktop/feature ablation.txt'

with open(ABLATION_LOG_PATH, 'r') as file:
    # Only lines containing '=' carry a score; headers and blank lines are skipped.
    score_lines = [line for line in file if '=' in line]

if len(score_lines) < len(scores):
    raise ValueError('expected {} score lines in {!r}, found {}'.format(
        len(scores), ABLATION_LOG_PATH, len(score_lines)))

# Each line is expected to end in "= <value>)" -- presumably the
# "Done with i (score = s)" lines printed by the ablation loop; verify against
# the actual log. Scores are assigned by line position, as before.
# float() replaces eval() so file contents are never executed, and stripping
# the trailing ')' explicitly avoids truncating the last digit when the final
# line has no newline.
scores[:] = [
    float(line.split('=', 1)[1].strip().rstrip(')'))
    for line in score_lines[:len(scores)]
]

# Show a small sample instead of dumping the whole file and array
scores[:10]

In [None]:
# Persist the per-feature ablation scores as a pickle for later analysis.
with open('results/mnist feature_ablation.pkl', 'wb') as out_file:
    pickle.dump(scores, out_file)