In [5]:
# packages
import sys
import os
import math
import torch
import random
import pickle
import calendar
import numpy as np
import config
import training 
import matplotlib.pyplot as plt
import evaluation as E
from torch.utils.data import TensorDataset, DataLoader
# Sets KMP_DUPLICATE_LIB_OK for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that
# can occur when several libraries bundle their own OpenMP — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Make float32 the default floating-point dtype. torch.set_default_dtype is the
# supported replacement for torch.set_default_tensor_type(torch.FloatTensor), which
# is deprecated in recent PyTorch releases.
torch.set_default_dtype(torch.float32)

In [21]:
# Collect the pickled dataset files (names matching Dataset*.p) found in ./dataset,
# in sorted order; the position in this list is the dataset index used later on.
datasets = sorted(
    name
    for name in os.listdir('./dataset')
    if name.startswith('Dataset') and name.endswith('.p')
)
datasets

['Dataset_Pendigits.p',
 'Dataset_acuteinflammation.p',
 'Dataset_balancescale.p',
 'Dataset_breastcancerwisc.p',
 'Dataset_cardiotocography3clases.p',
 'Dataset_energyy1.p',
 'Dataset_energyy2.p',
 'Dataset_iris.p',
 'Dataset_mammographic.p',
 'Dataset_seeds.p',
 'Dataset_tictactoe.p',
 'Dataset_vertebralcolumn2clases.p',
 'Dataset_vertebralcolumn3clases.p']

In [54]:
# Seeds 1..10 identify the saved runs (one checkpoint per seed).
seeds = list(range(1, 11))
# Variation levels applied at test time. The first entry is the int 0 (not 0.0),
# which is what makes the report print "0%" rather than "0.0%".
test_epsilons = [0, 0.05, 0.1, 0.2]

# non-variation aware (nominal) training

In [55]:
# Training-time variation level for this section (0 = nominal training).
# The value is interpolated verbatim into the checkpoint file name
# (`..._epsilon:{train_epsilon}_seed:...`), so writing it as 0.0 would look for a
# different file. NOTE(review): presumably this must match how the training script
# formatted the name — confirm.
train_epsilon = 0

In [56]:
# Result buffer indexed as [dataset, seed, test_epsilon, metric], where metric 0 is the
# accuracy and metric 1 the std written by the evaluation loop. The sizes are derived
# from the lists that are iterated over instead of hard-coding 13 and 10, so adding a
# dataset or seed can neither overflow the buffer nor leave zero rows in the average.
results = torch.zeros([len(datasets), len(seeds), len(test_epsilons), 2])
results.shape

torch.Size([13, 10, 4, 2])

In [None]:
# Evaluate every saved model (dataset x seed) under each test-time variation level.
# results[d, s, e, 0] receives the accuracy and results[d, s, e, 1] the std returned by
# E.BASIC_variation for dataset d, seed s and test epsilon e.
for d, dataset in enumerate(datasets):
    # Load each dataset once: it does not depend on the test epsilon, so reading it
    # inside the epsilon loop only repeated the same disk access.
    # NOTE: pickle.load can execute arbitrary code — only use trusted dataset files.
    datapath = os.path.join('./dataset', dataset)
    with open(datapath, 'rb') as f:
        data = pickle.load(f)
    # Only the test split and the dataset name are needed for evaluation.
    X_test    = data['X_test']
    y_test    = data['y_test']
    data_name = data['name']

    for s, seed in enumerate(seeds):
        setup = f'dataset:{data_name}_epsilon:{train_epsilon}_seed:{seed}'
        for e, test_epsilon in enumerate(test_epsilons):
            # The checkpoint is reloaded for every evaluation so each test epsilon
            # is applied to a freshly loaded model object.
            # NOTE(review): recent PyTorch releases default torch.load to
            # weights_only=True, which may reject fully pickled modules — confirm
            # against the installed version.
            model = torch.load(f'./results_variation/pNN_{setup}')
            # NOTE(review): 'N' is presumably the number of sampled variations — confirm.
            model.SetParameter('N', config.N_test)
            model.SetParameter('epsilon', test_epsilon)
            acc, std = E.BASIC_variation(model, X_test, y_test)
            results[d,s,e,0], results[d,s,e,1] = acc, std

In [None]:
# Average over the seed axis (dim 1): [dataset, seed, epsilon, metric] -> [dataset, epsilon, metric].
# `results` is rebound to its own mean, so the guard keeps this cell idempotent:
# without it, a second run would average away the epsilon axis instead.
if results.dim() == 4:
    results = results.mean(dim=1)
results.shape

In [None]:
# Report the seed-averaged accuracy (metric 0) of every dataset, one line per
# test-time variation level; columns follow the order of `datasets`.
for e, test_epsilon in enumerate(test_epsilons):
    header = f'Under {test_epsilon*100}% production error:'
    accuracies = np.round(results[:, e, 0].numpy(), 2)
    print(header)
    print(accuracies, '\n')

# 5% variation aware training

In [87]:
# Training-time variation level for this section (5% variation-aware training).
# The value is interpolated verbatim into the checkpoint file name
# (`..._epsilon:{train_epsilon}_seed:...`), so its textual form selects the files.
# NOTE(review): presumably this must match how the training script formatted the
# name — confirm.
train_epsilon = 0.05

In [88]:
# Result buffer indexed as [dataset, seed, test_epsilon, metric], where metric 0 is the
# accuracy and metric 1 the std written by the evaluation loop. The sizes are derived
# from the lists that are iterated over instead of hard-coding 13 and 10, so adding a
# dataset or seed can neither overflow the buffer nor leave zero rows in the average.
results = torch.zeros([len(datasets), len(seeds), len(test_epsilons), 2])
results.shape

torch.Size([13, 10, 4, 2])

In [89]:
# Evaluate every saved model (dataset x seed) under each test-time variation level.
# results[d, s, e, 0] receives the accuracy and results[d, s, e, 1] the std returned by
# E.BASIC_variation for dataset d, seed s and test epsilon e.
for d, dataset in enumerate(datasets):
    # Load each dataset once: it does not depend on the test epsilon, so reading it
    # inside the epsilon loop only repeated the same disk access.
    # NOTE: pickle.load can execute arbitrary code — only use trusted dataset files.
    datapath = os.path.join('./dataset', dataset)
    with open(datapath, 'rb') as f:
        data = pickle.load(f)
    # Only the test split and the dataset name are needed for evaluation.
    X_test    = data['X_test']
    y_test    = data['y_test']
    data_name = data['name']

    for s, seed in enumerate(seeds):
        setup = f'dataset:{data_name}_epsilon:{train_epsilon}_seed:{seed}'
        for e, test_epsilon in enumerate(test_epsilons):
            # The checkpoint is reloaded for every evaluation so each test epsilon
            # is applied to a freshly loaded model object.
            # NOTE(review): recent PyTorch releases default torch.load to
            # weights_only=True, which may reject fully pickled modules — confirm
            # against the installed version.
            model = torch.load(f'./results_variation/pNN_{setup}')
            # NOTE(review): 'N' is presumably the number of sampled variations — confirm.
            model.SetParameter('N', config.N_test)
            model.SetParameter('epsilon', test_epsilon)
            acc, std = E.BASIC_variation(model, X_test, y_test)
            results[d,s,e,0], results[d,s,e,1] = acc, std

In [90]:
# Average over the seed axis (dim 1): [dataset, seed, epsilon, metric] -> [dataset, epsilon, metric].
# `results` is rebound to its own mean, so the guard keeps this cell idempotent:
# without it, a second run would average away the epsilon axis instead.
if results.dim() == 4:
    results = results.mean(dim=1)
results.shape

torch.Size([13, 4, 2])

In [91]:
# Report the seed-averaged accuracy (metric 0) of every dataset, one line per
# test-time variation level; columns follow the order of `datasets`.
for e, test_epsilon in enumerate(test_epsilons):
    header = f'Under {test_epsilon*100}% production error:'
    accuracies = np.round(results[:, e, 0].numpy(), 2)
    print(header)
    print(accuracies, '\n')

Under 0% production error:
[0.57 1.   0.81 0.97 0.84 0.94 0.89 0.93 0.85 0.74 0.99 0.72 0.67] 

Under 5.0% production error:
[0.54 1.   0.8  0.97 0.84 0.92 0.89 0.93 0.85 0.74 0.93 0.72 0.67] 

Under 10.0% production error:
[0.5  0.99 0.79 0.97 0.84 0.92 0.88 0.88 0.84 0.72 0.83 0.72 0.66] 

Under 20.0% production error:
[0.42 0.94 0.77 0.96 0.83 0.9  0.86 0.77 0.77 0.7  0.7  0.72 0.65] 



# 10% variation aware training

In [92]:
# Training-time variation level for this section (10% variation-aware training).
# The value is interpolated verbatim into the checkpoint file name
# (`..._epsilon:{train_epsilon}_seed:...`), so its textual form selects the files.
# NOTE(review): presumably this must match how the training script formatted the
# name — confirm.
train_epsilon = 0.1

In [93]:
# Result buffer indexed as [dataset, seed, test_epsilon, metric], where metric 0 is the
# accuracy and metric 1 the std written by the evaluation loop. The sizes are derived
# from the lists that are iterated over instead of hard-coding 13 and 10, so adding a
# dataset or seed can neither overflow the buffer nor leave zero rows in the average.
results = torch.zeros([len(datasets), len(seeds), len(test_epsilons), 2])
results.shape

torch.Size([13, 10, 4, 2])

In [94]:
# Evaluate every saved model (dataset x seed) under each test-time variation level.
# results[d, s, e, 0] receives the accuracy and results[d, s, e, 1] the std returned by
# E.BASIC_variation for dataset d, seed s and test epsilon e.
for d, dataset in enumerate(datasets):
    # Load each dataset once: it does not depend on the test epsilon, so reading it
    # inside the epsilon loop only repeated the same disk access.
    # NOTE: pickle.load can execute arbitrary code — only use trusted dataset files.
    datapath = os.path.join('./dataset', dataset)
    with open(datapath, 'rb') as f:
        data = pickle.load(f)
    # Only the test split and the dataset name are needed for evaluation.
    X_test    = data['X_test']
    y_test    = data['y_test']
    data_name = data['name']

    for s, seed in enumerate(seeds):
        setup = f'dataset:{data_name}_epsilon:{train_epsilon}_seed:{seed}'
        for e, test_epsilon in enumerate(test_epsilons):
            # The checkpoint is reloaded for every evaluation so each test epsilon
            # is applied to a freshly loaded model object.
            # NOTE(review): recent PyTorch releases default torch.load to
            # weights_only=True, which may reject fully pickled modules — confirm
            # against the installed version.
            model = torch.load(f'./results_variation/pNN_{setup}')
            # NOTE(review): 'N' is presumably the number of sampled variations — confirm.
            model.SetParameter('N', config.N_test)
            model.SetParameter('epsilon', test_epsilon)
            acc, std = E.BASIC_variation(model, X_test, y_test)
            results[d,s,e,0], results[d,s,e,1] = acc, std

In [95]:
# Average over the seed axis (dim 1): [dataset, seed, epsilon, metric] -> [dataset, epsilon, metric].
# `results` is rebound to its own mean, so the guard keeps this cell idempotent:
# without it, a second run would average away the epsilon axis instead.
if results.dim() == 4:
    results = results.mean(dim=1)
results.shape

torch.Size([13, 4, 2])

In [96]:
# Report the seed-averaged accuracy (metric 0) of every dataset, one line per
# test-time variation level; columns follow the order of `datasets`.
for e, test_epsilon in enumerate(test_epsilons):
    header = f'Under {test_epsilon*100}% production error:'
    accuracies = np.round(results[:, e, 0].numpy(), 2)
    print(header)
    print(accuracies, '\n')

Under 0% production error:
[0.45 1.   0.8  0.95 0.83 0.92 0.88 0.92 0.85 0.8  0.7  0.73 0.69] 

Under 5.0% production error:
[0.45 1.   0.8  0.95 0.83 0.92 0.88 0.92 0.84 0.8  0.71 0.73 0.68] 

Under 10.0% production error:
[0.44 1.   0.79 0.95 0.82 0.92 0.88 0.9  0.83 0.79 0.72 0.73 0.68] 

Under 20.0% production error:
[0.4  0.97 0.76 0.95 0.82 0.91 0.87 0.83 0.78 0.76 0.7  0.73 0.67] 

