In [1]:
import os
import pickle
import datetime
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from scipy.spatial.distance import pdist, squareform

import time
import random

In [2]:
from cf_eval.metrics import *

from experiments.config import *
from experiments.util import get_tabular_dataset
from teacher.datasets import load_compas, load_german, load_adult, load_heloc


In [3]:
# Seed NumPy's and the stdlib's global random generators with the same value.
# `random_state` is not defined in this cell; presumably it comes from the
# star import of experiments.config -- TODO confirm.
np.random.seed(random_state)
random.seed(random_state)

In [6]:
# Dataset name -> loader callable; each loader is later invoked as
# DATASETS[name](normalize=True).
DATASETS = dict(
    adult=load_adult,
    compas=load_compas,
    fico=load_heloc,
    german=load_german,
)

# Black-box name -> estimator class (instantiated with no arguments when training).
BLACKBOX = dict(
    SVM=SVC,
    NN=MLPClassifier,
)

In [11]:
# Load one dataset and the matching pickled black box for manual inspection.
dataset = 'german'
black_box = 'SVM_default_params'
ds = DATASETS[dataset](normalize=True)
X, y = ds['X'], ds['y']
# NOTE(review): this split is seeded with the literal 42, whereas the training
# loop in this notebook seeds its split with `random_state`. If the two values
# differ, X_train here is not the data the pickled model was fitted on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Read the model inside a context manager so the file handle is closed right
# after loading instead of being left open for the rest of the kernel session.
# pickle.load can execute arbitrary code: only load model files produced by
# this project.
with open(f'../models/{dataset}_{black_box}.pickle', 'rb') as pickle_file:
    bb = pickle.load(pickle_file)


In [12]:
# Show the predictions of the loaded black box `bb` for every row of X_train.
# NOTE(review): this displays the whole prediction array; a slice or a count
# of the predicted values would keep the notebook output short.
bb.predict(X_train)

array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [7]:
# Train every black box with its default constructor arguments on every
# dataset and pickle each fitted model to
# ../models/<dataset>_<black_box>_default_params.pickle.
#
# `dataset` and `black_box` are bound by the loops below, so they are not
# pre-assigned here. After the loop, `ds`, `X`, `y`, the train/test split and
# `bb` hold the values of the last iteration ('compas' / 'NN').
normalize = 'standard'  # not read in this cell; kept in case other cells use it

# Create the output directory up front: without it, open(..., 'wb') would
# raise FileNotFoundError only after the first model had already been trained.
os.makedirs('../models/', exist_ok=True)

for dataset in ['adult', 'german', 'fico', 'compas']:
    for black_box in ['SVM', 'NN']:
        print(f'{black_box} - {dataset}')

        ds = DATASETS[dataset](normalize=True)
        X, y = ds['X'], ds['y']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

        # Default-parameter estimator: the class is instantiated with no arguments.
        bb = BLACKBOX[black_box]()
        bb.fit(X_train, y_train)

        with open(f'../models/{dataset}_{black_box}_default_params.pickle', 'wb') as pickle_file:
            pickle.dump(bb, pickle_file)

SVM - adult
NN - adult
SVM - german
NN - german




SVM - fico
NN - fico




SVM - compas
NN - compas




In [8]:
from sklearn.svm import SVC

In [13]:
# Fit `bb` on the current X_train / y_train.
# NOTE(review): `bb`, `X_train` and `y_train` are whatever earlier cells left
# in the kernel; the execution counts in this notebook are out of order, so
# confirm which model and which split this call actually refits.
bb.fit(X_train, y_train)



In [15]:


# SVC with an explicit, hand-set hyper-parameter configuration (sigmoid kernel).
# NOTE(review): the origin of these values is not shown in this notebook.
b = SVC(
    kernel='sigmoid',
    C=0.8367296331778482,
    gamma=0.04229578857212292,
    coef0=0.0653309732588253,
    class_weight='balanced',
    probability=True,
    max_iter=1000,
    random_state=0,
)

In [16]:
# Fit `b` on the training split and display its score on that same split.
# fit() returns the estimator itself, so the two calls can be chained.
b.fit(X_train, y_train).score(X_train, y_train)



0.24681357493857495

In [14]:
# Display the score of `bb` on the training split (presumably mean accuracy,
# the scikit-learn classifier convention -- confirm for the loaded model).
bb.score(X_train, y_train)

0.24681357493857495

In [10]:
# Baseline for comparison: an SVC built with no constructor arguments,
# fitted on the current training split.
a = SVC()
a.fit(X=X_train, y=y_train)

In [11]:
# Display the score of the default-argument SVC `a` on the training split.
a.score(X_train, y_train)

0.7979499385749386

In [8]:
# Display the score of `bb` on the training split.
# NOTE(review): the same call appears in another cell of this notebook;
# consider keeping only one of them.
bb.score(X_train, y_train)

0.24681357493857495