# Attacker model

Here we will put it all together, using the synthetic data generated in `data_synthesis_playground.ipynb`.

In [1]:
from mblearn import AttackModels, ShadowModels

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

First we need to make a target model (we will use RandomForest with 100 estimators)

## Target model

We are going to use the wine dataset, which has 13 features and 3 classes.

In [3]:
# Target model: a 100-tree random forest. Fixing random_state makes the fitted
# forest (and every metric derived from it further down) reproducible when the
# notebook is re-run from a fresh kernel.
rf_target = RandomForestClassifier(n_estimators=100, random_state=42)

# Wine dataset as a (features, labels) pair of arrays.
data, target = load_wine(return_X_y=True)

In [4]:
# Rescale the features with a min-max scaler.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# held-out rows influence the scaling — confirm this is acceptable here.
scaler = MinMaxScaler()
data_std = scaler.fit_transform(data)

# Hold out 40% of the rows: X_train holds the "members" the target model is
# fitted on, X_test the "non-members". A fixed random_state keeps the
# membership split identical across runs, so the results are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data_std, target, test_size=0.4, random_state=42
)

# Trailing semicolon suppresses the estimator repr in the cell output.
rf_target.fit(X_train, y_train);

## Shadow model

Now train the Shadow models with synthetic data and the same learner.

In [5]:
# Load the synthetic records from disk and preview the first five rows.
synth_data = pd.read_csv("synthetic_data.csv")
synth_data.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,label
0,0.60564,0.918487,0.905298,0.270837,0.925469,0.798383,0.814641,0.115432,0.494172,0.572726,0.923159,0.603152,0.863932,0
1,0.890226,0.555533,0.440264,0.537694,0.228497,0.515491,0.63125,0.677099,0.784332,0.199912,0.322902,0.630714,0.950625,0
2,0.545436,0.630346,0.988858,0.351225,0.699322,0.637016,0.841696,0.828762,0.74394,0.462467,0.574451,0.912272,0.551307,0
3,0.940381,0.912475,0.0085,0.112698,0.501584,0.70728,0.821104,0.564798,0.744142,0.347987,0.778279,0.736786,0.595458,0
4,0.712215,0.452527,0.942117,0.353932,0.265224,0.536412,0.773513,0.98011,0.654703,0.732917,0.932585,0.584428,0.465397,0


In [6]:
# Learner handed to the shadow models: a 100-tree random forest, the same
# configuration as the target model.
rf_shadow = RandomForestClassifier(
    n_estimators=100,
)

In [7]:
# Build the shadow models from the synthetic data with the rf_shadow learner.
# NOTE(review): presumably 5 is the number of shadow models and 3 the number of
# target classes — confirm against the mblearn ShadowModels signature.
sh = ShadowModels(5, synth_data, 3, rf_shadow)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [8]:
# Results collected by the shadow models; used below to fit the attack models.
shadow_data = sh.results

## Attacker

Now that we have the shadow dataset we can train the attacker model. The attacker learner doesn't need to be the same as the target so pick the one that performs the best.

`AttackModels` trains a model for each original class in shadow data (and in target model) with the in/out of training label as the target label.

In [9]:
# Learner used for the attack models (a 100-tree random forest); it does not
# have to be the same kind of learner as the target model.
rf_attack = RandomForestClassifier(
    n_estimators=100,
)

In [10]:
# Attack-model container for the 3 target classes, using rf_attack as the learner.
attacker = AttackModels(target_classes=3, attack_learner=rf_attack)

In [13]:
# Fit the attack models on the shadow-model results; learner_kwargs is left empty.
attacker.fit(shadow_data, learner_kwargs={})

Now let's test on `X_test` (non-members) and an equally sized random sample of `X_train` (members), so the evaluation set is balanced 50/50.

In [14]:
# Down-sample the members so the "in" and "out" evaluation sets are the same size.
# replace=False draws distinct rows: the default (sampling with replacement)
# would put duplicate members in the evaluation set and skew the "in" metrics.
# This requires len(X_test) <= len(X_train), which holds for the 60/40 split above.
train_idx = np.random.choice(len(X_train), size=len(X_test), replace=False)
X_train = X_train[train_idx]
y_train = y_train[train_idx]

In [15]:
# Sanity check: both evaluation sets should contain the same number of rows.
print(X_train.shape[0], X_test.shape[0])

72 72


In [16]:
# Target-model class probabilities for the member ("in") records.
X_in = rf_target.predict_proba(X_train)
# Attack predictions for those probability vectors, given the true class labels.
# NOTE(review): presumably each label routes its row to the matching per-class
# attack model and batch=True scores all rows in one call — confirm in mblearn.
res_in = attacker.predict(X_in, y_train, batch=True)

In [17]:
# Target-model class probabilities for the non-member ("out") records.
X_out = rf_target.predict_proba(X_test)
# Attack predictions for those probability vectors, given the true class labels.
# NOTE(review): presumably each label routes its row to the matching per-class
# attack model and batch=True scores all rows in one call — confirm in mblearn.
res_out = attacker.predict(X_out, y_test, batch=True)

## Some metrics

In [18]:
# Share of member records whose attack prediction is class 1 ("in").
np.argmax(res_in, axis=1).mean()

0.9305555555555556

In [19]:
# Share of non-member records whose attack prediction is class 0 ("out").
1 - np.argmax(res_out, axis=1).mean()

0.3055555555555556

### Precision, Recall and F-1 
Since the class balance is 50/50, a naive classifier that always predicts "in" would achieve 0.5 precision, 1.0 recall and 0.67 F1; this is the baseline to beat.

In [20]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [21]:
# Attack decisions (argmax of each prediction row): members first, then non-members.
y_pred = np.concatenate(
    [np.argmax(attack_out, axis=1) for attack_out in (res_in, res_out)]
)
# Ground truth in the same order: 1 for every member, 0 for every non-member.
y_true = np.concatenate(
    [
        np.ones_like(y_train),
        np.zeros_like(y_test),
    ]
)

In [22]:
# Precision of the membership attack ("in" is the positive class).
precision_score(y_true=y_true, y_pred=y_pred)

0.5726495726495726

In [23]:
# Recall of the membership attack ("in" is the positive class).
recall_score(y_true=y_true, y_pred=y_pred)

0.9305555555555556

In [24]:
# F1 score of the membership attack ("in" is the positive class).
f1_score(y_true=y_true, y_pred=y_pred)

0.708994708994709

Not bad for an out-of-the-box setup.