## Load Data

In [48]:
import pandas as pd

# df = pd.read_csv("data/5_sonar.csv")
df = pd.read_csv("data/5_sonar.csv_MACFE_s0.3_d2.csv")

df.head()

Unnamed: 0,minmax_s(attribute_3),minmax_s(attribute_7),minmax_s(attribute_8),minmax_s(attribute_12),minmax_s(attribute_17),minmax_s(attribute_22),minmax_s(attribute_24),minmax_s(attribute_30),minmax_s(attribute_31),minmax_s(attribute_33),...,"minmax_s(*(*(attribute_7,attribute_22),+(attribute_12,attribute_34)))","minmax_s(*(+(attribute_8,attribute_12),+(attribute_12,attribute_34)))","minmax_s(*(+(attribute_12,attribute_34),+(attribute_17,attribute_40)))","minmax_s(*(+(attribute_17,attribute_40),*(attribute_31,attribute_36)))","minmax_s(-(*(attribute_22,attribute_33),*(attribute_31,attribute_36)))","minmax_s(-(*(attribute_24,attribute_33),*(attribute_31,attribute_36)))","minmax_s(+(*(attribute_31,attribute_36),+(attribute_33,attribute_34)))","minmax_s(*(+(attribute_33,attribute_34),+(attribute_34,attribute_40)))","minmax_s(*(+(attribute_34,attribute_40),-(attribute_37,attribute_46)))",class
0,0.135677,0.407468,0.340904,0.197245,0.285048,0.496064,0.544104,0.345584,0.089918,0.487661,...,0.279532,0.259994,0.327763,0.065513,0.702421,0.627092,0.518474,0.373346,0.628076,0
1,0.27201,0.574405,0.755458,0.925557,1.0,0.391882,0.376498,0.15074,0.360327,0.158248,...,0.305034,0.840096,0.683781,0.193665,0.516624,0.423873,0.172586,0.04693,0.38416,0
2,0.35611,0.64881,0.819405,1.0,0.66418,0.666394,0.349247,0.841696,0.875204,0.145437,...,0.630702,1.0,0.721859,0.304377,0.471057,0.335525,0.227609,0.091656,0.562431,0
3,0.199737,0.288149,0.269239,0.257327,0.035644,0.354872,0.47198,0.313838,0.629755,0.592565,...,0.085619,0.152782,0.332937,0.18985,0.611194,0.57504,0.418393,0.348287,0.425214,0
4,0.153088,0.318182,0.531863,0.544549,0.720858,0.41642,0.528634,0.437946,0.503651,0.252441,...,0.098907,0.285133,0.292079,0.212276,0.507043,0.44225,0.176384,0.030943,0.373173,0


In [50]:
from sklearn.model_selection import train_test_split

X = df.drop(['class'], axis = 1).values
y = df['class'].values

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=42
    )

## Model

In [51]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

model = LogisticRegression(random_state = 42)
model.fit(X_train, y_train)

# Accuracy
y_pred = model.predict(X_test)
print(f"Acc: {accuracy_score(y_test, y_pred)}")

Acc: 0.9047619047619048


## Black-box Adversarial Attacks

In [52]:
from utils.AdversarialAttacks import ZooAttackEvaluation
from utils.AdversarialAttacks import BoundaryAttackEvaluation
from utils.AdversarialAttacks import HopSkipJumpEvaluation

# Robust Accuracy
rob_acc = ZooAttackEvaluation(model, X_test, y_test)
print(f"Zoo Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

# rob_acc = BoundaryAttackEvaluation(model, X_test, y_test)
# print(f"Boundary Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

rob_acc = HopSkipJumpEvaluation(model, X_test, y_test)
print(f"HopSkipJump Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

Zoo Robust Acc: 0.6714285714285714 +- 0.009523809523809533
HopSkipJump Robust Acc: 0.09523809523809523 +- 0.0


## Causal Feature Selection

In [53]:
from causalnex.structure import DAGClassifier

dag = DAGClassifier(
        alpha=0.01,
        beta=0.5,
        hidden_layer_units=[5],
        fit_intercept=True,
        standardize=True
    )

In [54]:
X.shape

(208, 101)

In [55]:
dag.fit(X, y)

DAGClassifier(alpha=0.01, beta=0.5, hidden_layer_units=[5], standardize=True,
              target_dist_type='bin')

In [56]:
import numpy as np

threshold = .50
_threshold = np.quantile(dag.feature_importances_[0], (1.0 - threshold))
selection_idx = np.where(dag.feature_importances_[0] >= _threshold)[0]
X_selected = X[:, selection_idx]

In [57]:
X_selected.shape

(208, 51)

In [58]:
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=42
    )

In [59]:
model = LogisticRegression(random_state = 42)
model.fit(X_train, y_train)

# Accuracy
y_pred = model.predict(X_test)
print(f"Acc: {accuracy_score(y_test, y_pred)}")

Acc: 0.9047619047619048


In [61]:
# Robust Accuracy
rob_acc = ZooAttackEvaluation(model, X_test, y_test)
print(f"Zoo Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

# rob_acc = BoundaryAttackEvaluation(model, X_test, y_test)
# print(f"Boundary Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

rob_acc = HopSkipJumpEvaluation(model, X_test, y_test)
print(f"HopSkipJump Robust Acc: {rob_acc.mean()} +- {rob_acc.std()}")

Zoo Robust Acc: 0.5095238095238095 +- 0.024281045302822813
HopSkipJump Robust Acc: 0.09523809523809523 +- 0.0


In [None]:
Dataset Original: Zoo Robust Acc: 0.3285714285714285 +- 0.009523809523809513