# Simulation on the semi-synthetic background-MNIST data

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import mechanismlearn as ml
# Directory that holds the CSV files read by this notebook. File names are
# appended with plain string concatenation, so the trailing "/" must be kept.
semiSyn_data_dir = r"../test_data/simu_data/semi_synthetic_data/frontdoor_data/"

### Read datasets

In [2]:
# Confounded training split: features X, labels Y and mechanism variable Z.
# Every CSV is converted to a NumPy array in one step; Y and Z are reshaped
# to single-column 2-D arrays.
X_train_conf = np.array(pd.read_csv(semiSyn_data_dir + "X_train_conf.csv"))
Y_train_conf = np.array(pd.read_csv(semiSyn_data_dir + "Y_train_conf.csv")).reshape(-1, 1)
Z_train_conf = np.array(pd.read_csv(semiSyn_data_dir + "Z_train_conf.csv")).reshape(-1, 1)

# Unconfounded test split (features and single-column labels).
X_test_unconf = np.array(pd.read_csv(semiSyn_data_dir + "X_test_unconf.csv"))
Y_test_unconf = np.array(pd.read_csv(semiSyn_data_dir + "Y_test_unconf.csv")).reshape(-1, 1)

# Confounded test split (features and single-column labels).
X_test_conf = np.array(pd.read_csv(semiSyn_data_dir + "X_test_conf.csv"))
Y_test_conf = np.array(pd.read_csv(semiSyn_data_dir + "Y_test_conf.csv")).reshape(-1, 1)

### Train a deconfounded KNN classifier using mechanism learning

In [3]:
# Build the deconfounded model through mechanismlearn: Y is supplied as the
# cause, Z as the mechanism and X as the effect, with a 5-nearest-neighbour
# classifier as the underlying learner. The call's result is unpacked into the
# classifier and a second object (presumably the deconfounded data requested
# via output_data=True - TODO confirm).
# NOTE(review): the meaning of n_bins=[0, 0] and cb_mode="fast" is defined by
# mechanismlearn; confirm against its documentation before changing them.
clf_deconf, deconf_data = ml.mechanism_classifier(
    cause_data={"Y": Y_train_conf},
    mechanism_data={"Z": Z_train_conf},
    effect_data={"X": X_train_conf},
    n_bins=[0, 0],
    ml_model=KNeighborsClassifier(n_neighbors=5),
    rebalance=False,
    n_samples=None,
    cb_mode="fast",
    output_data=True,
)

### Train a confounded KNN classifier

In [4]:
# Baseline: the same 5-nearest-neighbour model fitted directly on the
# confounded training data. The labels are stored as a single column, so they
# are flattened to 1-D before fitting.
clf_conf = KNeighborsClassifier(n_neighbors=5).fit(X_train_conf, Y_train_conf.ravel())

### Model performance comparison

In [5]:
# Predictions of both classifiers on both test sets. The four result names are
# kept so they remain available to any later cell.
y_pred_deconf_unconf = clf_deconf.predict(X_test_unconf)
y_pred_conf_unconf = clf_conf.predict(X_test_unconf)
y_pred_deconf_conf = clf_deconf.predict(X_test_conf)
y_pred_conf_conf = clf_conf.predict(X_test_conf)

# One entry per test set: banner line, true labels, then the predictions of
# the deconfounded and the confounded model.
evaluations = [
    ("Test on the unconfounded test set:", Y_test_unconf, y_pred_deconf_unconf, y_pred_conf_unconf),
    ("Test on the confounded test set:", Y_test_conf, y_pred_deconf_conf, y_pred_conf_conf),
]

for position, (banner, labels, pred_deconf, pred_conf) in enumerate(evaluations):
    # A row of asterisks separates consecutive test sets (not printed before the first).
    if position > 0:
        print("*" * 30)
    print(banner)
    print("Report of de-confonded model:")
    print(classification_report(labels, pred_deconf))

    print("-" * 20)
    print("Report of confonded model:")
    print(classification_report(labels, pred_conf))

Test on the unconfounded test set:
Report of de-confonded model:
              precision    recall  f1-score   support

           1       0.94      0.92      0.93       298
           2       0.92      0.94      0.93       300

    accuracy                           0.93       598
   macro avg       0.93      0.93      0.93       598
weighted avg       0.93      0.93      0.93       598

--------------------
Report of confonded model:
              precision    recall  f1-score   support

           1       0.79      0.65      0.71       298
           2       0.70      0.83      0.76       300

    accuracy                           0.74       598
   macro avg       0.75      0.74      0.74       598
weighted avg       0.75      0.74      0.74       598

******************************
Test on the confounded test set:
Report of de-confonded model:
              precision    recall  f1-score   support

           1       0.94      0.93      0.93       312
           2       0.92      0