In [1]:
# imports
from Utils import DataLoader
from interpret.glassbox import (LogisticRegression,
                                ClassificationTree,
                                ExplainableBoostingClassifier)
from interpret import show
from sklearn.metrics import f1_score, accuracy_score

In [2]:
data_loader = DataLoader()
data_loader.load_dataset()
data_loader.preprocess_data()

In [3]:
#split the data for evaluation
X_train,X_test,y_train,y_test = data_loader.get_data_split()
print(X_train.shape)
print(X_test.shape)


(4088, 21)
(1022, 21)


In [4]:
#oversample the data
X_train, y_train = data_loader.oversample(X_train,y_train)
print(X_train.shape)
print(y_train.shape)

(7778, 21)
(7778,)


In [5]:
# training the dataset
lr = LogisticRegression(random_state=2024, feature_names=X_train.columns, penalty = "l1", solver ='liblinear')
lr.fit(X_train,y_train)
print('training completed')


training completed


In [6]:
#testing the dataset and checking accuracy
y_pred = lr.predict(X_test)
print(f"F1_score   {f1_score(y_test,y_pred,average='macro')}")
print(f"Accurancy {accuracy_score(y_test, y_pred)}")

F1_score   0.5193734309253563
Accurancy 0.7397260273972602


In [7]:
#explain local prediction
lr_local = lr.explain_local(X_test[:10],y_test[:10],name='logistic Regression')
show(lr_local)

In [8]:
lr_global = lr.explain_global(name='Logistic Regression')
show(lr_global)

In [9]:
#Training model with ebm
ebm = ExplainableBoostingClassifier(random_state=2024)
ebm.fit(X_train,y_train)
print('training succeeded')
y_pred_ebm = ebm.predict(X_test)
print(f"F1 score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

training succeeded
F1 score 0.5193734309253563
Accuracy 0.7397260273972602


In [10]:
ebm_local = ebm.explain_local(X_test[:10],y_test[:10],name='Explaination boosting classifier')
show(ebm_local)

In [11]:
ebm_global = ebm.explain_global(name='explaianable boosting machine')
show(ebm_global)

In [12]:
tree = ClassificationTree()
tree.fit(X_train, y_train)
print('training completed')
tree_pred = tree.predict(X_test)
print(f"F1 score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

training completed
F1 score 0.5193734309253563
Accuracy 0.7397260273972602


In [13]:
tree_exp_local = tree.explain_local(X_test[:10],y_test[:10],name='Decision tree classifier')
show(tree_exp_local)

In [14]:
tree_exp_global = tree.explain_global(name = 'Decision tree classification')
show(tree_exp_global)