In [2]:
import pandas as pd
import torch
from data.datasets import ClinicalCovid
import numpy as np
from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.pretraining import TabNetPretrainer
from augmentations import ClassificationSMOTE
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tabnet_utils import regularized_loss
import pickle

In [3]:
# Preprocessing switch: when True, the data-loading cell calls train_set.normalize()
# after imputation; when False that call is skipped.
normalization = False

In [4]:
# Columns removed up front, before any missing-value handling.
initial_drop_list = [
    'ID',
    'cough',
    'sputum',
    'chills',
    'Sore throat',
    'dizziness',
    'stomachache',
    'Diarrhea',
    'Nausea',
    'runny nose',
    'Nasal congestion',
    'alcohol',
]

train_set = ClinicalCovid(
    'data/all_final.csv',
    drop_list=initial_drop_list,
)

# Step 1: drop columns by missingness (threshold=0.5); the return value is kept
# so the dropped column names can be inspected later.
train_dropped_list = train_set.drop_missing_columns(threshold=0.5)

# Step 2: impute the remaining gaps.
# NOTE(review): no seed is passed here -- if the imputer is stochastic, confirm
# whether this step is reproducible across runs.
train_set.fill_missing_data(
    n_dset=1,
    return_dset=0,
    iters=5,
    n_tree=50,
    print_kernel=False,
)
# Alternative left disabled: train_set.drop_missing_row()

# Step 3 (optional): normalization, controlled by the notebook-level flag.
if normalization:
    train_set.normalize()

Initiating data imputation
Creating Kernel
Initiating data iteration


In [5]:
# Split the prepared dataset into train/test features and labels.
# test_ratio=0.2 presumably holds out 20% of the rows -- confirm in ClinicalCovid.data_split.
# random_state=101 is fixed here, presumably so the split is repeatable across runs.
x_train, x_test, y_train, y_test = train_set.data_split(test_ratio=0.2, random_state=101)

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Baseline classifier: seeded random forest trained on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(random_state=42).fit(x_train, y_train)

# Mean accuracy on the held-out split (kept in `score`; not displayed by this cell).
score = model.score(x_test, y_test)

In [8]:
import lime
from lime import lime_tabular

# Feature names for LIME: every column of the prepared frame except the target.
# assumes this column order matches the feature order of x_train -- TODO confirm
cols = train_set.dataframe.columns.to_list()
cols.remove('outcome')

# LIME builds each explanation from randomly perturbed samples. Pinning
# random_state makes the explanations repeatable across kernel restarts,
# matching the fixed seeds already used for the split and the model.
explainer = lime_tabular.LimeTabularExplainer(
    training_data=np.array(x_train),
    feature_names=cols,
    class_names=['discharged', 'expired'],
    mode='classification',
    random_state=42,
)

In [18]:
import os

# Export LIME explanations for the first few test rows as standalone HTML files.
# The output folder is created up front so the cell works on a fresh checkout.
os.makedirs('lime', exist_ok=True)

# Explain at most 10 rows, but never index past the end of a small test split.
n_explanations = min(10, len(x_test))

for i in range(n_explanations):
    # Explain the model's class probabilities for test row i.
    exp = explainer.explain_instance(
        data_row=x_test[i],
        predict_fn=model.predict_proba,
    )

    exp.save_to_file(f'lime/lime{i}.html')