In [None]:
import os
import zipfile

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

!pip install plotly
import plotly.express as px

!pip install dataprep
from dataprep.eda import create_report

!pip install h2o
import h2o
from h2o.automl import H2OAutoML

In [None]:
# Location of the input CSV. Set the CERVICAL_CANCER_CSV environment variable
# to point at the file on another machine; when it is unset the notebook
# falls back to the path it was originally authored with.
DATA_PATH = os.environ.get("CERVICAL_CANCER_CSV",
                           "C:\\Users\\Lelin\\Downloads\\New\\cervical_cancer.csv")
cancer_df1 = pd.read_csv(DATA_PATH)

In [None]:
cancer_df1.head(10)

In [None]:
cancer_df1.info()

In [None]:
cancer_df1.describe()

In [None]:
cancer_df1 = cancer_df1.replace('?', np.nan)
cancer_df1

In [None]:
cancer_df1.isnull().sum()

In [None]:
# Heatmap of the missing-value mask: one column per feature, row labels hidden.
plt.figure(figsize=(10, 10))
sns.heatmap(data=cancer_df1.isna(), yticklabels=False)

In [None]:
cancer_df1 = cancer_df1.apply(pd.to_numeric)
cancer_df1.info()

In [None]:
cancer_df1.describe()

In [None]:
cancer_df1.mean()

In [None]:
cancer_df1 = cancer_df1.fillna(round(cancer_df1.mean()))
cancer_df1.head()

In [None]:
# Initialise the H2O backend used by every h2o.* call below.
h2o.init()

In [None]:
# Convert the cleaned pandas DataFrame into an H2OFrame for modelling.
h2o_df = h2o.H2OFrame(cancer_df1)

h2o_df

In [None]:
# Column-level summary of the H2O frame.
h2o_df.describe()

In [None]:
train, test = h2o_df.split_frame(ratios=[0.75], seed=121)

In [None]:
X = train.columns
y = 'ca_cervix'
X.remove(y)

In [None]:
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

In [None]:
X

In [None]:
y

In [None]:
# AutoML run: at most 20 models, fixed seed, class balancing on, AUC as the
# early-stopping metric, stacked ensembles excluded.
aml = H2OAutoML(
    max_models=20,
    seed=121,
    stopping_metric='AUC',
    balance_classes=True,
    exclude_algos=["StackedEnsemble"],
)

# Fit on the training split only; the test split is kept for evaluation.
aml.train(training_frame=train, x=X, y=y)

In [None]:
# Show the full AutoML leaderboard (all rows rather than the default head).
lb = aml.leaderboard
lb.head(rows=lb.nrows)

In [None]:
# Explanations for the AutoML run as a whole, computed on the test frame.
exm = aml.explain(test)

In [None]:
# Explanations for the leader model only, computed on the test frame.
# NOTE: this rebinds `exm`, replacing the result of the previous cell.
exm = aml.leader.explain(test)

In [None]:
preds = aml.leader.predict(test)

In [None]:
df = test.cbind(preds)

df

In [None]:
df = h2o.as_list(df)

In [None]:
preds = df[['ca_cervix', 'predict']]
preds

In [None]:
df.to_csv('df1.csv')
preds.to_csv('preds1.csv')

In [None]:
!pip install lime
import lime
import lime.lime_tabular

In [None]:
feature_names = train.columns[0: -1]
feature_names

In [None]:
train_features_numpy = train[feature_names].as_data_frame().values
train_features_numpy

In [None]:
explainer = lime.lime_tabular.LimeTabularExplainer(train_features_numpy, 
                                                   feature_names = feature_names,
                                                   mode='classification')

In [None]:
def findPrediction(instance):
    """Prediction function handed to LIME: score rows with the AutoML leader.

    Parameters
    ----------
    instance : array-like
        Feature values ordered like ``feature_names``. Normally a 2-D batch of
        shape (n_samples, n_features), which is what LIME passes; a single 1-D
        row is also accepted and treated as a batch of one.

    Returns
    -------
    numpy.ndarray
        Every column of the leader's prediction frame except the first one,
        one row per input sample.
        NOTE(review): the first column is presumably the predicted label and
        the remaining ones the per-class probabilities - confirm against the
        H2O output for this model.
    """
    # Promote a lone 1-D row to shape (1, n_features); 2-D input is unchanged.
    batch = np.atleast_2d(instance)
    features = pd.DataFrame(data=batch, columns=feature_names)
    hf = h2o.H2OFrame(features)
    predictions = aml.leader.predict(hf).as_data_frame()
    # Drop the first column and return the rest as a NumPy array.
    return predictions.iloc[:, 1:].values

In [None]:
# Display the training split.
train

In [None]:
# Display the held-out test split.
test

In [None]:
idx = 10
test_df = test.as_data_frame()
test_numpy = test_df.iloc[idx].values[0:-1]

In [None]:
print(test_df.shape)

In [None]:
test_numpy

In [None]:
exp = explainer.explain_instance(test_numpy, 
                                 findPrediction, 
                                 num_features = len(feature_names))
exp

In [None]:
exp.show_in_notebook(show_table=True, show_all=True)

In [None]:
exp.save_to_file('lime11.html')

In [None]:
idx = 12
test_df = test.as_data_frame()
test_numpy = test_df.iloc[idx].values[0:-1]

exp = explainer.explain_instance(test_numpy, 
                                 findPrediction, 
                                 num_features = len(feature_names))

exp.show_in_notebook(show_table=True, show_all=True)

In [None]:
exp.save_to_file('lime12.html')

In [None]:
idx = 15
test_df = test.as_data_frame()
test_numpy = test_df.iloc[idx].values[0:-1]

exp = explainer.explain_instance(test_numpy, 
                                 findPrediction, 
                                 num_features = len(feature_names))

exp.show_in_notebook(show_table=True, show_all=True)

In [None]:
exp.save_to_file('lime13.html')