In [None]:
from optimus import Optimus
from optimus.ml import feature as fe
import h2o
# Create the Optimus entry point; later cells use `op` for loading data (op.load)
# and for model training (op.ml).
op = Optimus()

## Read data

In [None]:
# Load the CSV at data/train.csv; the path is relative, so it resolves against the
# kernel's current working directory.
df = op.load.csv("data/train.csv")

## Data cleansing

In [None]:
# Cleansing chain, applied in order and written back to `df`:
#   1. impute "LoanAmount", declared as a continuous column;
#   2. fill missing "Self_Employed" values with the string "No";
#   3. fill whatever is still missing under the "*" selector with 0
#      (presumably "*" means every column -- TODO confirm against Optimus docs).
# NOTE(review): this overwrites the frame loaded above, so the raw data is only
# recoverable by re-running the load cell.
df = (df.cols.impute("LoanAmount",data_type="continuous")
       .cols.fill_na("Self_Employed", "No")
       .cols.fill_na("*", 0))

In [None]:
# Run string_to_index over the six listed string columns and keep the result as
# `df_idx`. Later cells select columns such as 'Gender_index' and 'Married_index',
# so this step presumably adds one '<name>_index' column per input -- TODO confirm.
df_idx = fe.string_to_index(df, ['Gender','Married','Dependents','Education','Self_Employed','Property_Area'])

## H2O XGBoost

In [None]:
# Columns handed to the H2O training calls below as the feature list.
columns = [
    "Credit_History",
    "Gender_index",
    "Married_index",
    "Education_index",
]

In [None]:
# Call Optimus' H2O XGBoost helper on the indexed frame with "Loan_Status" and the
# `columns` list (presumably the label column and the feature columns -- confirm).
# The call returns a pair, unpacked here as (df_final, xgboost_model).
df_final, xgboost_model = op.ml.h2o_xgboost(df_idx, "Loan_Status", columns)

In [None]:
# Render df_final as a table, passing 5 (presumably a row limit -- confirm).
df_final.table(5)

In [None]:
# Bare expression: the notebook displays the fitted model object's repr.
xgboost_model

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and
# later releases removed the old names, so 'seaborn-pastel' alone fails on a
# current install. Prefer the new name when it is registered and fall back to the
# legacy name on older Matplotlib versions.
plt.style.use(
    'seaborn-v0_8-pastel'
    if 'seaborn-v0_8-pastel' in plt.style.available
    else 'seaborn-pastel'
)

In [None]:
def plot_importance(model, ax=None):
    """Draw a horizontal bar chart of a model's scaled variable importances.

    The model's details are read as a JSON string from ``model.getModelDetails()``.
    Its ``"variable_importances"]["data"`` entry is indexed positionally: entry 0
    supplies the bar labels and entry 2 supplies the bar lengths.

    Args:
        model: Any object exposing ``getModelDetails()`` that returns a JSON
            string containing a ``"variable_importances"`` table.
        ax: Optional Matplotlib Axes to draw on. When omitted, a new figure is
            created and shown with ``plt.show()``; when given, the chart is drawn
            on that Axes and showing the figure is left to the caller.

    Returns:
        None.

    Raises:
        ValueError: If the model details carry no variable-importance table, or
            the table has fewer than the three entries this function indexes.
    """
    # Fetch and parse the details once; the payload is the same for both columns.
    details = json.loads(model.getModelDetails())
    importances = details.get("variable_importances") or {}
    table = importances.get("data") or []
    if len(table) < 3:
        raise ValueError(
            "model details contain no usable 'variable_importances' data to plot"
        )
    variables = table[0]
    scaled_importance = table[2]

    owns_figure = ax is None
    if owns_figure:
        _, ax = plt.subplots()

    y_pos = np.arange(len(variables))
    ax.barh(y_pos, scaled_importance, align='center')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(variables)
    ax.invert_yaxis()  # first table row ends up at the top of the chart
    ax.set_xlabel('Scaled Importance')
    ax.set_title('Variable Importance')

    if owns_figure:
        plt.show()

In [None]:
# Chart the variable importances reported by the XGBoost model.
plot_importance(xgboost_model)

## Deep Learning

In [None]:
# Same inputs as the XGBoost cell (indexed frame, "Loan_Status", `columns`), but
# through Optimus' H2O deep-learning helper; the returned pair is unpacked as
# (df_final_dl, dl_model).
df_final_dl, dl_model = op.ml.h2o_deeplearning(df_idx, "Loan_Status", columns)

In [None]:
# Parse the model's JSON details and display the "model_summary" entry.
json.loads(dl_model.getModelDetails())["model_summary"]

In [None]:
# Display the "AUC" value stored under "training_metrics" in the model details.
# NOTE(review): going by the key name this is a training-set metric, not a
# held-out score -- confirm before quoting it as model quality.
json.loads(dl_model.getModelDetails())['training_metrics']["AUC"]

In [None]:
# Chart the variable importances reported by the deep-learning model.
plot_importance(dl_model)

### More columns

In [None]:
# Wider feature list: the four columns used earlier plus three more.
# Rebinding `columns` affects every training cell run after this one.
columns = [
    "Credit_History",
    "Gender_index",
    "Married_index",
    "Education_index",
    "Dependents_index",
    "LoanAmount",
    "ApplicantIncome",
]

In [None]:
# Re-run the deep-learning helper with the current `columns` list. This rebinds
# df_final_dl and dl_model, replacing the objects from the earlier run.
df_final_dl, dl_model = op.ml.h2o_deeplearning(df_idx, "Loan_Status", columns)

In [None]:
# Display the "model_summary" entry of the current dl_model's JSON details.
json.loads(dl_model.getModelDetails())["model_summary"]

In [None]:
# Display the "AUC" value under "training_metrics" for the current dl_model.
# NOTE(review): the key name suggests a training-set metric -- confirm.
json.loads(dl_model.getModelDetails())['training_metrics']["AUC"]

In [None]:
# Chart the variable importances of the current dl_model.
plot_importance(dl_model)