<h2>Creating a model for <i>risk default</i> probability assessment</h2>

In [None]:
import pandas as pd

# Path of the credit-data CSV that the rest of the notebook works on.
csv_path = '/project_data/data_asset/german_credit_data_complete.csv'
df_data = pd.read_csv(csv_path)

# Show the first rows as the cell output to sanity-check the load.
df_data.head()

<h4>Explore data</h4>

In [None]:
# Report how many rows were loaded.
print(f"Number of records: {len(df_data)}")

<h4> Create a model </h4>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Columns to label-encode, in the order they are processed. "Risk" is the
# target column used by the later split/fit/evaluate cells.
columns_to_encode = [
    "CheckingStatus",
    "CreditHistory",
    "LoanPurpose",
    "ExistingSavings",
    "EmploymentDuration",
    "Sex",
    "OthersOnLoan",
    "OwnsProperty",
    "InstallmentPlans",
    "Housing",
    "Job",
    "Telephone",
    "ForeignWorker",
    "Risk",
]

# Overwrite each column with integer codes. A fresh encoder is fitted per
# column and then discarded, so the label -> code mapping is not kept around.
for column_name in columns_to_encode:
    df_data[column_name] = LabelEncoder().fit_transform(df_data[column_name])

In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on "Risk" keeps the
# class proportions the same in both partitions. The fixed random_state makes
# the split -- and every metric derived from it -- identical on each
# Restart & Run All instead of changing from run to run.
(train_data, test_data) = train_test_split(
    df_data,
    test_size=0.2,
    stratify=df_data["Risk"],
    random_state=42,
)

print("Number of records for training: " + str(train_data.shape[0]))
print("Number of records for evaluation: " + str(test_data.shape[0]))

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# 100-tree random forest. random_state is fixed so that the bootstrap samples
# and feature sub-sampling -- and therefore the fitted model and its scores --
# are reproducible between runs.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train on every column except the "Risk" target; the fitted estimator is the
# cell's displayed output.
rf.fit(train_data.loc[:, train_data.columns != "Risk"], train_data["Risk"])

In [None]:
from sklearn.metrics import roc_auc_score

# Features of the held-out rows (everything except the "Risk" target).
test_features = test_data.loc[:, test_data.columns != "Risk"]

# Hard class labels, kept under the original name for any later use.
predictions = rf.predict(test_features)

# ROC AUC measures how well the model *ranks* examples, so it needs continuous
# scores. Feeding it hard 0/1 labels collapses the ROC curve to a single
# operating point and misreports the model's discriminative power.
# predict_proba columns follow rf.classes_, so column 1 is the larger encoded
# label, which is the class roc_auc_score treats as positive for a binary target.
# NOTE(review): assumes "Risk" has exactly two classes -- confirm.
positive_class_scores = rf.predict_proba(test_features)[:, 1]
area_under_curve = roc_auc_score(test_data["Risk"], positive_class_scores)

print("areaUnderROC = %g" % area_under_curve)

<h4>Publish the model as an asset</h4>

In [None]:
# Shell out to pip to install the package that provides the
# `watson_machine_learning_client` module imported further down.
# NOTE(review): no version is pinned, so a re-run may pull a different release;
# `!pip` can also resolve to a different environment than the running kernel
# (`%pip` targets the kernel's environment) -- confirm before relying on it.
!pip install watson-machine-learning-client-V4

In [None]:
import sys,os,os.path
# Access token taken from the USER_ACCESS_TOKEN environment variable; the
# lookup raises KeyError when the variable is not set.
token = os.environ["USER_ACCESS_TOKEN"]

# Connection settings passed to the Watson Machine Learning API client.
wml_credentials = dict(
    token=token,
    instance_id="wml_local",
    url="https://mlpattern.184.170.232.151.nip.io",
    version="2.5.0",
)

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# API client used by the publishing cells, built from `wml_credentials`.
wml_client = WatsonMachineLearningAPIClient(wml_credentials)

In [None]:
# Display name intended for the published model.
# NOTE(review): no visible cell reads this constant -- the metadata cell passes
# the literal "Credit Risk Scikit" as the model name instead. Confirm which
# name is intended.
MODEL_NAME = "[DEMO] Credit Risk RF"

In [None]:
def guid_from_space_name(client, space_name):
    """Return the GUID of the deployment space called ``space_name``.

    Args:
        client: Object exposing ``spaces.get_details()``, which must return a
            mapping whose ``'resources'`` list holds entries shaped like
            ``{'entity': {'name': ...}, 'metadata': {'guid': ...}}``.
        space_name: Exact (case-sensitive) name of the space to look up.

    Returns:
        The ``metadata['guid']`` value of the first space whose name matches.

    Raises:
        ValueError: If no space with that name exists. (Previously a bare
            ``StopIteration`` escaped from ``next()``, which gives no hint
            about what went wrong.)
    """
    # The earlier, unused ``client.service_instance.get_details()`` call was
    # dropped: its result was never read, so it only cost a service round trip.
    spaces = client.spaces.get_details()["resources"]

    for space in spaces:
        if space["entity"]["name"] == space_name:
            return space["metadata"]["guid"]

    # Every entry was already indexed successfully in the loop above, so
    # building the list of known names cannot raise KeyError here.
    known_names = [space["entity"]["name"] for space in spaces]
    raise ValueError(
        "No deployment space named %r; available spaces: %r"
        % (space_name, known_names)
    )

In [None]:
# Look up the GUID of the deployment space that the model metadata points at.
target_space_name = 'wsl_jupyterlab_demo'
space_uid = guid_from_space_name(wml_client, target_space_name)

In [None]:
# Metadata stored alongside the model. The API client object in this notebook
# is `wml_client`; the bare name `client` is never defined at notebook level,
# so referencing it here raised NameError on a fresh kernel.
meta_names = wml_client.repository.ModelMetaNames
meta_props = {
    meta_names.NAME: "Credit Risk Scikit",
    meta_names.RUNTIME_UID: "scikit-learn_0.20-py3.6",
    meta_names.TYPE: "scikit-learn_0.20",
    meta_names.SPACE_UID: space_uid,
}

In [None]:
# Store the fitted forest in the repository together with the training features
# and target. Uses `wml_client` -- the bare name `client` is never defined at
# notebook level and raised NameError on a fresh kernel.
model_artifact = wml_client.repository.store_model(
    rf,
    meta_props=meta_props,
    training_data=train_data.loc[:, train_data.columns != "Risk"],
    training_target=train_data["Risk"],
)

# Extract the stored model's UID from the returned artifact details.
model_uid = wml_client.repository.get_model_uid(model_artifact)
print("Model UID = " + model_uid)

In [None]:
from pprint import pprint

# Fetch and pretty-print the stored model's details. Uses `wml_client` -- the
# bare name `client` is never defined at notebook level and raised NameError
# on a fresh kernel.
model_details = wml_client.repository.get_details(model_uid)
pprint(model_details)