In [1]:
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression


from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer


In [2]:
credit = pd.read_csv("german_credit_prepared.csv", sep=",", engine="python")

input_types = {'account_check_status':"category", 
               'duration_in_month':"numeric",
               'credit_history':"category",
               'purpose':"category",
               'credit_amount':"numeric",
               'savings':"category",
               'present_emp_since':"category",
               'installment_as_income_perc':"numeric",
               'sex':"category",
               'personal_status':"category",
               'other_debtors':"category",
               'present_res_since':"numeric",
               'property':"category",
               'age':"numeric",
               'other_installment_plans':"category",
               'housing':"category",
               'credits_this_bank':"numeric",
               'job':"category",
               'people_under_maintenance':"numeric",
               'telephone':"category",
               'foreign_worker':"category"}


##Learning process

In [3]:
columns_to_scale = [key for key in input_types.keys() if input_types[key]=="numeric"]

numeric_transformer = Pipeline([('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])


columns_to_encode = [key for key in input_types.keys() if input_types[key]=="category"]

categorical_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore',sparse=False)) ])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, columns_to_scale),
      ('cat', categorical_transformer, columns_to_encode)
          ]
)
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', LogisticRegression(max_iter =1000))])


Y=credit['default']
X= credit.drop(columns="default")
X_train,X_test,Y_train,Y_test = model_selection.train_test_split(X, Y, test_size=0.20,random_state = 30, stratify = Y)

In [4]:
clf.fit(X_train, Y_train)
print("model score: %.3f" % clf.score(X_test, Y_test))

model score: 0.785


In [5]:
from sklearn.metrics import confusion_matrix
Y_test_pred = clf.predict(X_test)
confusion_matrix = confusion_matrix(Y_test, Y_test_pred)
print(confusion_matrix)
total=sum(sum(confusion_matrix))
sensitivity_recall = confusion_matrix[0,0]/(confusion_matrix[0,0]+confusion_matrix[1,0])
print('Sensitivity_recall : ',sensitivity_recall )
Specificity = confusion_matrix[1,1]/(confusion_matrix[1,1]+confusion_matrix[0,1])
print('Specificity: ', Specificity)
precision = confusion_matrix[0,0]/(confusion_matrix[0,0]+confusion_matrix[0,1])
print('Precision: ', precision)
accuracy =(confusion_matrix[0,0]+confusion_matrix[1,1])/(confusion_matrix[0,0]+confusion_matrix[0,1]+confusion_matrix[1,0]+confusion_matrix[1,1])
print('Accuracy: ', accuracy)

[[125  15]
 [ 28  32]]
Sensitivity_recall :  0.8169934640522876
Specificity:  0.6808510638297872
Precision:  0.8928571428571429
Accuracy:  0.785


##Import in Giskard

In [6]:
#!pip install ai-inspector

In [7]:
from ai_inspector import ModelInspector

inspector = ModelInspector(
    prediction_function= clf.predict_proba,
    prediction_task="classification",
    input_types=input_types,
    classification_labels=["Not default","Default"],
)

### Uploading model and dataset

In [8]:
# Model and dataset can be uploaded either using a programmatic way:

inspector.upload_model_and_df(
#     url="http://localhost:19000", # When running a non-dockerized notebook
    url="http://frontend",
    project_key="Demo project",
    target_column="default",
    model_name="German credit scoring",
    df=credit,
    api_token="XXX"
)[0]

# Or through a small UI widget:
# inspector.inspect(credit)

'OK'