In [37]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer

from sklearn.ensemble import RandomForestClassifier

import xgboost as xgb
import bentoml
import json
from pydantic import BaseModel
import requests

In [3]:
df = pd.read_csv('CreditScoring.csv')

In [4]:
df.columns = df.columns.str.lower()

status_values = {
    1: 'ok',
    2: 'default',
    0: 'unk'
}

df.status = df.status.map(status_values)

home_values = {
    1: 'rent',
    2: 'owner',
    3: 'private',
    4: 'ignore',
    5: 'parents',
    6: 'other',
    0: 'unk'
}

df.home = df.home.map(home_values)

marital_values = {
    1: 'single',
    2: 'married',
    3: 'widow',
    4: 'separated',
    5: 'divorced',
    0: 'unk'
}

df.marital = df.marital.map(marital_values)

records_values = {
    1: 'no',
    2: 'yes',
    0: 'unk'
}

df.records = df.records.map(records_values)

job_values = {
    1: 'fixed',
    2: 'partime',
    3: 'freelance',
    4: 'others',
    0: 'unk'
}

df.job = df.job.map(job_values)

for c in ['income', 'assets', 'debt']:
    df[c] = df[c].replace(to_replace=99999999, value=np.nan)

df = df[df.status != 'unk'].reset_index(drop=True)

In [5]:
df_train, df_test = train_test_split(df, test_size=0.2, random_state=11)

df_train = df_train.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

y_train = (df_train.status == 'default').astype('int').values
y_test = (df_test.status == 'default').astype('int').values

del df_train['status']
del df_test['status']

In [6]:
dv = DictVectorizer(sparse=False)

train_dicts = df_train.fillna(0).to_dict(orient='records')
X_train = dv.fit_transform(train_dicts)

test_dicts = df_test.fillna(0).to_dict(orient='records')
X_test = dv.transform(test_dicts)

In [7]:
rf = RandomForestClassifier(n_estimators=200,
                            max_depth=10,
                            min_samples_leaf=3,
                            random_state=1)
rf.fit(X_train, y_train)

In [8]:
dtrain = xgb.DMatrix(X_train, label=y_train)

In [9]:
xgb_params = {
    'eta': 0.1, 
    'max_depth': 3,
    'min_child_weight': 1,

    'objective': 'binary:logistic',
    'eval_metric': 'auc',

    'nthread': 8,
    'seed': 1,
    'verbosity': 1,
}

model = xgb.train(xgb_params, dtrain, num_boost_round=175)

In [10]:
bentoml.xgboost.save_model(
    'credit_risk_model',
    model,
    custom_objects={
        'dictVectorizer': dv
    })

Model(tag="credit_risk_model:dxc2pv2t4ksstwea", path="C:\Users\gh611680\bentoml\models\credit_risk_model\dxc2pv2t4ksstwea\")

In [11]:
request = df_test.iloc[0].to_dict()
print(json.dumps(request, indent=2))

{
  "seniority": 3,
  "home": "owner",
  "time": 36,
  "age": 26,
  "marital": "single",
  "records": "no",
  "job": "freelance",
  "expenses": 35,
  "income": 0.0,
  "assets": 60000.0,
  "debt": 3000.0,
  "amount": 800,
  "price": 1000
}


In [12]:
pydantic

'1.0.7'

In [13]:
risk_model = bentoml.xgboost.save_model(
    'credit_risk_model',
    model,
    custom_objects={
        'dictVectorizer': dv
    })

In [14]:
risk_model.info.to_dict()

{'name': 'credit_risk_model',
 'version': 'enapomct4kvcbwea',
 'module': 'bentoml.xgboost',
 'labels': {},
 'options': {'model_class': 'Booster'},
 'metadata': {},
 'context': {'framework_name': 'xgboost',
  'framework_versions': {'xgboost': '1.6.2'},
  'bentoml_version': '1.0.7',
  'python_version': '3.10.4'},
 'signatures': {'predict': {'batchable': False}},
 'api_version': 'v2',
 'creation_time': '2022-10-24T21:23:43.753783+00:00'}

In [15]:
risk_model.validate

<bound method Model.validate of Model(tag="credit_risk_model:enapomct4kvcbwea", path="C:\Users\gh611680\bentoml\models\credit_risk_model\enapomct4kvcbwea\")>

In [16]:
class UserProfile(BaseModel):
    name : str
    age: int
    country : str
    rating : float

In [17]:
userOk = {
  "name": "Tim",
  "age": 37,
  "country": "US",
  "rating": 3.14
}

In [18]:
# Wrong test data
userNOk = {
  "name": 43,
  "age": "How old?",
  "country":  840,
  "rating": 2
}

In [19]:
try:
    print(f"{UserProfile(**userOk)}")
    print(f">>> OK -correct data")
except Exception as err:
    print(f">>> NOT OK \n Wrong data:  {err}")

name='Tim' age=37 country='US' rating=3.14
>>> OK -correct data


In [20]:
!curl --help

Usage: curl [options...] <url>
 -d, --data <data>          HTTP POST data
 -f, --fail                 Fail silently (no output at all) on HTTP errors
 -h, --help <category>      Get help for commands
 -i, --include              Include protocol response headers in the output
 -o, --output <file>        Write to file instead of stdout
 -O, --remote-name          Write output to a file named as the remote file
 -s, --silent               Silent mode
 -T, --upload-file <file>   Transfer local FILE to destination
 -u, --user <user:password> Server user and password
 -A, --user-agent <name>    Send User-Agent <name> to server
 -v, --verbose              Make the operation more talkative
 -V, --version              Show version number and quit

This is not the full help, this menu is stripped into categories.
Use "--help category" to get an overview of all categories.
For all options use the manual or "--help all".


In [21]:
!curl -O https://s3.us-west-2.amazonaws.com/bentoml.com/mlzoomcamp/coolmodel.bentomodel

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  1724  100  1724    0     0   1591      0  0:00:01  0:00:01 --:--:--  1593


In [22]:
!bentoml models import coolmodel.bentomodel

Error: [models] `import` failed: Item 'mlzoomcamp_homework:qtzdz3slg6mwwdu5' already exists in the store <osfs 'C:\Users\gh611680\bentoml\models'>


In [23]:
!bentoml models list

 Tag                          Module           Size        Creation Time       
 credit_risk_model:enapomctâ€¦  bentoml.xgboost  197.77 KiB  2022-10-24 17:23:43 
 credit_risk_model:dxc2pv2tâ€¦  bentoml.xgboost  197.77 KiB  2022-10-24 17:23:34 
 credit_risk_model:lj5jbr2tâ€¦  bentoml.xgboost  197.77 KiB  2022-10-24 16:28:00 
 credit_risk_model:hrh4nzktâ€¦  bentoml.xgboost  197.77 KiB  2022-10-24 16:27:09 
 credit_risk_model:sggce2ctâ€¦  bentoml.xgboost  197.77 KiB  2022-10-24 10:38:47 
 credit_model:rgg7cp2tu26r3â€¦  bentoml.xgboost  197.06 KiB  2022-10-24 10:17:05 
 credit_model:c6dnh6stu2hmzâ€¦  bentoml.xgboost  197.04 KiB  2022-10-24 10:13:54 
 credit_model:l66rs3ctt2mk7â€¦  bentoml.xgboost  1.13 MiB    2022-10-24 09:18:39 
 mlzoomcamp_homework:jsi67fâ€¦  bentoml.sklearn  5.82 KiB    2022-10-14 10:48:43 
 mlzoomcamp_homework:qtzdz3â€¦  bentoml.sklearn  5.79 KiB    2022-10-13 16:42:14 


In [24]:
!curl -O https://s3.us-west-2.amazonaws.com/bentoml.com/mlzoomcamp/coolmodel.bentomodel


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  1724  100  1724    0     0   1627      0  0:00:01  0:00:01 --:--:--  1629


In [25]:
!bentoml models get mlzoomcamp_homework:qtzdz3slg6mwwdu5

name: mlzoomcamp_homework                                                      
version: qtzdz3slg6mwwdu5                                                      
module: bentoml.sklearn                                                        
labels: {}                                                                     
options: {}                                                                    
metadata: {}                                                                   
context:                                                                       
  framework_name: sklearn                                                      
  framework_versions:                                                          
    scikit-learn: 1.1.1                                                        
  bentoml_version: 1.0.7                                                       
  python_version: 3.9.12                                                       
signatures:                             

In [27]:
!type bentofile.yaml

service: "service.py:service"  
labels:
    owner: bentoml-team
    project: gallery
include:
- "*.py"  
python:
    packages:  
    - scikit-learn
    - pandas
    - xgboost
    - pydantic
    - numpy


In [28]:
!curl -O https://s3.us-west-2.amazonaws.com/bentoml.com/mlzoomcamp/coolmodel2.bentomodel

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  1728  100  1728    0     0   1647      0  0:00:01  0:00:01 --:--:--  1648


In [30]:
info = '[[6.4,3.5,4.5,1.2]]'

response = requests.post("http://localhost:3000/classify", headers={"content-type": "application/json"},
                         data=info,).text

response

'[1]'

In [32]:
!curl -O https://s3.us-west-2.amazonaws.com/bentoml.com/mlzoomcamp/coolmodel2.bentomodel

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  1728  100  1728    0     0   1577      0  0:00:01  0:00:01 --:--:--  1579


In [34]:
!bentoml models import coolmodel2.bentomodel

Error: [models] `import` failed: Item 'mlzoomcamp_homework:jsi67fslz6txydu5' already exists in the store <osfs 'C:\Users\gh611680\bentoml\models'>


In [35]:
request = df_test.iloc[45].to_dict()
print(json.dumps(request, indent=2))

{
  "seniority": 28,
  "home": "owner",
  "time": 36,
  "age": 46,
  "marital": "married",
  "records": "no",
  "job": "fixed",
  "expenses": 75,
  "income": 170.0,
  "assets": 6000.0,
  "debt": 0.0,
  "amount": 750,
  "price": 800
}


In [38]:
pydantic
pydantic

NameError: name 'pydantic' is not defined