**Installing the datarobot package**

In [36]:
pip install datarobot # installing datarobot package


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


**Other Important Imports**

In [4]:
import datarobot as dr #  importing datarobot package
import datetime      #  importing datetime package
import pandas as pd   #  importing pandas package

**Configure the Python Client**

In [37]:
# Initialization with arguments
dr.Client(token='NjJiMDFjZDUzMmJhZTYwMmI1MmFmNzFjOldIam95R0ZDNnhoajhFZ2RCRGJBWTdXS1BjOU95TGFXMklub3E4NnovZmc9', endpoint='https://app2.datarobot.com/api/v2')


<datarobot.rest.RESTClientObject at 0x7fd4b2019350>

In [38]:
dr.Client(config_path = "/content/~.configdatarobotdrconfig.yaml")



<datarobot.rest.RESTClientObject at 0x7fd4b2019350>

**Create the Project**

In [39]:
filename = '/content/10K_Lending_Club_Loans.csv'
now = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M')
project_name = '/content/10K_Lending_Club_Loans_{}'.format(now)
proj = dr.Project.create(sourcedata=filename, project_name=project_name)
print('Project ID: {}'.format(proj.id))


Project ID: 62b0a364c94547156d001147


**Select Features for Modeling**

In [24]:
raw = [feat_list for feat_list in proj.get_featurelists()
       if feat_list.name == 'Raw Features'][0]
raw_features = [
    {
        "name": feat,
        "type": dr.Feature.get(proj.id, feat).feature_type
    }
    for feat in raw.features
]
pd.DataFrame.from_dict(raw_features)

Unnamed: 0,name,type
0,loan_amnt,Numeric
1,funded_amnt,Numeric
2,term,Categorical
3,int_rate,Percentage
4,installment,Numeric
5,grade,Categorical
6,sub_grade,Categorical
7,emp_title,Text
8,emp_length,Categorical
9,home_ownership,Categorical


**Modify Feature Types**

In [40]:
proj.create_type_transform_feature(
    "delinq_2yrs(Cat)",  # new feature name
    "delinq_2yrs",       # parent name
    dr.enums.VARIABLE_TYPE_TRANSFORM.CATEGORICAL_INT
)

Feature(delinq_2yrs(Cat))

In [41]:
proj.create_type_transform_feature(
    "addr_state(Text)",  # new feature name
    "addr_state",        # parent name
    dr.enums.VARIABLE_TYPE_TRANSFORM.TEXT
)

Feature(addr_state(Text))

**Select Features for Modeling**

In [42]:
feature_list_name = "new_feature_list"

new_feature_list = proj.create_featurelist(
    feature_list_name,
    list((set(raw.features) - {"addr_state", "delinq_2yrs"}) |
         {"delinq_2yrs(Cat)", "delinq_2yrs(Cat)"})
)

**Run the Automated Modeling Process**

In [28]:
proj.set_target(
    "is_bad",
    mode=dr.enums.AUTOPILOT_MODE.FULL_AUTO,
    metric="LogLoss",
    featurelist_id=new_feature_list.id,
    worker_count=-1
)

proj.wait_for_autopilot()

In progress: 20, queued: 20 (waited: 0s)
In progress: 20, queued: 20 (waited: 0s)
In progress: 20, queued: 20 (waited: 1s)
In progress: 20, queued: 20 (waited: 2s)
In progress: 20, queued: 20 (waited: 3s)
In progress: 20, queued: 20 (waited: 5s)
In progress: 20, queued: 20 (waited: 8s)
In progress: 13, queued: 19 (waited: 15s)
In progress: 19, queued: 5 (waited: 28s)
In progress: 14, queued: 0 (waited: 48s)
In progress: 6, queued: 0 (waited: 69s)
In progress: 3, queued: 0 (waited: 89s)
In progress: 3, queued: 0 (waited: 109s)
In progress: 3, queued: 0 (waited: 130s)
In progress: 0, queued: 0 (waited: 150s)
In progress: 20, queued: 0 (waited: 170s)
In progress: 12, queued: 0 (waited: 190s)
In progress: 8, queued: 0 (waited: 211s)
In progress: 7, queued: 0 (waited: 231s)
In progress: 7, queued: 0 (waited: 251s)
In progress: 7, queued: 0 (waited: 272s)
In progress: 7, queued: 0 (waited: 292s)
In progress: 3, queued: 0 (waited: 312s)
In progress: 1, queued: 0 (waited: 332s)
In progress: 12

**Exploring Trained Models**

In [33]:
models = proj.get_models()
for idx, model in enumerate(models):
    print('[{}]: {} - {}'.
          format(idx, model.metrics['LogLoss']['validation'],
                 model.model_type))

[0]: 0.36535 - Light Gradient Boosted Trees Classifier with Early Stopping
[1]: 0.36608 - Light Gradient Boosted Trees Classifier with Early Stopping
[2]: 0.36676 - Advanced AVG Blender
[3]: 0.36686 - AVG Blender
[4]: 0.36687 - eXtreme Gradient Boosted Trees Classifier with Early Stopping
[5]: 0.36718 - eXtreme Gradient Boosted Trees Classifier with Early Stopping
[6]: 0.36737 - eXtreme Gradient Boosted Trees Classifier with Early Stopping
[7]: 0.36769 - Light Gradient Boosted Trees Classifier with Early Stopping
[8]: 0.36807 - Light Gradient Boosted Trees Classifier with Early Stopping
[9]: 0.36874 - Gradient Boosted Trees Classifier with Early Stopping
[10]: 0.3688 - Light Gradient Boosted Trees Classifier with Early Stopping
[11]: 0.36937 - Gradient Boosted Trees Classifier with Early Stopping
[12]: 0.36954 - eXtreme Gradient Boosted Trees Classifier with Early Stopping
[13]: 0.37001 - eXtreme Gradient Boosted Trees Classifier with Early Stopping and Unsupervised Learning Features
[

In [34]:
dataset = proj.upload_dataset(filename)

model = dr.ModelRecommendation.get(
    proj.id,
    dr.enums.RECOMMENDED_MODEL_TYPE.RECOMMENDED_FOR_DEPLOYMENT
).get_model()

pred_job = model.request_predictions(dataset.id)
preds = pred_job.get_result_when_complete()

**Results**

In [35]:
preds.head()

Unnamed: 0,row_id,prediction,positive_probability,prediction_threshold,class_0.0,class_1.0
0,0,0.0,0.044376,0.5,0.955624,0.044376
1,1,0.0,0.271786,0.5,0.728214,0.271786
2,2,0.0,0.041263,0.5,0.958737,0.041263
3,3,0.0,0.11053,0.5,0.88947,0.11053
4,4,0.0,0.027711,0.5,0.972289,0.027711
