# Imports

In [10]:
%reset -f
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
from pathlib import Path

from tqdm import tqdm

import pandas as pd

from sklearn.metrics import log_loss 

import pycaret.classification as pc

from controller import Controller

In [12]:
c = Controller('i01')

COLS_TO_DROP = [
    'PROJECT_ID',
    'TRAIN_VAL_TEST_SPLIT',
    'DEADLINE',
    'STATE_CHANGED_AT',
    'CREATED_AT',
    'LAUNCHED_AT',
    'KEYWORDS',
    
    'DESC',
    'NAME',
]

PLOT_TYPES = [
    'pipeline',
    'auc',
    'threshold',
    'pr',
    'confusion_matrix',
    'error',
    'class_report',
    'boundary',
    'learning',
    'calibration',
    'vc',
    'dimension',
    'feature',
    'feature_all',
    'parameter',
    'lift',
    'gain',
    'tree',
    'ks',
]

# Load data

In [13]:
filepath = fr'{c.get_path_data_prepared()}/01_df_development.pkl'
df_development = pd.read_pickle(filepath)
df_development = df_development.drop(COLS_TO_DROP, axis=1)
df_development

Unnamed: 0,GOAL,DISABLE_COMMUNICATION,COUNTRY,CURRENCY,BACKERS_COUNT,FINAL_STATUS,CREATE_LAUNCH_HOURS,CREATE_LAUNCH_HOURS_LOG,CREATE_DEADLINE_HOURS,CREATE_DEADLINE_HOURS_LOG,LAUNCHED_DEADLINE_HOURS
0,60000.0,False,US,USD,4,0,2375.831389,7.773524,3095.831389,8.038135,720.000000
1,800.0,False,US,USD,41,1,119.452500,4.791255,1400.657500,7.245411,1281.205000
2,10000.0,False,US,USD,1,0,6345.470556,8.755654,7185.470556,8.879955,840.000000
3,270.0,False,GB,GBP,0,0,0.920833,0.652759,192.920833,5.267450,192.000000
4,5.0,False,GB,GBP,0,0,823.233611,6.714454,1544.233611,7.342930,721.000000
...,...,...,...,...,...,...,...,...,...,...,...
108124,250.0,False,US,USD,24,1,835.094444,6.728742,1373.173056,7.225607,538.078611
108125,5000.0,False,US,USD,0,0,285.817778,5.658847,1005.817778,6.914550,720.000000
108126,45000.0,False,CA,CAD,36,0,310.938611,5.742806,1030.938611,6.939194,720.000000
108127,3000.0,False,US,USD,0,0,152.635278,5.034581,873.635278,6.773807,721.000000


In [14]:
path_lgbm = fr'{c.get_path_iteration()}/pycaret_best_model'
lgbm_tuned = pc.load_model(path_lgbm)
lgbm_tuned

Transformation Pipeline and Model Successfully Loaded


In [15]:
path_lgbm = fr'{c.get_path_iteration()}/pycaret_best_overall_model'
catboost_tuned = pc.load_model(path_lgbm)
catboost_tuned

Transformation Pipeline and Model Successfully Loaded


# Setup

In [16]:
s1 = pc.setup(data=df_development, train_size=0.8, target='FINAL_STATUS', session_id=42)

pc.remove_metric('Kappa')
pc.remove_metric('Accuracy')
pc.add_metric('logloss', 'Log Loss', log_loss, greater_is_better=False)
pc.get_metrics()

Unnamed: 0,Description,Value
0,Session id,42
1,Target,FINAL_STATUS
2,Target type,Binary
3,Original data shape,"(108129, 11)"
4,Transformed data shape,"(108129, 28)"
5,Transformed train set shape,"(86503, 28)"
6,Transformed test set shape,"(21626, 28)"
7,Numeric features,7
8,Categorical features,2
9,Preprocess,True


Unnamed: 0_level_0,Name,Display Name,Score Function,Scorer,Target,Args,Greater is Better,Multiclass,Custom
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
auc,AUC,AUC,<pycaret.internal.metrics.BinaryMulticlassScor...,"make_scorer(roc_auc_score, needs_proba=True, e...",pred_proba,"{'average': 'weighted', 'multi_class': 'ovr'}",True,True,False
recall,Recall,Recall,<pycaret.internal.metrics.BinaryMulticlassScor...,"make_scorer(recall_score, average=weighted)",pred,{'average': 'weighted'},True,True,False
precision,Precision,Prec.,<pycaret.internal.metrics.BinaryMulticlassScor...,"make_scorer(precision_score, average=weighted)",pred,{'average': 'weighted'},True,True,False
f1,F1,F1,<pycaret.internal.metrics.BinaryMulticlassScor...,"make_scorer(f1_score, average=weighted)",pred,{'average': 'weighted'},True,True,False
mcc,MCC,MCC,<function matthews_corrcoef at 0x7ff6e40f9510>,make_scorer(matthews_corrcoef),pred,{},True,True,False
logloss,Log Loss,Log Loss,<function log_loss at 0x7ff6e40f9cf0>,"make_scorer(log_loss, greater_is_better=False)",pred,{},False,True,True


# Evaluate models

In [17]:
def save_model_plots(location, plots, model):
    len_plots = len(plots)
    destination_dir = Path(location)
    destination_dir.mkdir(parents=True, exist_ok=True)

    for i, plot_type in enumerate(plots):
        print(f'Creating [{i+1}/{len_plots}] {plot_type}')
        
        try:
            plot_path = pc.plot_model(
                model,
                plot=plot_type,
                plot_kwargs={'title': plot_type},
                save=True
            )

            # Move plot to "location".
            if plot_path is not None:
                source_file = Path(plot_path)
                new_filename = f'{plot_type}.png'
                destination_file = destination_dir / new_filename
                source_file.rename(destination_file)
        except Exception as e:
            print(str(e))
            print(f'ERROR: {plot_type} could not be produced!')

In [18]:
dir_lgbm = f'{c.get_path_iteration()}/plots_lgbm_tuned'
save_model_plots(dir_lgbm, PLOT_TYPES, lgbm_tuned)

Creating [2/19] auc


Creating [3/19] threshold


Creating [4/19] pr


Creating [5/19] confusion_matrix


Creating [6/19] error


Creating [7/19] class_report


Creating [8/19] boundary


Creating [9/19] learning


Creating [10/19] calibration


keyword grid_b is not recognized; valid keywords are ['size', 'width', 'color', 'tickdir', 'pad', 'labelsize', 'labelcolor', 'zorder', 'gridOn', 'tick1On', 'tick2On', 'label1On', 'label2On', 'length', 'direction', 'left', 'bottom', 'right', 'top', 'labelleft', 'labelbottom', 'labelright', 'labeltop', 'labelrotation', 'grid_agg_filter', 'grid_alpha', 'grid_animated', 'grid_antialiased', 'grid_clip_box', 'grid_clip_on', 'grid_clip_path', 'grid_color', 'grid_dash_capstyle', 'grid_dash_joinstyle', 'grid_dashes', 'grid_data', 'grid_drawstyle', 'grid_figure', 'grid_fillstyle', 'grid_gapcolor', 'grid_gid', 'grid_in_layout', 'grid_label', 'grid_linestyle', 'grid_linewidth', 'grid_marker', 'grid_markeredgecolor', 'grid_markeredgewidth', 'grid_markerfacecolor', 'grid_markerfacecoloralt', 'grid_markersize', 'grid_markevery', 'grid_mouseover', 'grid_path_effects', 'grid_picker', 'grid_pickradius', 'grid_rasterized', 'grid_sketch_params', 'grid_snap', 'grid_solid_capstyle', 'grid_solid_joinstyle', 

Creating [12/19] dimension


Creating [13/19] feature


Creating [14/19] feature_all


Creating [15/19] parameter


Unnamed: 0,Parameters
boosting_type,gbdt
class_weight,
colsample_bytree,1.0
importance_type,split
learning_rate,0.1
max_depth,-1
min_child_samples,20
min_child_weight,0.001
min_split_gain,0.0
n_estimators,100


[Errno 2] No such file or directory: 'Hyperparameters.png' -> '/home/work/Dev/kickstarter/02_Outputs/i01/plots_lgbm_tuned/parameter.png'
ERROR: parameter could not be produced!
Creating [16/19] lift


Creating [17/19] gain


Creating [18/19] tree
Decision Tree plot is only available for scikit-learn Decision Trees and Forests, Ensemble models using those or Stacked models using those as meta (final) estimators.
ERROR: tree could not be produced!
Creating [19/19] ks


In [19]:
dir_catboost = f'{c.get_path_iteration()}/plots_catboost_tuned'
save_model_plots(dir_catboost, PLOT_TYPES, catboost_tuned)

Creating [2/19] auc


Creating [3/19] threshold


Creating [4/19] pr


Creating [5/19] confusion_matrix


Creating [6/19] error


Creating [7/19] class_report


Creating [8/19] boundary


Creating [9/19] learning


Creating [10/19] calibration


keyword grid_b is not recognized; valid keywords are ['size', 'width', 'color', 'tickdir', 'pad', 'labelsize', 'labelcolor', 'zorder', 'gridOn', 'tick1On', 'tick2On', 'label1On', 'label2On', 'length', 'direction', 'left', 'bottom', 'right', 'top', 'labelleft', 'labelbottom', 'labelright', 'labeltop', 'labelrotation', 'grid_agg_filter', 'grid_alpha', 'grid_animated', 'grid_antialiased', 'grid_clip_box', 'grid_clip_on', 'grid_clip_path', 'grid_color', 'grid_dash_capstyle', 'grid_dash_joinstyle', 'grid_dashes', 'grid_data', 'grid_drawstyle', 'grid_figure', 'grid_fillstyle', 'grid_gapcolor', 'grid_gid', 'grid_in_layout', 'grid_label', 'grid_linestyle', 'grid_linewidth', 'grid_marker', 'grid_markeredgecolor', 'grid_markeredgewidth', 'grid_markerfacecolor', 'grid_markerfacecoloralt', 'grid_markersize', 'grid_markevery', 'grid_mouseover', 'grid_path_effects', 'grid_picker', 'grid_pickradius', 'grid_rasterized', 'grid_sketch_params', 'grid_snap', 'grid_solid_capstyle', 'grid_solid_joinstyle', 

Creating [12/19] dimension


Creating [13/19] feature


Creating [14/19] feature_all


Creating [15/19] parameter


Unnamed: 0,Parameters
nan_mode,Min
eval_metric,Logloss
iterations,280
sampling_frequency,PerTree
leaf_estimation_method,Newton
grow_policy,SymmetricTree
penalties_coefficient,1
boosting_type,Plain
model_shrink_mode,Constant
feature_border_type,GreedyLogSum


[Errno 2] No such file or directory: 'Hyperparameters.png' -> '/home/work/Dev/kickstarter/02_Outputs/i01/plots_catboost_tuned/parameter.png'
ERROR: parameter could not be produced!
Creating [16/19] lift


Creating [17/19] gain


Creating [18/19] tree
Decision Tree plot is only available for scikit-learn Decision Trees and Forests, Ensemble models using those or Stacked models using those as meta (final) estimators.
ERROR: tree could not be produced!
Creating [19/19] ks
