# Temperament Classification Model
## Rebecca Lewis
### DSC 680

In [1]:
from pycaret.classification import *
import pandas as pd
import numpy as np

In [2]:
# Read the prepared modeling table from disk and preview its first rows.
modeling_csv_path = '../Data/modeling_dataset.csv'
dataset = pd.read_csv(modeling_csv_path)
dataset.head()

Unnamed: 0,request_type,species,size,call_to_dispatch_sec,lat,long,condition,temperament,incident_month,incident_weekday,incident_hour,Cluster
0,CRUELTY CASE,DOG,LARGE,2160.0,30.49464,-91.15616,FAIR,FRIENDLY,12,1,15,Cluster 1
1,TRAPPED ANIMAL,OTHER,LARGE,60.0,30.431417,-91.14107,FAIR,NORMAL,12,2,10,Cluster 1
2,STRAY,DOG,LARGE,0.0,30.64854,-91.15677,EMACIATED,FRIENDLY,12,2,9,Cluster 4
3,STRAY,DOG,SMALL,2400.0,30.429104,-91.185036,EMACIATED,NORMAL,12,2,15,Cluster 1
4,OWNER SIGNED RELEASE,DOG,EXTRA LARGE,420.0,30.529593,-91.125534,GOOD,FRIENDLY,12,2,10,Cluster 1


In [3]:
# Initialise the PyCaret classification experiment with 'temperament' as the
# target. 'condition', 'lat' and 'long' are excluded from the feature set and
# fix_imbalance=True turns on PyCaret's class-imbalance handling.
#
# session_id pins the experiment's random seed so that Restart & Run All
# reproduces the same split and metrics instead of drawing a new seed on every
# run. 6331 is the seed reported in the setup summary recorded in this notebook.
clf_model = setup(
    dataset,
    target='temperament',
    ignore_features=['condition', 'lat', 'long'],
    fix_imbalance=True,
    session_id=6331,
)

Unnamed: 0,Description,Value
0,session_id,6331
1,Target,temperament
2,Target Type,Multiclass
3,Label Encoded,"DANGEROUS: 0, FRIENDLY: 1, NERVOUS: 2, NORMAL: 3, SCARED: 4, UNKNOWN: 5"
4,Original Data,"(38288, 12)"
5,Missing Values,False
6,Numeric Features,1
7,Categorical Features,7
8,Ordinal Features,False
9,High Cardinality Features,False


In [4]:
# Evaluate the candidate classifiers and show them in a single ranked metrics
# table. The call's return value is the cell output: the estimator repr printed
# below the table (a RandomForestClassifier in the recorded run).
compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.6141,0.8124,0.4088,0.6118,0.6104,0.4142,0.4152,2.53
lightgbm,Light Gradient Boosting Machine,0.6005,0.7941,0.4145,0.6199,0.5989,0.4082,0.4154,1.343
et,Extra Trees Classifier,0.597,0.7917,0.4016,0.5958,0.5954,0.3904,0.3909,3.495
gbc,Gradient Boosting Classifier,0.5779,0.7757,0.3918,0.6183,0.5805,0.3881,0.4003,23.908
dt,Decision Tree Classifier,0.5614,0.6782,0.3771,0.5649,0.5629,0.3402,0.3403,0.402
ada,Ada Boost Classifier,0.4333,0.5628,0.3156,0.5687,0.4663,0.2341,0.2468,1.184
lda,Linear Discriminant Analysis,0.433,0.7311,0.4307,0.6327,0.4621,0.2705,0.2952,0.752
knn,K Neighbors Classifier,0.4144,0.6954,0.3378,0.5705,0.4422,0.234,0.2504,1.004
lr,Logistic Regression,0.3861,0.7348,0.4486,0.6084,0.4195,0.2321,0.2558,3.824
ridge,Ridge Classifier,0.3799,0.0,0.4768,0.631,0.4044,0.2309,0.2599,0.189


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=6331, verbose=0,
                       warm_start=False)

### Temperament: Building the Model

In [5]:
# Train a Random Forest ('rf'), the model with the highest accuracy in the
# comparison table. The displayed grid lists the metrics row by row
# (presumably one row per cross-validation fold; confirm against PyCaret docs).
rf_model = create_model('rf')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.6166,0.819,0.398,0.6141,0.6133,0.4176,0.4187
1,0.6116,0.8074,0.3752,0.6078,0.6064,0.4066,0.4079
2,0.6082,0.8149,0.3823,0.6084,0.6061,0.4083,0.4093
3,0.6358,0.8172,0.4849,0.6335,0.6318,0.4471,0.4483
4,0.6201,0.817,0.3827,0.6132,0.6149,0.4187,0.4194
5,0.6123,0.816,0.3716,0.6057,0.6072,0.407,0.4076
6,0.6146,0.8087,0.5498,0.6146,0.6127,0.4184,0.4192
7,0.6078,0.8097,0.3792,0.6068,0.6039,0.4066,0.4081
8,0.6116,0.8086,0.3877,0.6108,0.6087,0.413,0.4142
9,0.6026,0.8055,0.3766,0.603,0.5995,0.3987,0.3997


In [6]:
# Hyperparameter search for the Random Forest, optimising F1.
# choose_better=True asks PyCaret to hand back whichever of the base or tuned
# model scores better on the optimised metric.
# NOTE(review): in the recorded run the tuned F1 values (~0.54-0.56) are below
# the base model's (~0.60-0.63), so tuned_rf_model is most likely the untuned
# rf_model here. Confirm before describing the final model as "tuned".
tuned_rf_model = tune_model(rf_model, optimize='F1', choose_better=True)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.5446,0.7604,0.4205,0.6112,0.5514,0.3551,0.3711
1,0.5425,0.7523,0.3783,0.6111,0.5489,0.3486,0.3669
2,0.5526,0.7601,0.3987,0.6299,0.5617,0.3687,0.3875
3,0.5463,0.7615,0.3938,0.6275,0.5576,0.3597,0.3782
4,0.55,0.7557,0.3973,0.6159,0.5575,0.361,0.3773
5,0.5448,0.7531,0.3835,0.6211,0.5573,0.3557,0.3714
6,0.5388,0.7578,0.3997,0.62,0.5462,0.3533,0.3737
7,0.5422,0.7566,0.395,0.6161,0.5512,0.3532,0.3706
8,0.531,0.7526,0.3677,0.6083,0.5402,0.3389,0.3568
9,0.5381,0.7493,0.4021,0.6063,0.5437,0.3469,0.3649


In [7]:
# Open PyCaret's interactive plot selector for the model. The result is an
# ipywidgets control, so the chosen plot is not preserved in a static export of
# this notebook.
evaluate_model(tuned_rf_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [8]:
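# Label encoding of the 'temperament' target (from the setup summary), for reading the evaluation plots:
# DANGEROUS: 0, FRIENDLY: 1, NERVOUS: 2, NORMAL: 3, SCARED: 4, UNKNOWN: 5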

### Temperament: Running the model on test data

In [9]:
# Score the model on the test data held out by setup(). No `data` argument is
# passed, so PyCaret uses its own hold-out split (per PyCaret docs; confirm).
# The trailing ';' suppresses the returned predictions frame so that only the
# metrics grid is displayed.
predict_model(tuned_rf_model);

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.6237,0.8194,0.4011,0.621,0.6202,0.4303,0.431


In [10]:
# Produce the final model object to be persisted.
# NOTE(review): per PyCaret's documentation finalize_model refits the estimator
# on the complete dataset, hold-out included. Confirm for the installed version.
final_rf_model = finalize_model(tuned_rf_model)

In [11]:
# Persist the finalized model together with its preprocessing pipeline (the
# cell output shows the saved Pipeline) under the given file name.
# NOTE(review): the date suffix '0402021' has only seven digits and may be a
# typo for '04022021'. It is left unchanged because other code may load the
# model by this exact name.
save_model(final_rf_model,'Final_Temperament_rf_Model_0402021')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True,
                                       features_todrop=['condition', 'lat',
                                                        'long'],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[],
                                       target='temperament', time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value...
                  RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                         class_weight=None, criterion='gini',
                                         max_depth=None, max_