# Building a Model

### PyCaret is an open source, low-code machine learning library in Python that is used to train and deploy machine learning pipelines and models into production. PyCaret can be installed easily using pip.

In [1]:
from pycaret.datasets import get_data
dataset = get_data('employee')

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,department,salary,left
0,0.38,0.53,2,157,3,0,0,sales,low,1
1,0.8,0.86,5,262,6,0,0,sales,medium,1
2,0.11,0.88,7,272,4,0,0,sales,medium,1
3,0.72,0.87,5,223,5,0,0,sales,low,1
4,0.37,0.52,2,159,3,0,0,sales,low,1


In [2]:
(dataset['department']).unique()

array(['sales', 'accounting', 'hr', 'technical', 'support', 'management',
       'IT', 'product_mng', 'marketing', 'RandD'], dtype=object)

In [3]:
data_seen = dataset.sample(frac=0.95, random_state=780).reset_index(drop=True)
data_unseen = dataset.drop(data_seen.index).reset_index(drop=True)
#dataset=dataset.drop(['department','average_montly_hours'],axis=1)
print('Data for Modeling: ' + str(data_seen.shape))
print('Unseen Data For Predictions: ' + str(data_unseen.shape))

Data for Modeling: (14249, 10)
Unseen Data For Predictions: (750, 10)


In [4]:
from pycaret.classification import *
setting_up = setup(data = data_seen, target = 'left', session_id=123)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,left
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(14249, 10)"
5,Missing Values,False
6,Numeric Features,3
7,Categorical Features,6
8,Ordinal Features,False
9,High Cardinality Features,False


In [5]:
compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9875,0.991,0.9566,0.9905,0.9732,0.965,0.9653,0.566
xgboost,Extreme Gradient Boosting,0.985,0.9921,0.9562,0.9802,0.968,0.9582,0.9584,1.074
lightgbm,Light Gradient Boosting Machine,0.9843,0.9928,0.9528,0.9807,0.9665,0.9562,0.9564,0.24
et,Extra Trees Classifier,0.9816,0.9907,0.9549,0.9674,0.961,0.9489,0.9491,0.51
catboost,CatBoost Classifier,0.9815,0.9919,0.9444,0.977,0.9604,0.9483,0.9486,6.39
gbc,Gradient Boosting Classifier,0.9744,0.9878,0.9255,0.9659,0.9452,0.9285,0.9289,0.667
dt,Decision Tree Classifier,0.9734,0.9687,0.9596,0.9314,0.9451,0.9276,0.9279,0.062
ada,Ada Boost Classifier,0.9587,0.9817,0.9044,0.9208,0.9124,0.8854,0.8856,0.25
knn,K Neighbors Classifier,0.9344,0.9674,0.8994,0.8379,0.8673,0.8239,0.825,0.154
lr,Logistic Regression,0.8989,0.9383,0.8038,0.7789,0.791,0.7244,0.7247,2.362


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=123, verbose=0,
                       warm_start=False)

In [6]:
rf = create_model('rf')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.988,0.994,0.9622,0.9871,0.9745,0.9666,0.9667
1,0.988,0.9897,0.958,0.9913,0.9744,0.9665,0.9667
2,0.99,0.9888,0.9706,0.9872,0.9788,0.9723,0.9723
3,0.991,0.9943,0.9748,0.9872,0.981,0.9751,0.9751
4,0.984,0.9921,0.9451,0.9868,0.9655,0.9551,0.9554
5,0.986,0.9949,0.9451,0.9956,0.9697,0.9606,0.9611
6,0.9789,0.9901,0.9156,0.9954,0.9538,0.9402,0.9416
7,0.993,0.997,0.9705,1.0,0.985,0.9804,0.9806
8,0.988,0.9856,0.958,0.9913,0.9744,0.9665,0.9667
9,0.988,0.9839,0.9664,0.9829,0.9746,0.9667,0.9668


In [7]:
tuned_model = tune_model(rf)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.9719,0.971,0.9202,0.9605,0.9399,0.9216,0.922
1,0.977,0.9695,0.9286,0.9736,0.9505,0.9355,0.936
2,0.979,0.9816,0.937,0.9738,0.955,0.9413,0.9416
3,0.9669,0.9825,0.9034,0.9556,0.9287,0.9072,0.9078
4,0.9669,0.9818,0.9198,0.9397,0.9296,0.908,0.9081
5,0.9789,0.9719,0.9241,0.9865,0.9542,0.9406,0.9414
6,0.9649,0.9728,0.8776,0.972,0.9224,0.8998,0.9017
7,0.9729,0.9783,0.9156,0.9688,0.9414,0.9238,0.9245
8,0.9779,0.9741,0.9244,0.9821,0.9524,0.938,0.9388
9,0.9809,0.9791,0.9412,0.9782,0.9593,0.9469,0.9472


In [8]:
final = finalize_model(tuned_model)
unseen_predictions = predict_model(final, data=data_unseen)
unseen_predictions.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,department,salary,left,Label,Score
0,0.11,0.89,6,306,4,0,0,technical,low,1,1,1.0
1,0.41,0.54,2,152,3,0,0,technical,low,1,1,0.9703
2,0.87,0.88,5,269,5,0,0,technical,low,1,1,0.9754
3,0.45,0.48,2,158,3,0,0,technical,low,1,1,0.9703
4,0.4,0.46,2,127,3,0,0,technical,low,1,1,0.9703


In [9]:
save_model(final,'Final_model')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[], target='left',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeric_strateg...
                  RandomForestClassifier(bootstrap=False, ccp_alpha=0.0,
                                         class_weight={}, criterion='entropy',
                                         max_depth=5, max_features=1.0,
                                         max_leaf_

# Building a Dashboard 

### Streamlit is an open-source Python library that makes it easy to build beautiful custom web-apps for machine learning and data science. Streamlit can be installed easily using pip