In [24]:
import pandas as pd
from pycaret.classification import *
import os
from sklearn.metrics import average_precision_score
from sklearn.utils import shuffle
from utils import *
import mlflow

### # Don't forget to run mlflow in terminal

mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0

In [25]:
mlflow.set_tracking_uri("http://localhost:5000")

In [26]:
df = pd.read_csv('data' + os.sep + 'creditcard.csv', encoding_errors='ignore', on_bad_lines='skip')

In [27]:
df.Class.value_counts()

0.0    284283
1.0       492
Name: Class, dtype: int64

In [28]:
df.Class.value_counts()

0.0    284283
1.0       492
Name: Class, dtype: int64

# Some undersampling - Drop random records with 0 targets in order to optimize the calculation time

In [17]:
df_train, df_test = get_raw_data()

In [18]:
df_train.target.value_counts()

0.0    2471
1.0     328
Name: target, dtype: int64

In [19]:
df_test.target.value_counts()

0.0    1215
1.0     164
Name: target, dtype: int64

In [21]:
df_train.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'target'],
      dtype='object')

# Training different models with different data preprocessing

In [33]:
numeric_feature_list = df_train.columns[0:-1].to_list()

In [32]:
clf1 = setup(silent=True,
             data = df_train, 
             target = 'target',
             numeric_features=numeric_feature_list,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'baseline',            )

add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9837,0.9828,0.876,0.9917,0.9287,0.9195,0.9227,0.9554,4.439
et,Extra Trees Classifier,0.9826,0.9759,0.8593,1.0,0.923,0.9133,0.9175,0.9463,1.136
rf,Random Forest Classifier,0.9821,0.9761,0.8635,0.9907,0.921,0.9111,0.9148,0.9446,0.981
ada,Ada Boost Classifier,0.9801,0.9686,0.8843,0.9526,0.9155,0.9042,0.9061,0.9461,0.86
gbc,Gradient Boosting Classifier,0.9796,0.9748,0.8717,0.9588,0.9117,0.9002,0.9024,0.9488,3.13
lr,Logistic Regression,0.978,0.9651,0.8635,0.9571,0.9058,0.8935,0.8962,0.9365,0.147
lda,Linear Discriminant Analysis,0.9699,0.9773,0.7568,1.0,0.8604,0.844,0.8549,0.9438,0.056
dt,Decision Tree Classifier,0.9668,0.9278,0.876,0.8599,0.8664,0.8475,0.8485,0.7692,0.113
ridge,Ridge Classifier,0.9622,0.0,0.6948,1.0,0.8174,0.7973,0.8151,0.0,0.028
nb,Naive Bayes,0.9576,0.9709,0.7153,0.9278,0.8051,0.7819,0.7918,0.8885,0.03


# Fixing imbalance

In [35]:
clf1 = setup(silent=True,
             data = df_train, 
             target = 'target',
             numeric_features=numeric_feature_list,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'fixing imbalance',
             fix_imbalance = True, 
            )

add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
et,Extra Trees Classifier,0.9821,0.9861,0.873,0.977,0.9216,0.9115,0.9137,0.9544,1.119
rf,Random Forest Classifier,0.9816,0.9806,0.89,0.9567,0.9214,0.911,0.9122,0.9538,1.213
gbc,Gradient Boosting Classifier,0.9801,0.9806,0.8944,0.9394,0.9156,0.9043,0.9051,0.9517,6.042
lightgbm,Light Gradient Boosting Machine,0.9816,0.9814,0.8774,0.9687,0.9201,0.9098,0.9116,0.9497,2.451
ada,Ada Boost Classifier,0.9801,0.9708,0.8984,0.9386,0.9172,0.9059,0.9067,0.9423,1.581
lr,Logistic Regression,0.9678,0.9643,0.8944,0.8526,0.8714,0.8531,0.8544,0.9323,0.193
lda,Linear Discriminant Analysis,0.9689,0.9651,0.8264,0.9118,0.8642,0.8467,0.8497,0.9086,0.098
nb,Naive Bayes,0.9633,0.9681,0.754,0.9296,0.8301,0.8099,0.8169,0.894,0.04
qda,Quadratic Discriminant Analysis,0.9597,0.9727,0.8857,0.8104,0.8431,0.8201,0.8233,0.861,0.092
dt,Decision Tree Classifier,0.9627,0.9333,0.8944,0.8197,0.8534,0.8321,0.8345,0.7468,0.188


# Removing outliers

In [36]:
clf1 = setup(data = df_train, 
             target = 'target', 
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'removing outliers',
             remove_outliers = True
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lda,Linear Discriminant Analysis,0.9806,0.9805,0.76,1.0,0.8594,0.8493,0.8609,0.9231,0.037
lightgbm,Light Gradient Boosting Machine,0.9833,0.9714,0.8067,0.9862,0.8834,0.8746,0.882,0.9191,1.456
et,Extra Trees Classifier,0.9828,0.9736,0.8,0.9866,0.8792,0.8701,0.8781,0.9186,0.743
rf,Random Forest Classifier,0.9828,0.9638,0.8067,0.9783,0.8809,0.8718,0.8784,0.9037,0.938
ada,Ada Boost Classifier,0.9801,0.9682,0.8267,0.9246,0.8679,0.8573,0.8616,0.903,0.724
lr,Logistic Regression,0.978,0.963,0.8067,0.912,0.8524,0.8407,0.8446,0.9027,0.151
gbc,Gradient Boosting Classifier,0.978,0.9678,0.8133,0.9065,0.8543,0.8425,0.8457,0.898,2.49
nb,Naive Bayes,0.9742,0.9715,0.7467,0.9206,0.8212,0.8075,0.8145,0.8761,0.025
qda,Quadratic Discriminant Analysis,0.9538,0.9672,0.84,0.6804,0.7471,0.7222,0.7299,0.7712,0.034
dt,Decision Tree Classifier,0.9694,0.9073,0.8333,0.8043,0.814,0.7974,0.8002,0.6851,0.133


# Transformation

In [37]:
clf1 = setup(data = df_train, 
             target = 'target', 
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'transformation',
             transformation = True, 
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9847,0.9822,0.8797,0.9792,0.9256,0.9172,0.9195,0.9503,3.036
lr,Logistic Regression,0.9816,0.9812,0.8797,0.9536,0.9125,0.9023,0.9048,0.9455,0.058
et,Extra Trees Classifier,0.9842,0.9816,0.8613,0.9952,0.9222,0.9135,0.9171,0.9447,1.123
lda,Linear Discriminant Analysis,0.9806,0.9814,0.829,0.9944,0.9031,0.8925,0.8977,0.9428,0.057
rf,Random Forest Classifier,0.9837,0.9806,0.8658,0.9857,0.9206,0.9115,0.9147,0.9413,1.07
ada,Ada Boost Classifier,0.9796,0.9775,0.8658,0.9502,0.9035,0.8922,0.8949,0.9376,0.937
gbc,Gradient Boosting Classifier,0.9806,0.9799,0.8703,0.953,0.9077,0.897,0.8993,0.9353,3.706
knn,K Neighbors Classifier,0.9811,0.9482,0.8472,0.9805,0.9067,0.8964,0.9006,0.9013,0.132
qda,Quadratic Discriminant Analysis,0.9678,0.9624,0.8749,0.8424,0.8564,0.8384,0.8398,0.8816,0.044
nb,Naive Bayes,0.9602,0.9685,0.8288,0.8243,0.8221,0.7998,0.8025,0.7918,0.024


# Feature interaction

In [39]:
clf1 = setup(data = df_train, 
             target = 'target',
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'feature interaction', 
             feature_interaction = True, feature_ratio = True, 
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9811,0.988,0.8677,0.9608,0.9094,0.899,0.9019,0.9555,7.435
gbc,Gradient Boosting Classifier,0.976,0.9859,0.8677,0.9177,0.8889,0.8756,0.8779,0.9525,13.357
et,Extra Trees Classifier,0.9821,0.9798,0.863,0.9763,0.9133,0.9035,0.9073,0.9484,2.132
rf,Random Forest Classifier,0.9816,0.9787,0.863,0.9706,0.9109,0.9008,0.9043,0.945,1.962
ada,Ada Boost Classifier,0.9781,0.9718,0.8677,0.9325,0.8975,0.8852,0.8868,0.9388,2.703
lda,Linear Discriminant Analysis,0.9791,0.967,0.8491,0.9598,0.8976,0.8861,0.8902,0.9202,0.678
lr,Logistic Regression,0.9765,0.9493,0.8723,0.9161,0.8922,0.8791,0.8803,0.9072,1.519
knn,K Neighbors Classifier,0.9781,0.9374,0.831,0.9691,0.892,0.8799,0.8848,0.878,1.316
nb,Naive Bayes,0.9571,0.9533,0.6753,0.9163,0.7755,0.7526,0.7644,0.852,0.039
dt,Decision Tree Classifier,0.9668,0.9316,0.8861,0.8316,0.8563,0.8376,0.8392,0.7519,0.737


# Polynomial features

In [40]:
clf1 = setup(data = df_train, 
             target = 'target', 
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'polynomial features',
             polynomial_features = True,
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lda,Linear Discriminant Analysis,0.978,0.9794,0.8194,0.9897,0.8937,0.8817,0.8882,0.9519,0.095
gbc,Gradient Boosting Classifier,0.9816,0.9809,0.881,0.9588,0.9169,0.9066,0.9085,0.9505,4.54
lightgbm,Light Gradient Boosting Machine,0.9821,0.9847,0.8723,0.9721,0.9185,0.9086,0.9108,0.9495,3.085
et,Extra Trees Classifier,0.9821,0.9725,0.868,0.9759,0.9179,0.908,0.9104,0.9449,1.165
rf,Random Forest Classifier,0.9816,0.9691,0.8723,0.9681,0.9164,0.9061,0.9085,0.9393,1.635
lr,Logistic Regression,0.978,0.9657,0.8678,0.9429,0.9022,0.8899,0.8918,0.9383,0.679
ada,Ada Boost Classifier,0.9816,0.9718,0.8812,0.9592,0.9174,0.9071,0.9088,0.9378,1.691
nb,Naive Bayes,0.978,0.9756,0.868,0.94,0.9014,0.8891,0.8907,0.9017,0.042
qda,Quadratic Discriminant Analysis,0.9668,0.9695,0.8984,0.8316,0.8627,0.8439,0.8454,0.8562,0.052
dt,Decision Tree Classifier,0.9724,0.9308,0.8767,0.8886,0.8805,0.8649,0.8663,0.7929,0.179


# Feature selection

In [41]:
clf1 = setup(data = df_train, 
             target = 'target',
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'feature selection',
             feature_selection = True, feature_selection_threshold = 0.5,
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
gbc,Gradient Boosting Classifier,0.9745,0.9807,0.8379,0.9393,0.8829,0.8687,0.8722,0.9483,2.949
lightgbm,Light Gradient Boosting Machine,0.9786,0.9791,0.8379,0.9763,0.8989,0.8871,0.892,0.9446,4.065
rf,Random Forest Classifier,0.9786,0.9795,0.8377,0.9776,0.8987,0.8869,0.8923,0.9441,1.529
et,Extra Trees Classifier,0.9786,0.9759,0.8379,0.976,0.8989,0.8871,0.892,0.9437,1.42
lr,Logistic Regression,0.9765,0.9761,0.851,0.9482,0.8925,0.8795,0.8837,0.9413,0.743
lda,Linear Discriminant Analysis,0.9689,0.9783,0.7413,0.9885,0.845,0.8282,0.8401,0.932,0.066
ada,Ada Boost Classifier,0.9745,0.964,0.8425,0.9372,0.8837,0.8695,0.8733,0.9213,0.991
nb,Naive Bayes,0.9617,0.9613,0.8377,0.837,0.8341,0.8126,0.8146,0.8489,0.039
qda,Quadratic Discriminant Analysis,0.9525,0.9633,0.8599,0.7681,0.8081,0.7812,0.785,0.8467,0.062
knn,K Neighbors Classifier,0.9592,0.9111,0.6668,0.9773,0.7886,0.7672,0.7865,0.8053,0.151


# Removing multicollinearity

In [42]:
clf1 = setup(data = df_train, 
             target = 'target', 
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'removing multicollinearity',
             remove_multicollinearity = True, multicollinearity_threshold = 0.6,
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9765,0.9751,0.8229,0.9683,0.8885,0.8755,0.8798,0.9324,2.416
et,Extra Trees Classifier,0.9755,0.9739,0.8004,0.9847,0.8815,0.8681,0.8747,0.9294,0.86
rf,Random Forest Classifier,0.976,0.9722,0.8184,0.9687,0.8856,0.8724,0.8772,0.928,1.043
lda,Linear Discriminant Analysis,0.9699,0.9522,0.7516,0.9842,0.8503,0.834,0.8445,0.9112,0.047
gbc,Gradient Boosting Classifier,0.9724,0.9771,0.8184,0.9343,0.8707,0.8555,0.8589,0.9072,1.739
ada,Ada Boost Classifier,0.9719,0.9529,0.8449,0.9097,0.8733,0.8576,0.86,0.9071,0.554
lr,Logistic Regression,0.9632,0.9317,0.7872,0.8808,0.83,0.8095,0.8119,0.8701,0.175
nb,Naive Bayes,0.951,0.9403,0.6443,0.9092,0.7515,0.7252,0.7399,0.8345,0.026
qda,Quadratic Discriminant Analysis,0.9464,0.9558,0.8229,0.7419,0.7792,0.7488,0.7508,0.7878,0.027
dt,Decision Tree Classifier,0.9586,0.9073,0.8405,0.8104,0.8225,0.7992,0.8011,0.6995,0.103


# PCA

In [43]:
clf1 = setup(data = df_train, 
             target = 'target', 
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'pca',
             pca = True, pca_components = 10
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
lr,Logistic Regression,0.977,0.9832,0.8659,0.9364,0.8983,0.8854,0.8872,0.9527,0.17
lightgbm,Light Gradient Boosting Machine,0.9775,0.982,0.8574,0.95,0.8999,0.8874,0.8897,0.9524,2.728
rf,Random Forest Classifier,0.9796,0.9815,0.8703,0.9551,0.9093,0.8979,0.9,0.9501,1.074
et,Extra Trees Classifier,0.9821,0.9802,0.8833,0.9634,0.9209,0.9108,0.9124,0.9498,1.228
gbc,Gradient Boosting Classifier,0.9765,0.9809,0.8529,0.945,0.8953,0.8821,0.8844,0.9465,1.359
lda,Linear Discriminant Analysis,0.9668,0.9813,0.7272,0.9897,0.8361,0.8182,0.8315,0.9459,0.035
ada,Ada Boost Classifier,0.976,0.9673,0.8489,0.9433,0.8926,0.8792,0.8813,0.9286,0.486
qda,Quadratic Discriminant Analysis,0.9576,0.9704,0.8746,0.7943,0.8302,0.8061,0.8089,0.8807,0.029
nb,Naive Bayes,0.9638,0.9572,0.8096,0.8785,0.8398,0.8195,0.822,0.8538,0.026
dt,Decision Tree Classifier,0.9622,0.9205,0.8659,0.8269,0.844,0.8225,0.8241,0.7316,0.061


# Selected model

### Keeping only performance increasing data preprocessing techniques

In [45]:
clf1 = setup(data = df_train, 
             target = 'target',
             numeric_features=numeric_feature_list,
             silent=True,
             log_experiment = True,
             use_gpu=True,
             experiment_name = 'selected_model',
             feature_interaction = True, feature_ratio = True, 
             fix_imbalance = True
            )
add_metric('apc', 'APC', average_precision_score, target = 'pred_proba')
best = compare_models(sort="APC")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,APC,TT (Sec)
rf,Random Forest Classifier,0.9775,0.9842,0.8676,0.9407,0.9017,0.889,0.8906,0.9511,2.011
gbc,Gradient Boosting Classifier,0.9755,0.9842,0.8723,0.9212,0.895,0.8812,0.8822,0.9505,12.081
lightgbm,Light Gradient Boosting Machine,0.9811,0.983,0.885,0.9555,0.917,0.9064,0.9084,0.95,1.803
ada,Ada Boost Classifier,0.974,0.9781,0.881,0.9025,0.8901,0.8753,0.8764,0.9459,4.933
et,Extra Trees Classifier,0.9796,0.9784,0.8721,0.9551,0.9101,0.8987,0.9008,0.9454,0.932
lda,Linear Discriminant Analysis,0.9684,0.9807,0.8848,0.857,0.8692,0.8512,0.8523,0.9219,0.14
lr,Logistic Regression,0.9648,0.963,0.8931,0.8292,0.8584,0.8383,0.8401,0.9204,0.572
nb,Naive Bayes,0.9551,0.9598,0.6688,0.9429,0.7806,0.7565,0.7714,0.887,0.078
knn,K Neighbors Classifier,0.95,0.947,0.8889,0.7474,0.8101,0.7817,0.7868,0.8475,0.515
dt,Decision Tree Classifier,0.95,0.9109,0.8594,0.7603,0.8044,0.7759,0.7794,0.6711,0.481


# Deploy the model

In [26]:
best

ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=3225, verbose=0,
                     warm_start=False)

In [30]:
# create api
create_api(best, 'fraud_voting_model_api')


API sucessfully created. This function only creates a POST API, it doesn't run it automatically.

To run your API, please run this command --> !python fraud_voting_model_api.py
    


In [31]:
best

ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=8777, verbose=0,
                     warm_start=False)

# Test REST API

In [36]:
# inference data
df_test.loc[df_test.target == 1].iloc[0]

ERROR! Session/line number was not unique in database. History logging moved to new session 252


Time          68207
V1       -13.192671
V2        12.785971
V3         -9.90665
V4         3.320337
V5        -4.801176
V6         5.760059
V7       -18.750889
V8       -37.353443
V9         -0.39154
V10       -5.052502
V11        4.406806
V12       -4.610756
V13       -1.909488
V14       -9.072711
V15       -0.226074
V16       -6.211557
V17       -6.248145
V18       -3.149247
V19        0.051576
V20        -3.49305
V21       27.202839
V22       -8.887017
V23        5.303607
V24       -0.639435
V25        0.263203
V26       -0.108877
V27        1.269566
V28        0.939407
Amount          1.0
target          1.0
Name: 102441, dtype: object

# Test REST API - in another way

In [7]:
import requests

In [8]:
df_test.loc[df_test.target == 1].iloc[0].to_json(compression="dict")

'{"Time":68207,"V1":-13.1926709562,"V2":12.7859706383,"V3":-9.9066500209,"V4":3.3203368829,"V5":-4.8011759323,"V6":5.7600585562,"V7":-18.750889158,"V8":-37.3534426379,"V9":-0.3915397439,"V10":-5.0525023671,"V11":4.4068055236,"V12":-4.6107564774,"V13":-1.9094879697,"V14":-9.072710934,"V15":-0.2260744509,"V16":-6.2115574821,"V17":-6.2481453534,"V18":-3.1492466947,"V19":0.0515761185,"V20":-3.4930499152,"V21":27.2028391573,"V22":-8.8870171409,"V23":5.3036069038,"V24":-0.6394348023,"V25":0.2632031235,"V26":-0.10887693,"V27":1.2695663554,"V28":0.9394073628,"Amount":1.0,"target":1.0}'

In [46]:
import json
url = 'http://127.0.0.1:8001/get_predict/'

params = json.loads(df_test.loc[df_test.target == 1].iloc[0].to_json())
response = requests.get(url, params).json()
print(response)

{'prediction': ['1.0']}


In [21]:
!python fraud_voting_model_api.py

Transformation Pipeline and Model Successfully Loaded
[32mINFO[0m:     Started server process [[36m2345[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[32mINFO[0m:     Uvicorn running on [1mhttp://127.0.0.1:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     127.0.0.1:56959 - "[1mGET / HTTP/1.1[0m" [31m404 Not Found[0m
[32mINFO[0m:     127.0.0.1:56959 - "[1mGET /favicon.ico HTTP/1.1[0m" [31m404 Not Found[0m
[32mINFO[0m:     127.0.0.1:56960 - "[1mGET /docs HTTP/1.1[0m" [32m200 OK[0m
[32mINFO[0m:     127.0.0.1:56960 - "[1mGET /openapi.json HTTP/1.1[0m" [32m200 OK[0m
^C
[32mINFO[0m:     Shutting down
[32mINFO[0m:     Waiting for application shutdown.
[32mINFO[0m:     Application shutdown complete.
[32mINFO[0m:     Finished server process [[36m2345[0m]


KeyboardInterrupt: 

# Create a docker

In [10]:
create_docker('fraud_voting_model_api_with_get_for_docker3')

Writing requirements.txt
Writing Dockerfile
Dockerfile and requirements.txt successfully created.
To build image you have to run --> !docker image build -f "Dockerfile" -t IMAGE_NAME:IMAGE_TAG .
        


In [None]:
!docker image build -f "Dockerfile_8007" -t fraud_voting_model_api_with_get_for_docker3:latest .

[1A[1B[0G[?25l[+] Building 0.0s (0/1)                                                         
[?25h[1A[0G[?25l[+] Building 0.1s (1/2)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m => [internal] load .dockerignore                                          0.0s
 => => transferring context:                                               0.0s
[?25h[1A[1A[1A[1A[1A[0G[?25l[+] Building 0.3s (2/2)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m[34m => [internal] load .dockerignore                                          0.1s
[0m[34m => => transferring context: 2B                                          

[?25h[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 1.9s (4/9)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m[34m => [internal] load .dockerignore                                          0.1s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load metadata for docker.io/library/python:3.8-slim         0.0s
[0m[34m => [1/5] FROM docker.io/library/python:3.8-slim                           0.0s
[0m => [internal] load build context                                          1.6s
[?25h[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 2.1s (4/9)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 24

[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 3.3s (4/9)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m[34m => [internal] load .dockerignore                                          0.1s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load metadata for docker.io/library/python:3.8-slim         0.0s
[0m[34m => [1/5] FROM docker.io/library/python:3.8-slim                           0.0s
[0m => [internal] load build context                                          3.0s
 => => transferring context: 7.18MB                                        1.1s
[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 3.4s (4/9)                                                         
[34m => [internal] load build definitio

[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 4.5s (4/9)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m[34m => [internal] load .dockerignore                                          0.1s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load metadata for docker.io/library/python:3.8-slim         0.0s
[0m[34m => [1/5] FROM docker.io/library/python:3.8-slim                           0.0s
[0m => [internal] load build context                                          4.2s
 => => transferring context: 30.25MB                                       2.3s
[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 4.7s (4/9)                                                         
[34m => [internal] load build definitio

[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 5.9s (4/9)                                                         
[34m => [internal] load build definition from Dockerfile                       0.1s
[0m[34m => => transferring dockerfile: 247B                                       0.1s
[0m[34m => [internal] load .dockerignore                                          0.1s
[0m[34m => => transferring context: 2B                                            0.0s
[0m[34m => [internal] load metadata for docker.io/library/python:3.8-slim         0.0s
[0m[34m => [1/5] FROM docker.io/library/python:3.8-slim                           0.0s
[0m => [internal] load build context                                          5.6s
 => => transferring context: 60.84MB                                       3.7s
[?25h[1A[1A[1A[1A[1A[1A[1A[1A[1A[0G[?25l[+] Building 6.0s (4/9)                                                         
[34m => [internal] load build definitio

Process LokyProcess-37:
Process LokyProcess-39:
Process LokyProcess-42:
Process LokyProcess-43:
Process LokyProcess-45:
Process LokyProcess-48:
Process LokyProcess-44:
Process LokyProcess-40:
Process LokyProcess-47:
Process LokyProcess-38:
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/Users/gerzsonboros/miniconda3/lib/python3.8/concurrent/futures/process.py", line 100, in _python_exit
    items = list(_threads_wakeups.items())
  File "/Users/gerzsonboros/miniconda3/lib/python3.8/weakref.py", line 434, in items
    yield key, value
  File "/Users/gerzsonboros/miniconda3/lib/python3.8/_weakrefset.py", line 26, in __exit__
    def __exit__(self, e, t, b):
KeyboardInterrupt
Exception ignored in: <module 'threading' from '/Users/gerzsonboros/miniconda3/lib/python3.8/threading.py'>
Traceback (most recent call last):
  File "/Users/gerzsonboros/miniconda3/lib/python3.8/threading.py", line 1355, in _shutdown
    def _shutdown():
KeyboardInterrupt: 
Exception ignor

In [85]:
!docker run -d -p 8007:8007 fraud_voting_model_api_with_get_for_docker3

687fc5d2f11bffaebb9044b773fe908a395d173975f1db811a3e000ceb803fbd
docker: Error response from daemon: driver failed programming external connectivity on endpoint practical_chaum (04c03a715fa493d98e09aeec4d624c6384cc20fa4cb89215a5495bd7eec44089): Bind for 0.0.0.0:8005 failed: port is already allocated.


In [5]:
!docker images

REPOSITORY                                          TAG        IMAGE ID       CREATED         SIZE
fraud_voting_model_api_with_get_for_docker2_image   latest     842a1bcc3892   5 minutes ago   2.17GB
fraud_voting_model_api_with_get_for_docker          latest     4b18ba7e373b   15 hours ago    2.16GB
fraud_voting_model_api                              latest     e3d11ce18018   16 hours ago    2.16GB
churn_voting_model_api                              latest     98ee34619e84   2 days ago      8.51GB
<none>                                              <none>     9d4d6d2d6b64   2 weeks ago     7.98GB
python                                              3.8-slim   d069e16584a2   2 weeks ago     123MB
gradflow/workbench/jupyter                          0.1.0      8cc8d764d69d   5 weeks ago     4.49GB
gradflow/workbench/mlflow                           0.1.0      002ae258879f   5 weeks ago     1.2GB
gradflow/workbench/postgres                         0.1.0      52c64288b248   5 weeks 

In [None]:
import json
url = 'http://127.0.0.1:8007/get_predict/'

params = json.loads(df_test.loc[df_test.target == 1].iloc[0].to_json())
response = requests.get(url, params).json()
print(response)