In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from pycaret.classification import *
from imblearn.over_sampling import *
from imblearn.under_sampling import *
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np
import os
import pycaret


In [2]:
TARGET = 'diabetes'
ANSWER = 42

In [3]:
# data3 = pd.read_csv(r"..\3label2.csv")
# data2 = pd.read_csv(r"..\binary.csv")

data = pd.read_csv("..\diabetes_cleaned.csv")
data.drop(['Unnamed: 0','heart_attack','angina_or_chd','chd_mi'], axis=1, inplace=True)

In [4]:
#display distribution of label records
print(data.diabetes.value_counts())

#convert the solution to binary
data.loc[data.diabetes.isin([2]), "diabetes"] = 1

#display distribution of label records
print(data.diabetes.value_counts())

0.0    254681
2.0     41479
1.0      6793
Name: diabetes, dtype: int64
0.0    254681
1.0     48272
Name: diabetes, dtype: int64


In [5]:
train_df, test_df = train_test_split(data, test_size=0.2, random_state=ANSWER, stratify=data['diabetes'])
test_labels = test_df[TARGET]
test_features = test_df.drop(columns=TARGET)

In [6]:
data.describe()

Unnamed: 0,diabetes,bmi,smoker,stroke,asthma,physical_activity,heavy_drinking,no_doctor_due_to_cost,any_healthcare_insurance,general_health_status,...,difficulty_walking,gender,age,education,income,race,sleep_time,years_smoked,routine_checkup,heart_related
count,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,...,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0,302953.0
mean,0.159338,3.031424,1.197948,0.042224,0.147353,0.775688,0.068562,0.0796,1.550023,2.530046,...,0.14971,0.493324,4.067515,3.130882,4.489614,1.299192,7.007915,1.017877,1.338376,1.0
std,0.365992,0.829445,1.40534,0.201101,0.354458,0.417129,0.252708,0.270673,0.772696,1.037703,...,0.356787,0.499956,1.762832,0.91421,1.60409,0.805823,1.447061,8.950758,0.781658,0.0
min,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
25%,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,...,0.0,0.0,3.0,2.0,3.0,1.0,6.0,0.0,1.0,1.0
50%,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,...,0.0,0.0,4.0,3.0,5.0,1.0,7.0,0.0,1.0,1.0
75%,0.0,4.0,3.0,0.0,0.0,1.0,0.0,0.0,2.0,3.0,...,0.0,1.0,5.0,4.0,6.0,1.0,8.0,0.0,1.0,1.0
max,1.0,4.0,3.0,1.0,1.0,1.0,1.0,1.0,4.0,5.0,...,1.0,1.0,7.0,4.0,7.0,6.0,24.0,99.0,4.0,1.0


In [7]:
# Pull max row from describe
MAX_LINE = 7
melt = data.describe()[MAX_LINE:MAX_LINE+1].melt()

# Pick those with more than 0/1 and < 24 to be categorical values.
categorical_features = list(melt[((melt.value > 1) & (melt.value < 24))][['variable']].to_numpy().reshape(11))
categorical_features

['bmi',
 'smoker',
 'any_healthcare_insurance',
 'general_health_status',
 'mental_health_status',
 'physical_health_status',
 'age',
 'education',
 'income',
 'race',
 'routine_checkup']

In [8]:
diabetes = setup(data = train_df, target = TARGET, categorical_features = categorical_features, session_id=ANSWER)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,diabetes
2,Target type,Binary
3,Original data shape,"(242362, 22)"
4,Transformed data shape,"(242362, 63)"
5,Transformed train set shape,"(169653, 63)"
6,Transformed test set shape,"(72709, 63)"
7,Numeric features,10
8,Categorical features,11
9,Preprocess,True


In [9]:
import matplotlib.pyplot as plt

def print_test_heat_map3(model, sampler, strategy):
    predictions = predict_model(model, data=test_features)
    cm = confusion_matrix(test_labels, predictions["prediction_label"])
    cm_total = cm.sum(axis=1)
    result = [[0.0,0.0,0.0],[0.0,0.0,0.0],[0.0,0.0,0.0]]    
    for row in range(0,3):
        for column in range(0,3):
            result[row][column] = cm[row][column]/(cm_total[row])
            
    total_percent = result[0][0] + result[1][1] + result[2][2]
    diabetes_percent = result[1][1] + result[2][2]
    # print(f"Total Percent Accurate {total_percent*100}")
    # print(f"Diabetes Percent Accurate {diabetes_percent*100}")
    
    print(f"{result[0][0]*100},{result[1][1]*100},{result[2][2]*100},{type(model).__name__},{sampler},{strategy},{total_percent*100},{diabetes_percent*100}")
    # sns.heatmap(result, annot=True, 
    #             fmt='.2%', cmap='Blues')
    # plt.show()
    # plt.figure()
    
def print_test_heat_map2(model, sampler, strategy):
    predictions = predict_model(model, data=test_features)
    cm = confusion_matrix(test_labels, predictions["prediction_label"])
    cm_total = cm.sum(axis=1)
    result = [[0.0,0.0,0.0],[0.0,0.0,0.0]]    
    for row in range(0,2):
        for column in range(0,2):
            result[row][column] = cm[row][column]/(cm_total[row])
            
    total_percent = result[0][0] + result[1][1]
    diabetes_percent = result[1][1]
    # print(f"Total Percent Accurate {total_percent*100}")
    # print(f"Diabetes Percent Accurate {diabetes_percent*100}")
    
    print(f"{result[0][0]*100},{result[1][1]*100},{type(model).__name__},{sampler},{strategy},{total_percent*100},{diabetes_percent*100}")

    # sns.heatmap(result, annot=True, 
    #             fmt='.2%', cmap='Blues')
    # plt.show()
    # plt.figure()

In [10]:
def savemodel(model, sampler, strategy):
    model_name = f'model_{type(model).__name__}_{sampler}_{strategy}'
    save_model(model, model_name)

In [11]:
samplers_working = pd.read_csv(r"..\3label.csv")
samplers_working = samplers_working.drop_duplicates(subset=['Sampler','type'])[['Sampler','type']].sort_values(by='Sampler')
samplers_working.drop([28], axis =0, inplace=True)  #duplicate
samplers_working

Unnamed: 0,Sampler,type
98,ADASYN,auto
70,AllKNN,auto
42,NearMiss,all
56,RandomOverSampler,auto
14,SMOTE,all
0,TomekLinks,all


In [12]:
def Sampler(df, target, sampler_name, strategy):
    sampler = globals()[sampler_name](sampling_strategy=strategy)

    under_train_labels = df[target]
    under_train_features = df.drop(columns=target)
    
    X, y = sampler.fit_resample(under_train_features, under_train_labels)
    
    sampled_data = X
    sampled_data[target] = y
    
    print(sampled_data[target].value_counts())
    return sampled_data    

In [13]:
samplers_working


Unnamed: 0,Sampler,type
98,ADASYN,auto
70,AllKNN,auto
42,NearMiss,all
56,RandomOverSampler,auto
14,SMOTE,all
0,TomekLinks,all


In [14]:
train_df.diabetes.value_counts()

0.0    203744
1.0     38618
Name: diabetes, dtype: int64

In [None]:
# #Save models using base data
# for index, row in samplers_working.iterrows():
#     s = setup(train_df, target = TARGET, session_id = 123,verbose=False)
#     best = compare_models(n_select=14,verbose=False)
#     for model in best:   
#         try:
#             savemodel(model, "default", "none") 
#         except:
#             print("An exception occurred")


In [None]:
# #Save models using samplers
# for index, row in samplers_working.iterrows():
#     sampled_data = Sampler(train_df, TARGET, row.Sampler, row.type)
#     sampled_data.diabetes.value_counts()
#     s = setup(sampled_data, target = TARGET, session_id = 123,verbose=False)
#     best = compare_models(n_select=14,verbose=False)
#     for model in best:   
#         print(row.Sampler, row.type) 
#         try:
#             savemodel(model, row.Sampler, row.type) 
#         except:
#             print("An exception occurred")
          
#         #print(f'\nSampler: {sampler}, Strategy: {strategy}')
#         #print_test_heat_map(model, row.Sampler, row.type)

In [16]:
import os

In [None]:
# for filename in os.listdir(r'.'):
#     if ".pkl" in filename:
#         # checking if it is a file
#         if os.path.isfile(filename):
#             print(filename)
#             model = load_model(filename)
#             print_test_heat_map2(model,'File','in')
            

In [None]:
# model

In [None]:
#  gbc_r_a = load_model("model_GradientBoostingClassifier_RandomOverSampler_auto")
#  gbc_s_a = load_model('model_GradientBoostingClassifier_SMOTE_all')
#  blender = blend_models([gbc_r_a,gbc_s_a])
#  print_test_heat_map3(blender,"a","b")

#  #98.40780587403802,0.0,12.704918032786885,VotingClassifier,a,b,111.1127239068249,12.704918032786885


In [None]:
# models = []
# results = []

# for i in np.arange(0.1,1,0.1):
#     model = create_model('lightgbm', learning_rate = i)
#     model_results = pull().loc[['Mean']]
#     models.append(model)
#     results.append(model_results)
    
# results = pd.concat(results, axis=0)
# results.index = np.arange(0.1,1,0.1)
# results.plot()

In [None]:
# model = load_model('model_LogisticRegression_SMOTE_all')
# print_test_heat_map3(model,"SMOTE","all")

# result = tune_model(model)
# result
# # print_test_heat_map3(result,"SMOTE","all")

In [None]:
# lr = create_model("lr")
# # import numpy as np

# weights = get_config("X_train").shape[0]
# weights

In [None]:
# print_test_heat_map3(result,"SMOTE","all")

In [None]:
# test_labels.value_counts()

In [None]:
# print(50936/(50936+8296+1359))
# print(1359/(50936+8296+1359))
# 8296/(50936+8296+1359)

In [None]:
# print_test_heat_map3(result,"SMOTE","all")

In [None]:
# model = load_model('model_GradientBoostingClassifier_SMOTE_all')
# model.class_weight = {0:.10, 1:.45,2:.45}
# print_test_heat_map3(model,"SMOTE","all")

# result = tune_model(model, optimize="F1" )
# result

In [None]:
# from sklearn.model_selection import GridSearchCV
# from sklearn.ensemble import GradientBoostingClassifier


In [17]:
test_labels = test_df[TARGET]
test_features = test_df.drop(columns=TARGET)

In [None]:
# #Valid parameters are: ['ccp_alpha', 'criterion', 'init', 'learning_rate', 'loss', 'max_depth', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_iter_no_change', 'random_state', 'subsample', 'tol', 'validation_fraction', 'verbose', 'warm_start']
# params = [{
#     'learning_rate': [0.16,0.15,0.14,0.13, 0.12],
# }]

# gs_knn = GridSearchCV(GradientBoostingClassifier(),
#                       param_grid=params,
#                       scoring='accuracy',
#                       cv=5)

# gs_knn.fit(test_features, test_labels)
# gs_knn.best_params_

In [None]:
# from joblib import parallel_backend
# #Valid parameters are: ['ccp_alpha', 'criterion', 'init', 'learning_rate', 'loss', 'max_depth', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_iter_no_change', 'random_state', 'subsample', 'tol', 'validation_fraction', 'verbose', 'warm_start']
# params = [{
#     'learning_rate': [0.22,0.21,0.20,0.19],
#     'n_estimators': [1,2,3],
#     'min_samples_split':[2],
#     'min_samples_leaf':[1],
#     'max_depth':[4,5,6,7,8,9],
#     'max_features':[3,4,5,6,7]
# }]

# gs_knn = GridSearchCV(GradientBoostingClassifier(),
#                       param_grid=params,
#                       scoring='accuracy',
#                       cv=5)

# with parallel_backend('threading'):
#     gs_knn.fit(test_features, test_labels)
    
# gs_knn.best_params_

# gs_knn.param_grid
# clf = GradientBoostingClassifier(params={'learning_rate': 0.19,
#  'max_depth': 6,
#  'max_features': 5,
#  'min_samples_leaf': 1,
#  'min_samples_split': 2,
#  'n_estimators': 2})
# print_test_heat_map3(clf,"Test","Test")

In [None]:
# clf = GradientBoostingClassifier(**gs_knn.best_params_)
# clf.fit(test_features, test_labels)
# print_test_heat_map3(clf,"Test","Test")

In [None]:
import time
for filename in os.listdir(r'.'):
    if ".pkl" in filename:
        # checking if it is a file
        if os.path.isfile(filename):
            #start_time = time.time()
            filename = filename[:-4] #remove .pkl
            model=load_model(filename)
            print_test_heat_map2(model,filename,"Pre")
            result = tune_model(model, optimize = 'Recall', verbose=False )            
            print_test_heat_map2(result,filename,"Post") 
            #print( time.time() - start_time, "seconds")

In [None]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(solver='newton-cg')

#Setting the range for class weights
weights = np.linspace(0.0,0.99,200)

#Creating a dictionary grid for grid search
param_grid = {'class_weight': [{0:x, 1:1.0-x} for x in weights]}

#Fitting grid search to the train data with 5 folds
gridsearch = GridSearchCV(estimator= lr, 
                          param_grid= param_grid,
                          cv=StratifiedKFold(), 
                          n_jobs=-1, 
                          scoring='f1', 
                          verbose=2).fit(test_features, test_labels)

#Ploting the score for different values of weight
sns.set_style('whitegrid')
plt.figure(figsize=(12,8))
weigh_data = pd.DataFrame({ 'score': gridsearch.cv_results_['mean_test_score'], 'weight': (1- weights)})
sns.lineplot(weigh_data['weight'], weigh_data['score'])
plt.xlabel('Weight for class 1')
plt.ylabel('F1 score')
plt.xticks([round(i/10,1) for i in range(0,11,1)])
plt.title('Scoring for different class weights', fontsize=24)

In [None]:
lightgbm = create_model('lightgbm', class_weight = {0:.10, 1:.45,2:.45})
print_test_heat_map3(lightgbm,"lightgbm","?")

In [None]:
lightgbm_tuned = tune_model(lightgbm, optimize="Precision")
print_test_heat_map3(lightgbm_tuned,"lightgbm_tuned","?")


In [None]:
lightgbm_tuned

In [None]:
weights = [
    {0:0.10, 1:0.45, 2:0.45},
    {0:0.05, 1:0.55, 2:0.45},
]

for weight in weights:
    lightgbm = create_model('lightgbm', class_weight = weight, verbose=False)
    print_test_heat_map3(lightgbm,"lightgbm",f'{weight[0]},{weight[1]},{weight[2]}' )

In [None]:
weight[0]
           

42	NearMiss	all
56	RandomOverSampler	auto
14	SMOTE	all
0	TomekLinks	all

In [None]:
sampler_name = 'SMOTE'
sampler_type = 'all'

sampler_name = 'RandomOverSampler'
sampler_type = 'auto'

sampler_name = 'TomekLinks'
sampler_type = 'auto'

sampler_name = 'ADASYN'
sampler_type = 'auto'




In [None]:
sampled_data = Sampler(train_df, TARGET, sampler_name, sampler_type)
s = setup(sampled_data, target = TARGET, session_id = 123,verbose=False)

In [None]:
classifiers = ['lr','ridge','lightgbm']
#classifiers=['lightgbm']

In [None]:
for clf in classifiers:
    for i in range(15,26,3):
        for j in range(20,51,5):  
            k = 100-i-j
            lightgbm = create_model(clf, class_weight = {0:i,1:j,2:k}, verbose=False)
            print_test_heat_map3(lightgbm,clf,f'{i},{j},{k},{sampler_name},{sampler_type}' )
        
            

22.25145280351814,0.22075055187637968,98.45708775313405,LGBMClassifier,lightgbm,1,1,98,SMOTE,all,120.92929110852857,98.67783830501043
20.981231349144025,0.0,99.25265188042431,LGBMClassifier,lightgbm,1,11,88,SMOTE,all,120.23388322956832,99.25265188042431
22.25145280351814,0.0,98.93924783027965,LGBMClassifier,lightgbm,1,21,78,SMOTE,all,121.1907006337978,98.93924783027965
22.699073346945188,0.0,98.79459980713598,LGBMClassifier,lightgbm,1,31,68,SMOTE,all,121.49367315408116,98.79459980713598
24.126354641118265,1.1037527593818985,98.43297974927677,LGBMClassifier,lightgbm,1,41,58,SMOTE,all,123.66308714977694,99.53673250865866
26.089602638605307,3.384841795437822,97.61330761812921,LGBMClassifier,lightgbm,1,51,48,SMOTE,all,127.08775205217235,100.99814941356703
27.324485629024657,6.769683590875644,95.20250723240116,LGBMClassifier,lightgbm,1,61,38,SMOTE,all,129.29667645230145,101.9721908232768
29.24650541856447,15.967623252391464,89.09112825458052,LGBMClassifier,lightgbm,1,71,28,SMOTE,all,134.30525692553644,105.05875150697199
30.79550808858175,33.11258278145696,77.22999035679847,LGBMClassifier,lightgbm,1,81,18,SMOTE,all,141.13808122683716,110.34257313825542
32.33665776660908,59.01398086828551,48.384763741562196,LGBMClassifier,lightgbm,1,91,8,SMOTE,all,139.7354023764568,107.39874460984771
58.37325270928224,0.0,88.80183220829315,LGBMClassifier,lightgbm,11,1,88,SMOTE,all,147.1750849175754,88.80183220829315
58.95044762054342,0.0,88.57280617164899,LGBMClassifier,lightgbm,11,11,78,SMOTE,all,147.5232537921924,88.57280617164899
60.81356997015863,0.0,87.27097396335583,LGBMClassifier,lightgbm,11,21,68,SMOTE,all,148.08454393351445,87.27097396335583
63.23229150306267,0.0,85.59546769527483,LGBMClassifier,lightgbm,11,31,58,SMOTE,all,148.82775919833747,85.59546769527483
66.00832417150934,0.07358351729212656,83.2208293153327,LGBMClassifier,lightgbm,11,41,48,SMOTE,all,149.30273700413417,83.29441283262481
70.22538087011151,0.29433406916850624,78.56798457087754,LGBMClassifier,lightgbm,11,51,38,SMOTE,all,149.08769951015753,78.86231864004604
75.91094707083398,2.1339220014716704,69.50337512054003,LGBMClassifier,lightgbm,11,61,28,SMOTE,all,147.5482441928457,71.63729712201169
82.39555520653369,5.371596762325239,54.182738669238184,LGBMClassifier,lightgbm,11,71,18,SMOTE,all,141.94989063809712,59.55433543156342
89.57907962933878,12.803532008830022,23.5053037608486,LGBMClassifier,lightgbm,11,81,8,SMOTE,all,125.8879153990174,36.30883576967862
72.2278938275483,0.0,76.10896817743492,LGBMClassifier,lightgbm,21,1,78,SMOTE,all,148.3368620049832,76.10896817743492
72.58127846709597,0.0,75.40983606557377,LGBMClassifier,lightgbm,21,11,68,SMOTE,all,147.99111453266974,75.40983606557377
75.42209831945972,0.0,71.91417550626808,LGBMClassifier,lightgbm,21,21,58,SMOTE,all,147.3362738257278,71.91417550626808
78.88526778702686,0.0,66.61041465766635,LGBMClassifier,lightgbm,21,31,48,SMOTE,all,145.49568244469322,66.61041465766635
83.42233390921942,0.07358351729212656,58.510125361620055,LGBMClassifier,lightgbm,21,41,38,SMOTE,all,142.0060427881316,58.58370887891219
88.30689492696717,0.29433406916850624,47.04676952748312,LGBMClassifier,lightgbm,21,51,28,SMOTE,all,135.6479985236188,47.34110359665163
93.81380555991832,1.545253863134658,29.423818707810995,LGBMClassifier,lightgbm,21,61,18,SMOTE,all,124.78287813086398,30.969072570945656
98.03871525051045,2.207505518763797,7.509643201542912,LGBMClassifier,lightgbm,21,71,8,SMOTE,all,107.75586397081716,9.717148720306708
82.66059368619445,0.0,59.24541947926711,LGBMClassifier,lightgbm,31,1,68,SMOTE,all,141.90601316546156,59.24541947926711
83.33595099732997,0.0,58.23288331726133,LGBMClassifier,lightgbm,31,11,58,SMOTE,all,141.5688343145913,58.23288331726133
86.40450761740223,0.0,51.27772420443587,LGBMClassifier,lightgbm,31,21,48,SMOTE,all,137.6822318218381,51.27772420443587
90.60782158002199,0.0,41.52603664416586,LGBMClassifier,lightgbm,31,31,38,SMOTE,all,132.13385822418786,41.52603664416586
94.48719962305638,0.0,29.194792671166823,LGBMClassifier,lightgbm,31,41,28,SMOTE,all,123.6819922942232,29.194792671166823
97.77171352285221,0.14716703458425312,14.163452266152362,LGBMClassifier,lightgbm,31,51,18,SMOTE,all,112.08233282358881,14.310619300736615
99.64268886445737,0.36791758646063283,1.9768563162970105,LGBMClassifier,lightgbm,31,61,8,SMOTE,all,101.98746276721499,2.3447739027576433
90.66279252395162,0.0,40.6099324975892,LGBMClassifier,lightgbm,41,1,58,SMOTE,all,131.27272502154085,40.6099324975892
91.60907805874038,0.0,38.23529411764706,LGBMClassifier,lightgbm,41,11,48,SMOTE,all,129.84437217638742,38.23529411764706
94.79346631066437,0.0,27.736258437801347,LGBMClassifier,lightgbm,41,21,38,SMOTE,all,122.52972474846571,27.736258437801347
97.44385110727187,0.0,16.441658630665383,LGBMClassifier,lightgbm,41,31,28,SMOTE,all,113.88550973793726,16.441658630665383
99.31286320087953,0.0,5.8944069431051105,LGBMClassifier,lightgbm,41,41,18,SMOTE,all,105.20727014398463,5.8944069431051105
99.96073504005027,0.0,0.20491803278688525,LGBMClassifier,lightgbm,41,51,8,SMOTE,all,100.16565307283716,0.20491803278688525
96.26982880477462,0.0,21.51639344262295,LGBMClassifier,lightgbm,51,1,48,SMOTE,all,117.78622224739756,21.51639344262295
97.11009894769907,0.0,17.55062680810029,LGBMClassifier,lightgbm,51,11,38,SMOTE,all,114.66072575579935,17.55062680810029
98.83775718548767,0.0,8.751205400192864,LGBMClassifier,lightgbm,51,21,28,SMOTE,all,107.58896258568053,8.751205400192864
99.83901366420606,0.0,1.603182256509161,LGBMClassifier,lightgbm,51,31,18,SMOTE,all,101.44219592071522,1.603182256509161
99.98233076802262,0.0,0.08437801350048216,LGBMClassifier,lightgbm,51,41,8,SMOTE,all,100.0667087815231,0.08437801350048216
99.00855976126904,0.0,7.473481195756991,LGBMClassifier,lightgbm,61,1,38,SMOTE,all,106.48204095702603,7.473481195756991
99.56808544055285,0.0,3.7367405978784953,LGBMClassifier,lightgbm,61,11,28,SMOTE,all,103.30482603843134,3.7367405978784953
99.9548452960578,0.0,0.27724204435872707,LGBMClassifier,lightgbm,61,21,18,SMOTE,all,100.23208734041651,0.27724204435872707
99.99607350400503,0.0,0.03616200578592093,LGBMClassifier,lightgbm,61,31,8,SMOTE,all,100.03223550979095,0.03616200578592093
99.91754358410554,0.0,0.5544840887174541,LGBMClassifier,lightgbm,71,1,28,SMOTE,all,100.47202767282299,0.5544840887174541
99.99411025600754,0.0,0.06027000964320155,LGBMClassifier,lightgbm,71,11,18,SMOTE,all,100.05438026565075,0.06027000964320155
100.0,0.0,0.0,LGBMClassifier,lightgbm,71,21,8,SMOTE,all,100.0,0.0
100.0,0.0,0.0,LGBMClassifier,lightgbm,81,1,18,SMOTE,all,100.0,0.0
100.0,0.0,0.012054001928640309,LGBMClassifier,lightgbm,81,11,8,SMOTE,all,100.01205400192863,0.012054001928640309
100.0,0.0,0.0,LGBMClassifier,lightgbm,91,1,8,SMOTE,all,100.0,0.0
Classification
None, Pre, Diabetes
None	Pre	Diab	Classifier	Sampler	Mode	Total	Diab2
47	62.284043	32.450331	63.090646	GradientBoostingClassifier	RandomOverSampler	auto	157.825020	95.540977
46	62.325271	28.550405	65.923337	LGBMClassifier	RandomOverSampler	auto	156.799012	94.473741
48	64.202136	31.714496	60.342334	AdaBoostClassifier	RandomOverSampler	auto	156.258966	92.056830
63	63.833045	33.995585	57.581967	LogisticRegression	SMOTE	all	155.410598	91.577552
49	64.469138	31.420162	59.365959	LogisticRegression	RandomOverSampler	auto	155.255259	90.786121
50	62.884797	35.246505	56.231919	LinearDiscriminantAnalysis	RandomOverSampler	auto	154.363220	91.478424
64	62.142689	37.380427	54.689007	LinearDiscriminantAnalysis	SMOTE	all	154.212122	92.069434
8	62.117167	36.129507	55.810029	LinearDiscriminantAnalysis	ADASYN	auto	154.056703	91.939536
7	63.642610	32.008830	58.208775	LogisticRegression	ADASYN	auto	153.860216	90.217605
9	66.862337	27.740986	58.570395	RidgeClassifier	ADASYN	auto	153.173718	86.311381
65	67.364929	26.490066	59.185149	RidgeClassifier	SMOTE	all	153.040144	85.675216
51	68.269986	22.958057	61.366924	RidgeClassifier	RandomOverSampler	auto	152.594967	84.324981
52	67.101853	9.271523	75.204918	SGDClassifier	RandomOverSampler	auto	151.578295	84.476441
11	66.732763	19.793966	64.995178	SGDClassifier	ADASYN	auto	151.521907	84.789145
10	58.443930	26.122149	66.188525	GaussianNB	ADASYN	auto	150.754603	92.310673
66	58.481231	30.316409	61.620058	GaussianNB	SMOTE	all	150.417698	91.936467
67	75.378907	17.071376	56.846673	SGDClassifier	SMOTE	all	149.296956	73.918049
53	69.375294	17.586461	60.920926	GaussianNB	RandomOverSampler	auto	147.882681	78.507386
62	76.774776	3.311258	66.321119	AdaBoostClassifier	SMOTE	all	146.407153	69.632377
6	74.799749	5.518764	66.043877	AdaBoostClassifier	ADASYN	auto	146.362389	71.562640
61	83.088582	0.441501	59.763742	GradientBoostingClassifier	SMOTE	all	143.293824	60.205243
5	82.949191	0.220751	59.727580	GradientBoostingClassifier	ADASYN	auto	142.897521	59.948330
96	84.001492	2.722590	48.987464	GaussianNB	Base	None	135.711546	51.710054
97	86.867834	3.752759	27.603664	DecisionTreeClassifier	Base	None	118.224258	31.356424
92	97.259306	0.000000	17.044359	LinearDiscriminantAnalysis	Base	None	114.303665	17.044359
94	95.614104	0.367918	17.369817	KNeighborsClassifier	Base	None	113.351838	17.737734
86	97.801162	0.000000	15.537608	AdaBoostClassifier	Base	None	113.338771	15.537608
95	95.162557	0.147167	17.514465	ExtraTreesClassifier	Base	None	112.824189	17.661632
93	95.940003	0.147167	16.369335	RandomForestClassifier	Base	None	112.456505	16.516502
85	98.081907	0.000000	14.163452	GradientBoostingClassifier	Base	None	112.245359	14.163452
87	97.889508	0.000000	13.729508	LogisticRegression	Base	None	111.619017	13.729508
84	98.399953	0.000000	12.258920	LGBMClassifier	Base	None	110.658873	12.258920
88	99.795822	0.000000	1.952748	RidgeClassifier	Base	None	101.748571	1.952748
89	100.000000	0.000000	0.000000	QuadraticDiscriminantAnalysis	Base	None	100.000000	0.000000
90	100.000000	0.000000	0.000000	DummyClassifier	Base	None	100.000000	0.000000
91	100.000000	0.000000	0.000000	SGDClassifier	Base	None	100.000000	0.000000
Binary
None, Pre+Diabetes
None	Diab	Classifier	Sampler	Mode	Total	Diab2
18	69.334668	78.868863	LGBMClassifier	RandomOverSampler	auto	148.203531	78.868863
8	67.934900	78.568469	LinearDiscriminantAnalysis	ADASYN	auto	146.503369	78.568469
7	67.934900	78.568469	RidgeClassifier	ADASYN	auto	146.503369	78.568469
19	69.835287	78.371659	GradientBoostingClassifier	RandomOverSampler	auto	148.206946	78.371659
11	65.537821	77.874456	GaussianNB	ADASYN	auto	143.412277	77.874456
9	69.008776	77.460120	LogisticRegression	ADASYN	auto	146.468896	77.460120
35	69.717494	76.993992	RidgeClassifier	SMOTE	all	146.711486	76.993992
36	69.717494	76.993992	LinearDiscriminantAnalysis	SMOTE	all	146.711486	76.993992
21	70.257377	76.527864	RidgeClassifier	RandomOverSampler	auto	146.785241	76.527864
22	70.257377	76.527864	LinearDiscriminantAnalysis	RandomOverSampler	auto	146.785241	76.527864
20	71.266466	76.299979	AdaBoostClassifier	RandomOverSampler	auto	147.566446	76.299979
23	71.091741	75.750984	LogisticRegression	RandomOverSampler	auto	146.842725	75.750984
37	71.036771	75.699192	LogisticRegression	SMOTE	all	146.735963	75.699192
39	68.245087	75.077688	GaussianNB	SMOTE	all	143.322775	75.077688
38	73.651766	71.452248	SGDClassifier	SMOTE	all	145.104014	71.452248
24	75.452029	69.680961	SGDClassifier	RandomOverSampler	auto	145.132990	69.680961
10	75.457919	68.935156	SGDClassifier	ADASYN	auto	144.393075	68.935156
25	74.721715	66.034804	GaussianNB	RandomOverSampler	auto	140.756519	66.034804
17	68.969511	64.781438	KNeighborsClassifier	RandomOverSampler	auto	133.750949	64.781438
6	77.741131	64.512119	AdaBoostClassifier	ADASYN	auto	142.253251	64.512119
34	78.630465	63.900974	AdaBoostClassifier	SMOTE	all	142.531439	63.900974
5	68.009502	63.807748	KNeighborsClassifier	ADASYN	auto	131.817250	63.807748
33	70.241671	62.481873	KNeighborsClassifier	SMOTE	all	132.723544	62.481873
32	84.101930	55.127408	GradientBoostingClassifier	SMOTE	all	139.229338	55.127408
4	84.618254	53.428631	GradientBoostingClassifier	ADASYN	auto	138.046885	53.428631
55	84.099967	48.746634	GaussianNB	TomekLinks	all	132.846600	48.746634
68	84.149047	47.866169	GaussianNB	Base	None	132.015216	47.866169
69	86.834717	30.909468	DecisionTreeClassifier	Base	None	117.744185	30.909468
67	94.237980	19.618811	KNeighborsClassifier	Base	None	113.856791	19.618811
66	94.724856	18.645121	ExtraTreesClassifier	Base	None	113.369977	18.645121
65	95.368789	17.837166	RandomForestClassifier	Base	None	113.205955	17.837166
61	96.862791	16.656308	LinearDiscriminantAnalysis	Base	None	113.519100	16.656308
58	97.331998	16.428423	AdaBoostClassifier	Base	None	113.760422	16.428423
56	97.752125	14.605345	GradientBoostingClassifier	Base	None	112.357470	14.605345
60	97.544025	14.232443	LogisticRegression	Base	None	111.776467	14.232443
57	97.985747	13.714522	LGBMClassifier	Base	None	111.700270	13.714522
59	99.579873	3.148954	RidgeClassifier	Base	None	102.728827	3.148954
62	100.000000	0.000000	QuadraticDiscriminantAnalysis	Base	None	100.000000	0.000000
63	100.000000	0.000000	DummyClassifier	Base	None	100.000000	0.000000
64	100.000000	0.000000	SGDClassifier	Base	None	100.000000	0.000000
 	Description	Value
0	Session id	42
1	Target	diabetes
2	Target type	Multiclass
3	Original data shape	(242362, 22)
4	Transformed data shape	(242362, 63)
5	Transformed train set shape	(169653, 63)
6	Transformed test set shape	(72709, 63)
7	Numeric features	10
8	Categorical features	11
9	Preprocess	True
10	Imputation type	simple
11	Numeric imputation	mean
12	Categorical imputation	mode
13	Maximum one-hot encoding	25
14	Encoding method	None
15	Fold Generator	StratifiedKFold
16	Fold Number	10
17	CPU Jobs	-1
18	Use GPU	False
19	Log Experiment	False
20	Experiment Name	clf-default-name
21	USI	5c44
array(['bmi', 'smoker', 'any_healthcare_insurance',
       'general_health_status', 'mental_health_status',
       'physical_health_status', 'age', 'education', 'income', 'race',
       'routine_checkup'], dtype=object)
0.0    5434
1.0    5434
2.0    5434
Name: diabetes, dtype: int64
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
Transformation Pipeline and Model Successfully Saved
NearMiss all
...
TomekLinks all
Transformation Pipeline and Model Successfully Saved
TomekLinks all
Transformation Pipeline and Model Successfully Saved
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
Sampler	type
42	NearMiss	all
56	RandomOverSampler	auto
14	SMOTE	all
0	TomekLinks	all
model_AdaBoostClassifier_ADASYN_auto.pkl
model_AdaBoostClassifier_base_none.pkl
model_AdaBoostClassifier_NearMiss_all.pkl
model_AdaBoostClassifier_RandomOverSampler_auto.pkl
model_AdaBoostClassifier_SMOTE_all.pkl
model_AdaBoostClassifier_TomekLinks_all.pkl
model_DecisionTreeClassifier_ADASYN_auto.pkl
model_DecisionTreeClassifier_base_none.pkl
model_DecisionTreeClassifier_NearMiss_all.pkl
model_DecisionTreeClassifier_RandomOverSampler_auto.pkl
model_DecisionTreeClassifier_SMOTE_all.pkl
model_DecisionTreeClassifier_TomekLinks_all.pkl
model_DummyClassifier_ADASYN_auto.pkl
model_DummyClassifier_base_none.pkl
model_DummyClassifier_NearMiss_all.pkl
model_DummyClassifier_RandomOverSampler_auto.pkl
model_DummyClassifier_SMOTE_all.pkl
model_DummyClassifier_TomekLinks_all.pkl
model_ExtraTreesClassifier_ADASYN_auto.pkl
model_ExtraTreesClassifier_base_none.pkl
model_ExtraTreesClassifier_NearMiss_all.pkl
model_ExtraTreesClassifier_RandomOverSampler_auto.pkl
model_ExtraTreesClassifier_SMOTE_all.pkl
model_ExtraTreesClassifier_TomekLinks_all.pkl
model_GaussianNB_ADASYN_auto.pkl
...
model_SGDClassifier_NearMiss_all.pkl
model_SGDClassifier_RandomOverSampler_auto.pkl
model_SGDClassifier_SMOTE_all.pkl
model_SGDClassifier_TomekLinks_all.pkl
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.8606	0.8162	0.8606	0.8119	0.8191	0.1516	0.2045
1	0.8598	0.8198	0.8598	0.8092	0.8157	0.1311	0.1857
2	0.8607	0.8080	0.8607	0.8106	0.8169	0.1383	0.1950
3	0.8615	0.8069	0.8615	0.8142	0.8192	0.1499	0.2068
4	0.8620	0.8222	0.8620	0.8148	0.8195	0.1523	0.2106
5	0.8604	0.8123	0.8604	0.8100	0.8166	0.1373	0.1930
6	0.8618	0.8153	0.8618	0.8125	0.8175	0.1414	0.2017
7	0.8594	0.8127	0.8594	0.8079	0.8161	0.1357	0.1877
8	0.8605	0.8167	0.8605	0.8110	0.8157	0.1299	0.1876
9	0.8606	0.8137	0.8606	0.8113	0.8190	0.1523	0.2050
Mean	0.8607	0.8144	0.8607	0.8114	0.8175	0.1420	0.1977
Std	0.0008	0.0045	0.0008	0.0020	0.0015	0.0084	0.0086
98.40780587403802,0.0,12.704918032786885,VotingClassifier,a,b,111.1127239068249,12.704918032786885
Pipeline
numerical_imputer: TransformerWrapper
transformer: SimpleImputer

SimpleImputer
categorical_imputer: TransformerWrapper
TransformerWrapper(exclude=None, include=[],
                   transformer=SimpleImputer(add_indicator=False, copy=True,
                                             fill_value=None,
                                             keep_empty_features=False,
                                             missing_values=nan,
                                             strategy='most_frequent',
                                             verbose='deprecated'))
transformer: SimpleImputer

SimpleImputer
SimpleImputer(strategy='most_frequent')

GradientBoostingClassifier
GradientBoostingClassifier(random_state=123)
Transformation Pipeline and Model Successfully Loaded
83.08858174964662,0.44150110375275936,59.763741562198646,Pipeline,SMOTE,all,143.29382441559804,60.20524266595141
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.8580	0.8073	0.8580	0.8076	0.8193	0.1599	0.2024
1	0.8567	0.8065	0.8567	0.8095	0.8187	0.1584	0.1978
2	0.8572	0.7999	0.8572	0.8054	0.8177	0.1509	0.1930
3	0.8584	0.7999	0.8584	0.8100	0.8216	0.1728	0.2140
4	0.8587	0.8116	0.8587	0.8095	0.8210	0.1692	0.2118
5	0.8572	0.7998	0.8572	0.8071	0.8194	0.1614	0.2016
6	0.8586	0.8037	0.8586	0.8076	0.8188	0.1556	0.2008
7	0.8563	0.7995	0.8563	0.8054	0.8182	0.1554	0.1946
8	0.8592	0.8065	0.8592	0.8149	0.8212	0.1695	0.2135
9	0.8599	0.8045	0.8599	0.8125	0.8235	0.1835	0.2261
Mean	0.8580	0.8039	0.8580	0.8089	0.8199	0.1637	0.2056
Std	0.0011	0.0039	0.0011	0.0029	0.0017	0.0094	0.0099
Fitting 10 folds for each of 10 candidates, totalling 100 fits

GradientBoostingClassifier
GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.15, loss='log_loss', max_depth=7,
                           max_features=1.0, max_leaf_nodes=None,
                           min_impurity_decrease=0.02, min_samples_leaf=5,
                           min_samples_split=5, min_weight_fraction_leaf=0.0,
                           n_estimators=230, n_iter_no_change=None,
                           random_state=123, subsample=0.85, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)
Transformation Pipeline and Model Successfully Loaded
63.8330453902937,33.99558498896248,57.58196721311475,Pipeline,SMOTE,all,155.41059759237092,91.57755220207721
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.8570	0.8055	0.8570	0.8035	0.8149	0.1318	0.1765
1	0.8600	0.8113	0.8600	0.8104	0.8175	0.1421	0.1950
2	0.8565	0.7979	0.8565	0.8009	0.8123	0.1165	0.1620
3	0.8601	0.8007	0.8601	0.8105	0.8183	0.1474	0.1995
4	0.8596	0.8137	0.8596	0.8091	0.8177	0.1452	0.1958
5	0.8576	0.8043	0.8576	0.8029	0.8140	0.1267	0.1744
6	0.8590	0.8086	0.8590	0.8058	0.8147	0.1285	0.1806
7	0.8594	0.8063	0.8594	0.8084	0.8169	0.1401	0.1915
8	0.8610	0.8070	0.8610	0.8131	0.8178	0.1418	0.1989
9	0.8591	0.8039	0.8591	0.8083	0.8181	0.1494	0.1976
Mean	0.8589	0.8059	0.8589	0.8073	0.8162	0.1370	0.1872
Std	0.0014	0.0044	0.0014	0.0037	0.0020	0.0101	0.0123
Fitting 10 folds for each of 10 candidates, totalling 100 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
98.14080414637976,0.0,12.270973963355834,LogisticRegression,SMOTE,all,110.4117781097356,12.270973963355834
{'learning_rate': 0.13}
{'learning_rate': 0.19,
 'max_depth': 6,
 'max_features': 5,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 2}
100.0,0.14716703458425312,0.0,GradientBoostingClassifier,Test,Test,100.14716703458426,0.14716703458425312
Model:model_AdaBoostClassifier_ADASYN_auto.pkl
Transformation Pipeline and Model Successfully Loaded
77.03392492539658,3.384841795437822,66.97203471552555,Pipeline,filename,Pre,147.39080143635994,70.35687651096337
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.8591	0.8097	0.8591	0.8084	0.8179	0.1471	0.1957
1	0.8615	0.8142	0.8615	0.8141	0.8194	0.1513	0.2078
2	0.8605	0.8021	0.8605	0.8101	0.8168	0.1379	0.1937
3	0.8611	0.8027	0.8611	0.8127	0.8189	0.1496	0.2049
4	0.8610	0.8155	0.8610	0.8123	0.8189	0.1504	0.2052
5	0.8579	0.8064	0.8579	0.8031	0.8136	0.1236	0.1726
6	0.8612	0.8113	0.8612	0.8113	0.8165	0.1357	0.1952
7	0.8588	0.8083	0.8588	0.8068	0.8160	0.1361	0.1861
8	0.8617	0.8108	0.8617	0.8146	0.8186	0.1454	0.2041
9	0.8604	0.8078	0.8604	0.8112	0.8198	0.1573	0.2079
Mean	0.8603	0.8089	0.8603	0.8105	0.8176	0.1435	0.1973
Std	0.0012	0.0042	0.0012	0.0034	0.0018	0.0095	0.0107
Fitting 10 folds for each of 10 candidates, totalling 100 fits
98.27626825820637,0.0,12.53616200578592,AdaBoostClassifier,filename,Post,110.8124302639923,12.53616200578592
265.0840497016907 seconds
Model:model_AdaBoostClassifier_base_none.pkl
Transformation Pipeline and Model Successfully Loaded
97.90521438668132,0.0,14.729990356798456,Pipeline,filename,Pre,112.63520474347979,14.729990356798456
Initiated	. . . . . . . . . . . . . . . . . .	17:28:49
Status	. . . . . . . . . . . . . . . . . .	Fitting 10 Folds
Estimator	. . . . . . . . . . . . . . . . . .	Ada Boost Classifier
Fitting 10 folds for each of 10 candidates, totalling 100 fits
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
c:\Springboard\mec-mini-projects\Capstone\pyCaret\3label.ipynb Cell 29 line 1
      9 model=load_model(filename)
     10 print_test_heat_map3(model,"filename","Pre")
---> 11 result = tune_model(model)            
     12 print_test_heat_map3(result,"filename","Post") 
     13 print( time.time() - start_time, "seconds"          )

File c:\Users\djhar\anaconda3\envs\pyCaret\lib\site-packages\pycaret\utils\generic.py:965, in check_if_global_is_not_none.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    963     if globals_d[name] is None:
    964         raise ValueError(message)
--> 965 return func(*args, **kwargs)

File c:\Users\djhar\anaconda3\envs\pyCaret\lib\site-packages\pycaret\classification\functional.py:1208, in tune_model(estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   1017 @check_if_global_is_not_none(globals(), _CURRENT_EXPERIMENT_DECORATOR_DICT)
   1018 def tune_model(
   1019     estimator,
   (...)
   1037     **kwargs,
   1038 ) -> Any:
   1039     """
   1040     This function tunes the hyperparameters of a given estimator. The output of
   1041     this function is a score grid with CV scores by fold of the best selected
   (...)
...
   1710 # We need to be careful: the job list can be filling up as
   1711 # we empty it and Python list are not thread-safe by
   1712 # default hence the use of the lock

KeyboardInterrupt: 
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.8570	0.8055	0.8570	0.8035	0.8149	0.1318	0.1765
1	0.8599	0.8113	0.8599	0.8102	0.8174	0.1415	0.1943
2	0.8565	0.7979	0.8565	0.8009	0.8123	0.1165	0.1620
3	0.8601	0.8006	0.8601	0.8104	0.8181	0.1468	0.1988
4	0.8595	0.8137	0.8595	0.8090	0.8177	0.1450	0.1955
5	0.8576	0.8043	0.8576	0.8029	0.8140	0.1267	0.1744
6	0.8590	0.8086	0.8590	0.8058	0.8147	0.1285	0.1806
7	0.8594	0.8063	0.8594	0.8084	0.8169	0.1401	0.1915
8	0.8610	0.8070	0.8610	0.8131	0.8178	0.1418	0.1989
9	0.8591	0.8039	0.8591	0.8082	0.8180	0.1488	0.1970
Mean	0.8589	0.8059	0.8589	0.8072	0.8162	0.1368	0.1870
Std	0.0014	0.0044	0.0014	0.0037	0.0019	0.0099	0.0121
161775
97.60091094707083,0.0,14.633558341369335,GradientBoostingClassifier,SMOTE,all,112.23446928844017,14.633558341369335
Fitting 5 folds for each of 200 candidates, totalling 1000 fits
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
c:\Springboard\mec-mini-projects\Capstone\pyCaret\3label.ipynb Cell 32 line 2
     21 plt.figure(figsize=(12,8))
     22 weigh_data = pd.DataFrame({ 'score': gridsearch.cv_results_['mean_test_score'], 'weight': (1- weights)})
---> 23 sns.lineplot(weigh_data['weight'], weigh_data['score'])
     24 plt.xlabel('Weight for class 1')
     25 plt.ylabel('F1 score')

TypeError: lineplot() takes from 0 to 1 positional arguments but 2 were given
<Figure size 1200x800 with 0 Axes>
0.0    50936
2.0     8296
1.0     1359
Name: diabetes, dtype: int64
0.13691802412899606
0.8406529022462081
0.02242907362479576
97.60091094707083,0.0,14.633558341369335,GradientBoostingClassifier,SMOTE,all,112.23446928844017,14.633558341369335
Sampler	type
42	NearMiss	all
56	RandomOverSampler	auto
14	SMOTE	all
0	TomekLinks	all
79.2268729385896,0.07358351729212656,68.70781099324977,LGBMClassifier,lightgbm,?,148.0082674491315,68.78139451054189
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
c:\Springboard\mec-mini-projects\Capstone\pyCaret\3label.ipynb Cell 38 line 1
----> 1 lightgbm = create_model('GradientBoostingClassifier', class_weight = {0:.10, 1:.45,2:.45})
      2 print_test_heat_map3(lightgbm,"lightgbm","?")

File c:\Users\djhar\anaconda3\envs\pyCaret\lib\site-packages\pycaret\utils\generic.py:965, in check_if_global_is_not_none.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    963     if globals_d[name] is None:
    964         raise ValueError(message)
--> 965 return func(*args, **kwargs)

File c:\Users\djhar\anaconda3\envs\pyCaret\lib\site-packages\pycaret\classification\functional.py:1001, in create_model(estimator, fold, round, cross_validation, fit_kwargs, groups, probability_threshold, experiment_custom_tags, engine, verbose, return_train_score, **kwargs)
    872 @check_if_global_is_not_none(globals(), _CURRENT_EXPERIMENT_DECORATOR_DICT)
    873 def create_model(
    874     estimator: Union[str, Any],
   (...)
    885     **kwargs,
    886 ) -> Any:
    887     """
    888     This function trains and evaluates the performance of a given estimator
    889     using cross validation. The output of this function is a score grid with
   (...)
    998 
    999     """
-> 1001     return _CURRENT_EXPERIMENT.create_model(
...
   1339     raise ValueError(
   1340         f"Estimator {estimator} does not have the required fit() method."
   1341     )

ValueError: Estimator GradientBoostingClassifier not available. Please see docstring for list of available estimators.
Output is truncated. View as a scrollable element or open in a text editor. Adjust cell output settings...
 	Accuracy	AUC	Recall	Prec.	F1	Kappa	MCC
Fold	 	 	 	 	 	 	 
0	0.7697	0.8168	0.7697	0.8402	0.7922	0.3162	0.3427
1	0.7706	0.8206	0.7706	0.8434	0.7936	0.3239	0.3523
2	0.7634	0.8074	0.7634	0.8350	0.7868	0.2971	0.3219
3	0.7653	0.8075	0.7653	0.8388	0.7887	0.3091	0.3363
4	0.7730	0.8223	0.7730	0.8430	0.7950	0.3259	0.3532
5	0.7632	0.8121	0.7632	0.8391	0.7876	0.3068	0.3346
6	0.7728	0.8162	0.7728	0.8411	0.7945	0.3215	0.3476
7	0.7689	0.8124	0.7689	0.8398	0.7916	0.3142	0.3406
8	0.7711	0.8178	0.7711	0.8412	0.7934	0.3195	0.3461
9	0.7704	0.8129	0.7704	0.8382	0.7922	0.3119	0.3367
Mean	0.7688	0.8146	0.7688	0.8400	0.7916	0.3146	0.3412
Std	0.0034	0.0048	0.0034	0.0023	0.0028	0.0083	0.0089
Fitting 10 folds for each of 10 candidates, totalling 100 fits
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] bagging_fraction is set=0.7, subsample=1.0 will be ignored. Current value: bagging_fraction=0.7
[LightGBM] [Warning] bagging_freq is set=6, subsample_freq=0 will be ignored. Current value: bagging_freq=6
[LightGBM] [Warning] feature_fraction is set=0.5, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5
[LightGBM] [Warning] bagging_fraction is set=0.7, subsample=1.0 will be ignored. Current value: bagging_fraction=0.7
[LightGBM] [Warning] bagging_freq is set=6, subsample_freq=0 will be ignored. Current value: bagging_freq=6
79.16797549866499,0.0,68.28592092574735,LGBMClassifier,lightgbm_tuned,?,147.45389642441233,68.28592092574735

LGBMClassifier
LGBMClassifier(bagging_fraction=0.7, bagging_freq=6, boosting_type='gbdt',
               class_weight={0: 0.1, 1: 0.45, 2: 0.45}, colsample_bytree=1.0,
               feature_fraction=0.5, importance_type='split', learning_rate=0.1,
               max_depth=-1, min_child_samples=66, min_child_weight=0.001,
               min_split_gain=0.4, n_estimators=90, n_jobs=-1, num_leaves=90,
               objective=None, random_state=123, reg_alpha=0.0005,
               reg_lambda=0.1, subsample=1.0, subsample_for_bin=200000,
               subsample_freq=0)
79.2268729385896,0.07358351729212656,68.70781099324977,LGBMClassifier,lightgbm,0.1,0.45,0.45,148.0082674491315,68.78139451054189
64.27084969373331,0.07358351729212656,85.39054966248794,LGBMClassifier,lightgbm,0.05,0.55,0.45,149.7349828735134,85.46413317978008
0.05
2.54240615674572,0.0,99.92767598842815,LogisticRegression,lr,1,1,98,SMOTE,all,102.47008214517388,99.92767598842815
2.850636092351186,0.36791758646063283,99.87945998071359,LogisticRegression,lr,1,11,88,SMOTE,all,103.0980136595254,100.24737756717424
1.3782000942359038,4.341427520235467,99.04773384763742,LogisticRegression,lr,1,21,78,SMOTE,all,104.7673614621088,103.38916136787289
0.5163342233390922,16.114790286975715,94.90115718418515,LogisticRegression,lr,1,31,68,SMOTE,all,111.53228169449996,111.01594747116086
0.229700015705984,38.26342899190581,82.00337512054003,LogisticRegression,lr,1,41,58,SMOTE,all,120.49650412815183,120.26680411244584
0.09816239987435213,66.29874908020604,55.7738669238187,LogisticRegression,lr,1,51,48,SMOTE,all,122.17077840389909,122.07261600402472
0.05300769593215014,86.90213392200147,28.218418514946965,LogisticRegression,lr,1,61,38,SMOTE,all,115.17356013288058,115.12055243694843
0.02944871996230564,95.87932303164092,9.522661523625844,LogisticRegression,lr,1,71,28,SMOTE,all,105.43143327522908,105.40198455526676
0.013742735982409299,99.70566593083149,1.1089681774349083,LogisticRegression,lr,1,81,18,SMOTE,all,100.82837684424881,100.81463410826639
0.00785299198994817,100.0,0.0,LogisticRegression,lr,1,91,8,SMOTE,all,100.00785299198995,100.0
27.829040364378827,0.0,98.39681774349084,LogisticRegression,lr,11,1,88,SMOTE,all,126.22585810786966,98.39681774349084
30.141746505418567,0.0,98.14368370298939,LogisticRegression,lr,11,11,78,SMOTE,all,128.28543020840794,98.14368370298939
32.04217056698602,1.839587932303164,97.25168756027,LogisticRegression,lr,11,21,68,SMOTE,all,131.1334460595592,99.09127549257317
29.90026700172766,15.15820456217807,91.44165863066537,LogisticRegression,lr,11,31,58,SMOTE,all,136.5001301945711,106.59986319284344
24.07727344118109,44.73877851361296,71.33558341369334,LogisticRegression,lr,11,41,48,SMOTE,all,140.15163536848738,116.07436192730631
19.070991047589132,75.86460632818248,38.874156219865,LogisticRegression,lr,11,51,38,SMOTE,all,133.80975359563664,114.73876254804748
15.523401916130044,91.75864606328183,13.560752169720347,LogisticRegression,lr,11,61,28,SMOTE,all,120.84280014913222,105.31939823300216
12.880870111512484,97.27740986019133,1.916586306653809,LogisticRegression,lr,11,71,18,SMOTE,all,112.0748662783576,99.19399616684514
10.79982723417622,98.52832965415746,0.0,LogisticRegression,lr,11,81,8,SMOTE,all,109.32815688833368,98.52832965415746
42.109706298099574,0.0,95.55207328833173,LogisticRegression,lr,21,1,78,SMOTE,all,137.66177958643132,95.55207328833173
45.20574839013664,0.0,94.66007714561235,LogisticRegression,lr,21,11,68,SMOTE,all,139.86582553574897,94.66007714561235
48.59824092979425,0.9565857247976454,92.98457087753134,LogisticRegression,lr,21,21,58,SMOTE,all,142.53939753212325,93.94115660232897
48.808308465525364,16.777041942604857,84.35390549662488,LogisticRegression,lr,21,31,48,SMOTE,all,149.9392559047551,101.13094743922974
42.14504476205434,54.15746872700515,54.158630665380905,LogisticRegression,lr,21,41,38,SMOTE,all,150.46114415444038,108.31609939238605
35.393434898696405,81.38337012509199,20.29893924783028,LogisticRegression,lr,21,51,28,SMOTE,all,137.07574427161865,101.68230937292226
30.22812941730799,91.09639440765268,3.0617164898746383,LogisticRegression,lr,21,61,18,SMOTE,all,124.38624031483532,94.15811089752732
26.276111198366575,94.03973509933775,0.012054001928640309,LogisticRegression,lr,21,71,8,SMOTE,all,120.32790029963296,94.0517891012664
53.995209674886134,0.0,91.15236258437801,LogisticRegression,lr,31,1,68,SMOTE,all,145.14757225926417,91.15236258437801
57.85495523794566,0.0,89.15139826422373,LogisticRegression,lr,31,11,58,SMOTE,all,147.00635350216936,89.15139826422373
61.76967174493482,1.1037527593818985,85.80038572806171,LogisticRegression,lr,31,21,48,SMOTE,all,148.67381023237846,86.9041384874436
62.431286320087956,20.897718910963945,70.81726133076181,LogisticRegression,lr,31,31,38,SMOTE,all,154.14626656181372,91.71498024172577
55.506910632951154,61.66298749080206,31.509161041465767,LogisticRegression,lr,31,41,28,SMOTE,all,148.679059165219,93.17214853226783
47.97392806659337,80.20603384841796,5.508678881388621,LogisticRegression,lr,31,51,18,SMOTE,all,133.68864079639994,85.71471272980658
41.92712423433328,86.75496688741721,0.03616200578592093,LogisticRegression,lr,31,61,8,SMOTE,all,128.71825312753643,86.79112889320314
63.97047275011779,0.0,84.93249758919961,LogisticRegression,lr,41,1,58,SMOTE,all,148.90297033931742,84.93249758919961
68.43097220040836,0.0,80.74975891996142,LogisticRegression,lr,41,11,48,SMOTE,all,149.18073112036979,80.74975891996142
73.36068792209832,1.7660044150110374,73.8066538090646,LogisticRegression,lr,41,21,38,SMOTE,all,148.93334614617396,75.57265822407564
73.3194597141511,29.80132450331126,47.830279652844744,LogisticRegression,lr,41,31,28,SMOTE,all,150.9510638703071,77.631604156156
65.40757028427831,63.20824135393672,10.354387656702025,LogisticRegression,lr,41,41,18,SMOTE,all,138.97019929491705,73.56262901063874
57.81961677399089,74.90802060338484,0.09643201542912247,LogisticRegression,lr,41,51,8,SMOTE,all,132.82406939280486,75.00445261881397
73.30571697816868,0.0,75.02410800385728,LogisticRegression,lr,51,1,48,SMOTE,all,148.32982498202597,75.02410800385728
78.27862415580336,0.0,68.7198649951784,LogisticRegression,lr,51,11,38,SMOTE,all,146.99848915098178,68.7198649951784
83.4831945971415,3.016924208977189,56.61764705882353,LogisticRegression,lr,51,21,28,SMOTE,all,143.1177658649422,59.634571267800716
81.25294487199623,37.895511405445184,19.15380906460945,LogisticRegression,lr,51,31,18,SMOTE,all,138.30226534205087,57.04932047005463
73.20951782629182,58.13097866077999,0.20491803278688525,LogisticRegression,lr,51,41,8,SMOTE,all,131.5454145198587,58.335896693566866
81.91848594314433,0.0,62.22275795564127,LogisticRegression,lr,61,1,38,SMOTE,all,144.1412438987856,62.22275795564127
87.00133500863829,0.0,51.374156219865,LogisticRegression,lr,61,11,28,SMOTE,all,138.3754912285033,51.374156219865
91.29103188314748,7.652685798381163,30.171166827386692,LogisticRegression,lr,61,21,18,SMOTE,all,129.11488450891534,37.823852625767856
86.34364692948013,34.363502575423105,0.819672131147541,LogisticRegression,lr,61,31,8,SMOTE,all,121.52682163605077,35.18317470657065
89.49465996544683,0.0,45.26277724204436,LogisticRegression,lr,71,1,28,SMOTE,all,134.7574372074912,45.26277724204436
94.5009423590388,0.0,28.86933461909354,LogisticRegression,lr,71,11,18,SMOTE,all,123.37027697813234,28.86933461909354
95.72600910947071,11.479028697571744,2.844744455159113,LogisticRegression,lr,71,21,8,SMOTE,all,110.04978226220157,14.323773152730856
95.86147322129732,0.0,23.348601735776278,LogisticRegression,lr,81,1,18,SMOTE,all,119.21007495707359,23.348601735776278
99.2578922569499,0.44150110375275936,4.544358727097396,LogisticRegression,lr,81,11,8,SMOTE,all,104.24375208780006,4.985859830850156
99.64268886445737,0.0,2.953230472516876,LogisticRegression,lr,91,1,8,SMOTE,all,102.59591933697423,2.953230472516876
0.0,0.0,100.0,RidgeClassifier,ridge,1,1,98,SMOTE,all,100.0,100.0
0.0,0.0,100.0,RidgeClassifier,ridge,1,11,88,SMOTE,all,100.0,100.0
0.0,3.164091243561442,99.48167791706847,RidgeClassifier,ridge,1,21,78,SMOTE,all,102.64576916062991,102.64576916062991
0.0,14.348785871964681,95.85342333654773,RidgeClassifier,ridge,1,31,68,SMOTE,all,110.20220920851243,110.20220920851243
0.0,37.74834437086093,83.0400192864031,RidgeClassifier,ridge,1,41,58,SMOTE,all,120.78836365726401,120.78836365726401
0.0,66.22516556291392,55.70154291224687,RidgeClassifier,ridge,1,51,48,SMOTE,all,121.92670847516078,121.92670847516078
0.0,87.49080206033848,26.759884281581485,RidgeClassifier,ridge,1,61,38,SMOTE,all,114.25068634191997,114.25068634191997
0.0,96.83590875643856,7.027483124397301,RidgeClassifier,ridge,1,71,28,SMOTE,all,103.86339188083586,103.86339188083586
0.0,99.85283296541574,0.1325940212150434,RidgeClassifier,ridge,1,81,18,SMOTE,all,99.9854269866308,99.9854269866308
0.0,100.0,0.0,RidgeClassifier,ridge,1,91,8,SMOTE,all,100.0,100.0
14.881419820951782,0.0,99.26470588235294,RidgeClassifier,ridge,11,1,88,SMOTE,all,114.14612570330473,99.26470588235294
20.164520182189413,0.0,98.8066538090646,RidgeClassifier,ridge,11,11,78,SMOTE,all,118.971173991254,98.8066538090646
25.037301711952253,0.515084621044886,98.21600771456124,RidgeClassifier,ridge,11,21,68,SMOTE,all,123.76839404755837,98.73109233560612
20.382440709910476,13.09786607799853,94.00916104146577,RidgeClassifier,ridge,11,31,58,SMOTE,all,127.48946782937477,107.10702711946429
9.810350243442752,44.5916114790287,74.63837994214079,RidgeClassifier,ridge,11,41,48,SMOTE,all,129.04034166461224,119.22999142116947
4.311292602481545,80.0588668138337,37.60848601735776,RidgeClassifier,ridge,11,51,38,SMOTE,all,121.97864543367301,117.66735283119148
2.3735668289618346,95.51140544518027,9.691417550626808,RidgeClassifier,ridge,11,61,28,SMOTE,all,107.57638982476891,105.20282299580708
1.5627454059996857,99.77924944812362,0.37367405978784957,RidgeClassifier,ridge,11,71,18,SMOTE,all,101.71566891391114,100.15292350791147
1.2564787183917072,99.92641648270786,0.0,RidgeClassifier,ridge,11,81,8,SMOTE,all,101.18289520109957,99.92641648270786
36.449662321344434,0.0,96.75747348119576,RidgeClassifier,ridge,21,1,78,SMOTE,all,133.20713580254017,96.75747348119576
41.45005497094393,0.0,95.52796528447445,RidgeClassifier,ridge,21,11,68,SMOTE,all,136.97802025541836,95.52796528447445
46.83720747604838,0.07358351729212656,93.99710703953713,RidgeClassifier,ridge,21,21,58,SMOTE,all,140.90789803287765,94.07069055682926
49.546489712580495,9.565857247976453,87.66875602700097,RidgeClassifier,ridge,21,31,48,SMOTE,all,146.78110298755792,97.2346132749774
42.56910632951154,50.18395879323032,56.19575699132112,RidgeClassifier,ridge,21,41,38,SMOTE,all,148.948822114063,106.37971578455145
32.8785142139155,83.81162619573216,16.092092574734814,RidgeClassifier,ridge,21,51,28,SMOTE,all,132.78223298438246,99.90371877046698
25.63805559918329,93.67181751287713,0.9281581485053039,RidgeClassifier,ridge,21,61,18,SMOTE,all,120.23803126056572,94.59997566138243
20.794722789382757,95.73215599705665,0.0,RidgeClassifier,ridge,21,71,8,SMOTE,all,116.52687878643941,95.73215599705665
50.82849065493953,0.0,92.59884281581485,RidgeClassifier,ridge,31,1,68,SMOTE,all,143.42733347075438,92.59884281581485
56.13907648814198,0.0,90.06750241080039,RidgeClassifier,ridge,31,11,58,SMOTE,all,146.20657889894235,90.06750241080039
61.433956337364535,0.07358351729212656,86.54773384763742,RidgeClassifier,ridge,31,21,48,SMOTE,all,148.05527370229407,86.62131736492955
65.14645830061254,14.56953642384106,73.93924783027965,RidgeClassifier,ridge,31,31,38,SMOTE,all,153.65524255473323,88.50878425412071
58.79927752473692,60.41206769683591,27.483124397299903,RidgeClassifier,ridge,31,41,28,SMOTE,all,146.69446961887274,87.89519209413581
49.28341448091723,80.0588668138337,1.796046287367406,RidgeClassifier,ridge,31,51,18,SMOTE,all,131.13832758211834,81.8549131012011
40.959242971572166,86.75496688741721,0.0,RidgeClassifier,ridge,31,61,8,SMOTE,all,127.71420985898938,86.75496688741721
61.86194440081672,0.0,86.3066538090646,RidgeClassifier,ridge,41,1,58,SMOTE,all,148.1685982098813,86.3066538090646
67.50628239359196,0.0,81.56943105110896,RidgeClassifier,ridge,41,11,48,SMOTE,all,149.0757134447009,81.56943105110896
73.67873409769122,0.22075055187637968,74.66248794599807,RidgeClassifier,ridge,41,21,38,SMOTE,all,148.56197259556566,74.88323849787444
76.36249411025601,25.01839587932303,45.94985535197685,RidgeClassifier,ridge,41,31,28,SMOTE,all,147.3307453415559,70.96825123129989
68.1973456887074,62.17807211184695,3.784956605593057,RidgeClassifier,ridge,41,41,18,SMOTE,all,134.1603744061474,65.96302871744001
58.37717920527721,73.87785136129507,0.0,RidgeClassifier,ridge,41,51,8,SMOTE,all,132.25503056657226,73.87785136129507
72.1670331396262,0.0,76.51880424300867,RidgeClassifier,ridge,51,1,48,SMOTE,all,148.68583738263487,76.51880424300867
78.37874980367519,0.0,68.69575699132112,RidgeClassifier,ridge,51,11,38,SMOTE,all,147.07450679499632,68.69575699132112
84.8947699073347,1.398086828550405,55.36403085824494,RidgeClassifier,ridge,51,21,28,SMOTE,all,141.65688759413004,56.76211768679534
83.96615360452331,36.055923473142016,9.727579556412728,RidgeClassifier,ridge,51,31,18,SMOTE,all,129.74965663407806,45.783503029554744
74.23825977697503,56.43855776306107,0.0,RidgeClassifier,ridge,51,41,8,SMOTE,all,130.6768175400361,56.43855776306107
82.16978168682269,0.0,62.12632594021215,RidgeClassifier,ridge,61,1,38,SMOTE,all,144.29610762703481,62.12632594021215
88.56015391864301,0.0,47.830279652844744,RidgeClassifier,ridge,61,11,28,SMOTE,all,136.39043357148773,47.830279652844744
94.13970472750118,5.59234731420162,22.39633558341369,RidgeClassifier,ridge,61,21,18,SMOTE,all,122.12838762511649,27.988682897615313
88.45021203078373,31.27299484915379,0.0,RidgeClassifier,ridge,61,31,8,SMOTE,all,119.72320687993752,31.27299484915379
91.62478404272028,0.0,39.259884281581485,RidgeClassifier,ridge,71,1,28,SMOTE,all,130.88466832430177,39.259884281581485
97.06690749175436,0.0,18.105110896817745,RidgeClassifier,ridge,71,11,18,SMOTE,all,115.1720183885721,18.105110896817745
98.14865713836973,6.548933038999265,0.0,RidgeClassifier,ridge,71,21,8,SMOTE,all,104.697590177369,6.548933038999265
98.64339563373645,0.0,9.631147540983607,RidgeClassifier,ridge,81,1,18,SMOTE,all,108.27454317472005,9.631147540983607
100.0,0.0,0.0,RidgeClassifier,ridge,81,11,8,SMOTE,all,100.0,0.0
100.0,0.0,0.0,RidgeClassifier,ridge,91,1,8,SMOTE,all,100.0,0.0