In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.discriminant_analysis import StandardScaler
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
%pip install lightgbm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [11]:
random_state=123
best_models = {}

In [12]:
# Read in the data
df = pd.read_csv('../Data/hashed_combined.csv')
df

Unnamed: 0,smoke,drink,age,pesticide,gender,skin_cancer_history,cancer_history,has_piped_water,has_sewage_system,fitspatrick,...,region_hash_0,region_hash_1,region_hash_2,region_hash_3,region_hash_4,region_hash_5,region_hash_6,region_hash_7,region_hash_8,region_hash_9
0,False,False,55,False,0,True,True,True,True,3.0,...,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,False,True,79,False,1,True,False,False,False,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,-1.0,2.0
2,False,True,52,False,0,False,True,True,True,3.0,...,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0
3,False,False,74,True,0,False,False,False,False,1.0,...,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,1.0,-1.0,0.0
4,False,True,58,True,0,True,True,True,True,1.0,...,1.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,-1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1700,False,False,23,True,0,False,True,True,True,0.0,...,-2.0,0.0,1.0,0.0,-1.0,0.0,0.0,-3.0,0.0,0.0
1701,False,False,27,False,0,False,False,True,True,0.0,...,-2.0,0.0,1.0,0.0,-1.0,0.0,0.0,-3.0,0.0,0.0
1702,True,True,23,False,1,False,False,True,True,0.0,...,-2.0,0.0,1.0,0.0,-1.0,0.0,0.0,-3.0,0.0,0.0
1703,False,False,23,True,0,False,False,True,False,0.0,...,-2.0,0.0,1.0,0.0,-1.0,0.0,0.0,-3.0,0.0,0.0


# New Section

In [13]:
from imblearn.under_sampling import ClusterCentroids, TomekLinks
from imblearn.over_sampling import RandomOverSampler

def splitting_data(df, sampling):
    X = df.drop(['diagnostic'], axis=1)
    y = df['diagnostic']

    if sampling == 'none':
        return X, y
    elif sampling == 'SMOTEENN':
        sampler = SMOTEENN(random_state=random_state)
    elif sampling == 'SMOTE':
        sampler = SMOTE(random_state=random_state)
    elif sampling == 'under':
        sampler = RandomUnderSampler(random_state=random_state)
    elif sampling == 'over':
        sampler = RandomOverSampler(random_state=random_state)
    elif sampling == 'cluster_centroids':
        sampler = ClusterCentroids(random_state=random_state)
    elif sampling == 'tomek_links':
        sampler = TomekLinks()

    X_resampled, y_resampled = sampler.fit_resample(X, y)
    return X_resampled, y_resampled


In [14]:

def training(X_train, y_train):
    # Create a KNN classifier with 5 neighbors
    LGBM = lgb.LGBMClassifier()
    # Fit the classifier to the data
    LGBM.fit(X_train, y_train)
    return LGBM

In [15]:
def best_model(modelName, accuracy, precision, recall, f1):
    best_models[modelName] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [16]:
def predict(modleName,LGBM, X_test ,y_test):
    # Predict the labels for the training data X
    y_pred = LGBM.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    cr=classification_report(y_test, y_pred, output_dict=True)
    precision = cr['weighted avg']['precision']
    recall = cr['weighted avg']['recall']
    f1 = cr['weighted avg']['f1-score']
    best_model(modleName,accuracy,precision,recall,f1)
    cr=classification_report(y_test, y_pred)
    print(cr)

In [17]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

def optimize_with_grid(X_train, y_train):

    # Initialize the LGBMClassifier
    LGBM = lgb.LGBMClassifier()

    # Define the parameter grid
    param_grid = {
        'num_leaves': [31, 50, 70],
        'learning_rate': [0.01, 0.1, 0.5],
        'n_estimators': [50, 100, 200]
    }

    # Initialize GridSearchCV
    LGBM_cv = GridSearchCV(LGBM, param_grid, cv=5)

    # Fit the grid search to the data
    LGBM_cv.fit(X_train, y_train)

    # Best parameters and best score
    best_params = LGBM_cv.best_params_
    best_score = LGBM_cv.best_score_
    best_estimator = LGBM_cv.best_estimator_
    print(best_params)
    print(best_score)

    return best_estimator

<h1> LGBM on original data with optimization </h1>

In [18]:
# using function with no sampling
X, y= splitting_data(df, 'none')
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
# Scale the features using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
#check number of observations in each class in the set
print("Number of observations in each class in the training set:")
print(y.value_counts())

Number of observations in each class in the training set:
1    1494
0     211
Name: diagnostic, dtype: int64


In [20]:
LGBM1 = training(X_train, y_train)
y_pred = predict('original',LGBM1, X_test, y_test)


: 

In [None]:
best_LGBM1 = optimize_with_grid(X_train, y_train)
prediction = predict('original_grid',best_LGBM1, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 623, number of negative: 657
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000339 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1241
[LightGBM] [Info] Number of data points in the train set: 1280, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486719 -> initscore=-0.053137
[LightGBM] [Info] Start training from score -0.053137
[LightGBM] [Info] Number of positive: 623, number of negative: 657
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1241
[LightGBM] [Info] Number of data points in the train set: 1280, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.486719 -> initscore=-0.053137
[LightGBM] 

<h1> LGBM using SMOTE sampling </h1>

In [None]:
X,y = splitting_data(df, 'SMOTE')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
#check number of observations in each class in the set
print("Number of observations in each class in the training set:")
print(y.value_counts())

Number of observations in each class in the training set:
1    1013
0    1013
Name: Class, dtype: int64


In [None]:
LGBM2 =training(X_train, y_train)
y_pred = predict('SMOTE',LGBM2, X_test, y_test)

[LightGBM] [Info] Number of positive: 818, number of negative: 802
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1249
[LightGBM] [Info] Number of data points in the train set: 1620, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504938 -> initscore=0.019754
[LightGBM] [Info] Start training from score 0.019754
              precision    recall  f1-score   support

           0       0.88      0.90      0.89       211
           1       0.89      0.86      0.88       195

    accuracy                           0.88       406
   macro avg       0.88      0.88      0.88       406
weighted avg       0.88      0.88      0.88       406



In [None]:
best_LGBM2 = optimize_with_grid(X_train, y_train)
prediction = predict('SMOTE_grid',best_LGBM2, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1246
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504630 -> initscore=0.018519
[LightGBM] [Info] Start training from score 0.018519
[LightGBM] [Info] Number of positive: 654, number of negative: 642
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1247
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504630 -> initscore=0.018519
[LightGBM] [Info] Start training from score 0.018519
[LightGBM] [Info] Number of positive: 655, number of negative: 641
[

<h1> LGBM using SMOTEENN sampling </h1>

In [None]:
X,y = splitting_data(df, 'SMOTEENN')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
print("Number of observations in each class in the set:")
print(y.value_counts())

Number of observations in each class in the set:
1    156
0    137
Name: Class, dtype: int64


In [None]:
LGBM3 =training(X_train, y_train)
y_pred = predict('SMOTEENN',LGBM3, X_test, y_test)

[LightGBM] [Info] Number of positive: 121, number of negative: 113
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 468
[LightGBM] [Info] Number of data points in the train set: 234, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.517094 -> initscore=0.068403
[LightGBM] [Info] Start training from score 0.068403
              precision    recall  f1-score   support

           0       0.89      0.67      0.76        24
           1       0.80      0.94      0.87        35

    accuracy                           0.83        59
   macro avg       0.85      0.80      0.82        59
weighted avg       0.84      0.83      0.83        59



In [None]:
from joblib import dump
dump(LGBM3,'/content/LGBM_SMOTEENN.joblib')

['/content/LGBM_SMOTEENN.joblib']

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
best_LGBM3 = optimize_with_grid(X_train, y_train)
prediction = predict('SMOTEENN_grid',best_LGBM3, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 97, number of negative: 91
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 389
[LightGBM] [Info] Number of data points in the train set: 188, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.515957 -> initscore=0.063851
[LightGBM] [Info] Start training from score 0.063851
[LightGBM] [Info] Number of positive: 97, number of negative: 90
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000042 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 392
[LightGBM] [Info] Number of data points in the train set: 187, number of used features: 12
[LightGBM] [Info] [binary:BoostFromSc

<h1> DT on Random undersampling </h1>

In [None]:
X,y = splitting_data(df, 'under')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
print("Number of observations in each class in the set:")
print(y.value_counts())

Number of observations in each class in the set:
0    987
1    987
Name: Class, dtype: int64


In [None]:
LGBM4 =training(X_train, y_train)
y_pred = predict('undersampling',LGBM4, X_test, y_test)

[LightGBM] [Info] Number of positive: 792, number of negative: 787
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000265 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1240
[LightGBM] [Info] Number of data points in the train set: 1579, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501583 -> initscore=0.006333
[LightGBM] [Info] Start training from score 0.006333
              precision    recall  f1-score   support

           0       0.87      0.91      0.89       200
           1       0.90      0.86      0.88       195

    accuracy                           0.88       395
   macro avg       0.88      0.88      0.88       395
weighted avg       0.88      0.88      0.88       395



In [None]:
best_LGBM4 = optimize_with_grid(X_train, y_train)
prediction = predict('undersampling_grid',best_LGBM4, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 634, number of negative: 629
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000106 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1242
[LightGBM] [Info] Number of data points in the train set: 1263, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501979 -> initscore=0.007918
[LightGBM] [Info] Start training from score 0.007918
[LightGBM] [Info] Number of positive: 633, number of negative: 630
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1238
[LightGBM] [Info] Number of data points in the train set: 1263, number of used features: 12
[LightGBM] [Info] [binary:Boo

<h1> DT on Random Oversampling </h1>

In [None]:
X,y = splitting_data(df, 'over')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
print("Number of observations in each class in the set:")
print(y.value_counts())

Number of observations in each class in the set:
1    1013
0    1013
Name: Class, dtype: int64


In [None]:
LGBM5 =training(X_train, y_train)
y_pred = predict('oversampling',LGBM5, X_test, y_test)

[LightGBM] [Info] Number of positive: 818, number of negative: 802
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000421 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1246
[LightGBM] [Info] Number of data points in the train set: 1620, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504938 -> initscore=0.019754
[LightGBM] [Info] Start training from score 0.019754
              precision    recall  f1-score   support

           0       0.89      0.90      0.89       211
           1       0.89      0.88      0.88       195

    accuracy                           0.89       406
   macro avg       0.89      0.89      0.89       406
weighted avg       0.89      0.89      0.89       406



In [None]:
best_LGBM5 = optimize_with_grid(X_train, y_train)
prediction = predict('oversampling_grid',best_LGBM5, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 655, number of negative: 641
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1242
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.505401 -> initscore=0.021606
[LightGBM] [Info] Start training from score 0.021606
[LightGBM] [Info] Number of positive: 654, number of negative: 642
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1242
[LightGBM] [Info] Number of data points in the train set: 1296, number of used features: 12
[LightGBM] [Info] [binary:Boo

<h1> DT on Cluster Centroids </h1>

In [None]:
X,y = splitting_data(df, 'cluster_centroids')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
print("Number of observations in each class in the set:")
print(y.value_counts())

Number of observations in each class in the set:
0    987
1    987
Name: Class, dtype: int64


In [None]:
LGBM6 =training(X_train, y_train)
y_pred = predict('cluster_centroids',LGBM6, X_test, y_test)

[LightGBM] [Info] Number of positive: 792, number of negative: 787
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1246
[LightGBM] [Info] Number of data points in the train set: 1579, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501583 -> initscore=0.006333
[LightGBM] [Info] Start training from score 0.006333
              precision    recall  f1-score   support

           0       0.86      0.91      0.89       200
           1       0.90      0.85      0.88       195

    accuracy                           0.88       395
   macro avg       0.88      0.88      0.88       395
weighted avg       0.88      0.88      0.88       395



In [None]:
best_LGBM6 = optimize_with_grid(X_train, y_train)
prediction = predict('cluster_centroids_grid',best_LGBM6, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 634, number of negative: 629
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000311 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1244
[LightGBM] [Info] Number of data points in the train set: 1263, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.501979 -> initscore=0.007918
[LightGBM] [Info] Start training from score 0.007918
[LightGBM] [Info] Number of positive: 633, number of negative: 630
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000070 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1244
[LightGBM] [Info] Number of data points in the train set: 1263, number of used features: 12
[LightGBM] [Info] [binary:Boo

<h1> DT on Tomek Links </h1>

In [None]:
X,y = splitting_data(df, 'tomek_links')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
print("Number of observations in each class in the set:")
print(y.value_counts())

Number of observations in each class in the set:
1    987
0    694
Name: Class, dtype: int64


In [None]:
LGBM7 =training(X_train, y_train)
y_pred = predict('tomek_links',LGBM7, X_test, y_test)

[LightGBM] [Info] Number of positive: 789, number of negative: 555
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1240
[LightGBM] [Info] Number of data points in the train set: 1344, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.587054 -> initscore=0.351798
[LightGBM] [Info] Start training from score 0.351798
              precision    recall  f1-score   support

           0       0.88      0.86      0.87       139
           1       0.91      0.91      0.91       198

    accuracy                           0.89       337
   macro avg       0.89      0.89      0.89       337
weighted avg       0.89      0.89      0.89       337



In [None]:
best_LGBM7 = optimize_with_grid(X_train, y_train)
prediction = predict('tomek_links_grid',best_LGBM7, X_test, y_test)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[LightGBM] [Info] Number of positive: 632, number of negative: 444
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1237
[LightGBM] [Info] Number of data points in the train set: 1076, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.587361 -> initscore=0.353065
[LightGBM] [Info] Start training from score 0.353065
[LightGBM] [Info] Number of positive: 631, number of negative: 444
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1238
[LightGBM] [Info] Number of data points in the train set: 1075, number of used features: 12
[LightGBM] [Info] [binary:Boo

In [None]:
best_model_df = pd.DataFrame.from_dict(best_models, orient='index')
best_model_df.sort_values(by='accuracy', ascending=False, inplace=True)
best_model_df

Unnamed: 0,accuracy,precision,recall,f1
original,0.9125,0.912534,0.9125,0.912465
original_grid,0.9025,0.902643,0.9025,0.90243
tomek_links_grid,0.89911,0.898907,0.89911,0.898878
SMOTEENN_grid,0.898305,0.898888,0.898305,0.897507
tomek_links,0.893175,0.893002,0.893175,0.893056
SMOTE_grid,0.891626,0.891622,0.891626,0.891602
oversampling,0.889163,0.88915,0.889163,0.889151
oversampling_grid,0.889163,0.88915,0.889163,0.889151
SMOTE,0.881773,0.88197,0.881773,0.881678
undersampling,0.881013,0.881738,0.881013,0.880916
