### Measures
 - Accuracy
 - Precision
 - Recall
 - F1 Score
 - AUC-ROC

In [3]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.metrics import classification_report,accuracy_score, precision_score, recall_score, f1_score, confusion_matrix 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import lightgbm as lgb

from imblearn.pipeline import make_pipeline
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
import warnings
warnings.filterwarnings("ignore")

In [None]:
df = pd.read_csv('../data/diabetes_data_encoded.csv').drop(columns=["Unnamed: 0"], axis=1)

In [None]:
print(df.shape)

In [None]:
readmit = df['readmitted'].value_counts()

# Print the counts
print("Occurrences of Readmitted:")
print("Value 0:", readmit[0])
print("Value 1:", readmit[1])
print('%:',readmit[1]/readmit[0])

In [6]:
X = df.drop('readmitted', axis=1)
y = df['readmitted'].values
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

under_sampler = RandomUnderSampler(random_state=42)
X_train_under, y_train_under = under_sampler.fit_resample(X_train, y_train)

In [8]:
pipeline = make_pipeline(SMOTE(random_state=42), RandomUnderSampler(random_state=42))
X_train_resampled, y_train_resampled = pipeline.fit_resample(X_train, y_train)

In [9]:
rfc= RandomForestClassifier(random_state=42)
rfc.fit(X_train_smote, y_train_smote)
rfc_predictions = rfc.predict(X_val) 
print(classification_report(y_val, rfc_predictions))
print(confusion_matrix(y_val, rfc_predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     26848
           1       0.88      0.78      0.83       438

    accuracy                           0.99     27286
   macro avg       0.94      0.89      0.91     27286
weighted avg       0.99      0.99      0.99     27286

[[26803    45]
 [   97   341]]


In [10]:
report = classification_report(y_val, rfc_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, rfc_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Smote-RFC',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

metrics_df = pd.DataFrame(data)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   

           Confusion Matrix  
0  [[26803, 45], [97, 341]]  


In [11]:
rfc= RandomForestClassifier(random_state=42)
rfc.fit(X_train_under, y_train_under)
rfc_predictions = rfc.predict(X_val) 
print(classification_report(y_val, rfc_predictions))
print(confusion_matrix(y_val, rfc_predictions))

              precision    recall  f1-score   support

           0       1.00      0.96      0.98     26848
           1       0.25      0.88      0.39       438

    accuracy                           0.96     27286
   macro avg       0.63      0.92      0.69     27286
weighted avg       0.99      0.96      0.97     27286

[[25714  1134]
 [   52   386]]


In [12]:
report = classification_report(y_val, rfc_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, rfc_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Undersampled-RFC',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  


In [13]:
rfc= RandomForestClassifier(random_state=42)
rfc.fit(X_train_resampled, y_train_resampled)
rfc_predictions = rfc.predict(X_val) 
print(classification_report(y_val, rfc_predictions))
print(confusion_matrix(y_val, rfc_predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     26848
           1       0.89      0.78      0.83       438

    accuracy                           0.99     27286
   macro avg       0.94      0.89      0.91     27286
weighted avg       0.99      0.99      0.99     27286

[[26807    41]
 [   98   340]]


In [14]:
report = classification_report(y_val, rfc_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, rfc_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Resampled-RFC',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  


In [15]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
X_train_under, y_train_under = under_sampler.fit_resample(X_train, y_train)
X_train_resampled, y_train_resampled = pipeline.fit_resample(X_train, y_train)

In [16]:
gbt = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100,max_depth=3, min_samples_split=2, min_samples_leaf=1, subsample=1,max_features='sqrt', random_state=10)
gbt.fit(X_train_smote,y_train_smote)
gbt_predictions = gbt.predict(X_val) 
print(classification_report(y_val, gbt_predictions))
print(confusion_matrix(y_val, gbt_predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     26848
           1       0.86      0.77      0.81       438

    accuracy                           0.99     27286
   macro avg       0.93      0.88      0.90     27286
weighted avg       0.99      0.99      0.99     27286

[[26792    56]
 [  100   338]]


In [17]:
report = classification_report(y_val, gbt_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, gbt_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Smote-GBT',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   
3          Smote-GBT   0.994060  0.994283  0.994134  0.994283   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  
3   [[26792, 56], [100, 338]]  


In [18]:
gbt = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100,max_depth=3, min_samples_split=2, min_samples_leaf=1, subsample=1,max_features='sqrt', random_state=10)
gbt.fit(X_train_under,y_train_under)
gbt_predictions = gbt.predict(X_val) 
print(classification_report(y_val, gbt_predictions))
print(confusion_matrix(y_val, gbt_predictions))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98     26848
           1       0.29      0.84      0.43       438

    accuracy                           0.96     27286
   macro avg       0.64      0.91      0.70     27286
weighted avg       0.99      0.96      0.97     27286

[[25923   925]
 [   68   370]]


In [19]:
report = classification_report(y_val, gbt_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, gbt_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Undersampled-GBT',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   
3          Smote-GBT   0.994060  0.994283  0.994134  0.994283   
4   Undersampled-GBT   0.985960  0.963608  0.972311  0.963608   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  
3   [[26792, 56], [100, 338]]  
4   [[25923, 925], [68, 370]]  


In [20]:
gbt = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100,max_depth=3, min_samples_split=2, min_samples_leaf=1, subsample=1,max_features='sqrt', random_state=10)
gbt.fit(X_train_resampled,y_train_resampled)
gbt_predictions = gbt.predict(X_val) 

In [21]:
report = classification_report(y_val, gbt_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, gbt_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Resampled-GBT',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   
3          Smote-GBT   0.994060  0.994283  0.994134  0.994283   
4   Undersampled-GBT   0.985960  0.963608  0.972311  0.963608   
5      Resampled-GBT   0.994060  0.994283  0.994134  0.994283   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  
3   [[26792, 56], [100, 338]]  
4   [[25923, 925], [68, 370]]  
5   [[26792, 56], [100, 338]]  


In [22]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
X_train_under, y_train_under = under_sampler.fit_resample(X_train, y_train)
X_train_resampled, y_train_resampled = pipeline.fit_resample(X_train, y_train)

In [23]:
neg_to_pos_ratio = (y_train == 0).sum() / (y_train == 1).sum()
params = {
    'objective': 'binary',
    'metric': 'mae',
    'min_child_weight': 60,  # Adjust this value and experiment
    'random_state': 42,
    'max_delta_step': 1,
    'verbose': -1,
    'max_depth': 10, 
}

train_data = lgb.Dataset(X_train_under, label=y_train_under)
val_data = lgb.Dataset(X_val, label=y_val)

num_round = 200
bst = lgb.train(params, train_data, num_round, valid_sets=[val_data], early_stopping_rounds=10)

y_pred_proba = bst.predict(X_val)

y_pred_class = [1 if pred > 0.5 else 0 for pred in y_pred_proba]

[1]	valid_0's l1: 0.485896
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l1: 0.47206
[3]	valid_0's l1: 0.457282
[4]	valid_0's l1: 0.443534
[5]	valid_0's l1: 0.4304
[6]	valid_0's l1: 0.416701
[7]	valid_0's l1: 0.403878
[8]	valid_0's l1: 0.392265
[9]	valid_0's l1: 0.38156
[10]	valid_0's l1: 0.372057
[11]	valid_0's l1: 0.363564
[12]	valid_0's l1: 0.350498
[13]	valid_0's l1: 0.338647
[14]	valid_0's l1: 0.327871
[15]	valid_0's l1: 0.318233
[16]	valid_0's l1: 0.30933
[17]	valid_0's l1: 0.301518
[18]	valid_0's l1: 0.294255
[19]	valid_0's l1: 0.287751
[20]	valid_0's l1: 0.281889
[21]	valid_0's l1: 0.277894
[22]	valid_0's l1: 0.27524
[23]	valid_0's l1: 0.273814
[24]	valid_0's l1: 0.272069
[25]	valid_0's l1: 0.270612
[26]	valid_0's l1: 0.26962
[27]	valid_0's l1: 0.268824
[28]	valid_0's l1: 0.267392
[29]	valid_0's l1: 0.266823
[30]	valid_0's l1: 0.265892
[31]	valid_0's l1: 0.265199
[32]	valid_0's l1: 0.264704
[33]	valid_0's l1: 0.263843
[34]	valid_0's l1: 0.263369
[35

In [24]:
print(classification_report(y_val, y_pred_class, zero_division=0))
print(confusion_matrix(y_val, y_pred_class))

              precision    recall  f1-score   support

           0       1.00      0.90      0.94     26848
           1       0.11      0.80      0.20       438

    accuracy                           0.90     27286
   macro avg       0.56      0.85      0.57     27286
weighted avg       0.98      0.90      0.93     27286

[[24127  2721]
 [   89   349]]


In [25]:
report = classification_report(y_val, gbt_predictions, output_dict=True)
conf_matrix = confusion_matrix(y_val, gbt_predictions)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Undersampled-LGBM',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   
3          Smote-GBT   0.994060  0.994283  0.994134  0.994283   
4   Undersampled-GBT   0.985960  0.963608  0.972311  0.963608   
5      Resampled-GBT   0.994060  0.994283  0.994134  0.994283   
6  Undersampled-LGBM   0.994060  0.994283  0.994134  0.994283   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  
3   [[26792, 56], [100, 338]]  
4   [[25923, 925], [68, 370]]  
5   [[26792, 56], [100, 338]]  
6   [[26792, 56], [100, 338]]  


In [26]:
train_data = lgb.Dataset(X_train_smote, label=y_train_smote)
val_data = lgb.Dataset(X_val, label=y_val)

num_round = 200
bst = lgb.train(params, train_data, num_round, valid_sets=[val_data], early_stopping_rounds=10)

y_pred_proba = bst.predict(X_val)

y_pred_class = [1 if pred > 0.5 else 0 for pred in y_pred_proba]

[1]	valid_0's l1: 0.478615
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l1: 0.457309
[3]	valid_0's l1: 0.433489
[4]	valid_0's l1: 0.410055
[5]	valid_0's l1: 0.387
[6]	valid_0's l1: 0.364577
[7]	valid_0's l1: 0.342623
[8]	valid_0's l1: 0.321572
[9]	valid_0's l1: 0.301491
[10]	valid_0's l1: 0.282349
[11]	valid_0's l1: 0.264122
[12]	valid_0's l1: 0.246689
[13]	valid_0's l1: 0.230034
[14]	valid_0's l1: 0.2144
[15]	valid_0's l1: 0.199952
[16]	valid_0's l1: 0.186554
[17]	valid_0's l1: 0.173907
[18]	valid_0's l1: 0.162735
[19]	valid_0's l1: 0.151648
[20]	valid_0's l1: 0.142241
[21]	valid_0's l1: 0.132922
[22]	valid_0's l1: 0.12442
[23]	valid_0's l1: 0.116677
[24]	valid_0's l1: 0.109651
[25]	valid_0's l1: 0.103361
[26]	valid_0's l1: 0.0975684
[27]	valid_0's l1: 0.092299
[28]	valid_0's l1: 0.0875707
[29]	valid_0's l1: 0.0831658
[30]	valid_0's l1: 0.0791999
[31]	valid_0's l1: 0.0755039
[32]	valid_0's l1: 0.0722987
[33]	valid_0's l1: 0.069262
[34]	valid_0's l1: 0.066

In [27]:
print(classification_report(y_val, y_pred_class, zero_division=0))
print(confusion_matrix(y_val, y_pred_class))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     26848
           1       0.86      0.78      0.82       438

    accuracy                           0.99     27286
   macro avg       0.93      0.89      0.91     27286
weighted avg       0.99      0.99      0.99     27286

[[26794    54]
 [   95   343]]


In [28]:
report = classification_report(y_val, y_pred_class, output_dict=True)
conf_matrix = confusion_matrix(y_val, y_pred_class)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Smote-LGBM',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
print(metrics_df)

  Sampling Technique  Precision    Recall  F1 Score  Accuracy  \
0          Smote-RFC   0.994581  0.994796  0.994634  0.994796   
1   Undersampled-RFC   0.986038  0.956534  0.968097  0.956534   
2      Resampled-RFC   0.994689  0.994906  0.994731  0.994906   
3          Smote-GBT   0.994060  0.994283  0.994134  0.994283   
4   Undersampled-GBT   0.985960  0.963608  0.972311  0.963608   
5      Resampled-GBT   0.994060  0.994283  0.994134  0.994283   
6  Undersampled-LGBM   0.994060  0.994283  0.994134  0.994283   
7         Smote-LGBM   0.994340  0.994539  0.994407  0.994539   

             Confusion Matrix  
0    [[26803, 45], [97, 341]]  
1  [[25714, 1134], [52, 386]]  
2    [[26807, 41], [98, 340]]  
3   [[26792, 56], [100, 338]]  
4   [[25923, 925], [68, 370]]  
5   [[26792, 56], [100, 338]]  
6   [[26792, 56], [100, 338]]  
7    [[26794, 54], [95, 343]]  


In [29]:
neg_to_pos_ratio = (y_train == 0).sum() / (y_train == 1).sum()
params = {
    'objective': 'binary',
    'metric': 'mae',
    'min_child_weight': 60,  # Adjust this value and experiment
    'random_state': 42,
    'max_delta_step': 1,
    'verbose': -1,
    'max_depth': 10, 
}

train_data = lgb.Dataset(X_train_resampled, label=y_train_resampled)
val_data = lgb.Dataset(X_val, label=y_val)

num_round = 200
bst = lgb.train(params, train_data, num_round, valid_sets=[val_data], early_stopping_rounds=10)

y_pred_proba = bst.predict(X_val)

y_pred_class = [1 if pred > 0.5 else 0 for pred in y_pred_proba]

[1]	valid_0's l1: 0.478612
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l1: 0.457303
[3]	valid_0's l1: 0.433497
[4]	valid_0's l1: 0.410065
[5]	valid_0's l1: 0.387027
[6]	valid_0's l1: 0.364543
[7]	valid_0's l1: 0.342534
[8]	valid_0's l1: 0.321476
[9]	valid_0's l1: 0.301449
[10]	valid_0's l1: 0.282329
[11]	valid_0's l1: 0.264079
[12]	valid_0's l1: 0.24663
[13]	valid_0's l1: 0.229953
[14]	valid_0's l1: 0.214349
[15]	valid_0's l1: 0.199895
[16]	valid_0's l1: 0.186578
[17]	valid_0's l1: 0.174352
[18]	valid_0's l1: 0.163144
[19]	valid_0's l1: 0.152023
[20]	valid_0's l1: 0.142239
[21]	valid_0's l1: 0.132917
[22]	valid_0's l1: 0.124422
[23]	valid_0's l1: 0.116676
[24]	valid_0's l1: 0.109708
[25]	valid_0's l1: 0.103389
[26]	valid_0's l1: 0.097611
[27]	valid_0's l1: 0.0923813
[28]	valid_0's l1: 0.0875953
[29]	valid_0's l1: 0.083303
[30]	valid_0's l1: 0.0792484
[31]	valid_0's l1: 0.0755963
[32]	valid_0's l1: 0.0723947
[33]	valid_0's l1: 0.0694063
[34]	valid_0's l1: 

In [30]:
print(classification_report(y_val, y_pred_class, zero_division=0))
print(confusion_matrix(y_val, y_pred_class))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     26848
           1       0.86      0.79      0.82       438

    accuracy                           0.99     27286
   macro avg       0.93      0.89      0.91     27286
weighted avg       0.99      0.99      0.99     27286

[[26790    58]
 [   94   344]]


In [33]:
report = classification_report(y_val, y_pred_class, output_dict=True)
conf_matrix = confusion_matrix(y_val, y_pred_class)

# Extract metrics from classification report
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1_score = report['weighted avg']['f1-score']
accuracy = report['accuracy']

# Create a DataFrame to store the metrics
data = {
    'Sampling Technique' : 'Resampled-LGBM',
    'Precision': precision,
    'Recall': recall,
    'F1 Score': f1_score,
    'Accuracy': accuracy,
    'Confusion Matrix': [conf_matrix]
}

df = pd.DataFrame(data)
metrics_df = metrics_df.append(df, ignore_index=True)
metrics_df.to_csv('../data/samplingResults.csv', header=1, index=0)

In [34]:
metrics_df

Unnamed: 0,Sampling Technique,Precision,Recall,F1 Score,Accuracy,Confusion Matrix
0,Smote-RFC,0.994581,0.994796,0.994634,0.994796,"[[26803, 45], [97, 341]]"
1,Undersampled-RFC,0.986038,0.956534,0.968097,0.956534,"[[25714, 1134], [52, 386]]"
2,Resampled-RFC,0.994689,0.994906,0.994731,0.994906,"[[26807, 41], [98, 340]]"
3,Smote-GBT,0.99406,0.994283,0.994134,0.994283,"[[26792, 56], [100, 338]]"
4,Undersampled-GBT,0.98596,0.963608,0.972311,0.963608,"[[25923, 925], [68, 370]]"
5,Resampled-GBT,0.99406,0.994283,0.994134,0.994283,"[[26792, 56], [100, 338]]"
6,Undersampled-LGBM,0.99406,0.994283,0.994134,0.994283,"[[26792, 56], [100, 338]]"
7,Smote-LGBM,0.99434,0.994539,0.994407,0.994539,"[[26794, 54], [95, 343]]"
8,Resampled-LGBM,0.994244,0.994429,0.994312,0.994429,"[[26790, 58], [94, 344]]"
9,Resampled-LGBM,0.994244,0.994429,0.994312,0.994429,"[[26790, 58], [94, 344]]"
