### Import necessary libraries

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, precision_recall_curve, auc
import category_encoders as ce

### Load the dataset


In [23]:
file_path = './Dataset/preprocessed_dataset.csv'

### Read the data in chunks

In [24]:
chunk_size = 10**6
chunks = []
for chunk in pd.read_csv(file_path, chunksize=chunk_size, index_col=0):
    chunks.append(chunk)

### Combine chunks into a single DataFrame

In [25]:
df = pd.concat(chunks, axis=0)

In [26]:
df

Unnamed: 0,User,Card,Year,Month,Day,Amount,Payment Method,Is Fraud?,Hour,Minute,Category
0,0,0,2002,9,1,134.09,on-site,No,6,21,Personal Spending
1,0,0,2002,9,1,38.48,on-site,No,6,42,Food/Groceries
2,0,0,2002,9,2,120.34,on-site,No,6,22,Food/Groceries
3,0,0,2002,9,2,128.95,on-site,No,17,45,Clothing
4,0,0,2002,9,3,104.71,on-site,No,6,23,Healthcare
...,...,...,...,...,...,...,...,...,...,...,...
19384,0,4,2009,2,7,22.70,on-site,No,16,16,Personal Spending
19385,0,4,2009,2,22,27.94,on-site,No,11,58,Home Goods
19386,0,4,2009,2,24,13.97,on-site,No,16,53,Healthcare
19387,0,4,2009,3,29,14.27,on-site,No,16,30,Food/Groceries


### Feature Engineering
#### Convert categorical features into numerical ones using OneHotEncoder

In [27]:
categorical_features = ['Payment Method', 'Category']
encoder = ce.BinaryEncoder(cols=categorical_features)
df_encoded = encoder.fit_transform(df[categorical_features])

df = df.drop(columns=categorical_features)
df = pd.concat([df, df_encoded], axis=1)

df['Is Fraud?'] = df['Is Fraud?'].apply(lambda x: 1 if x == 'Yes' else 0)

### Split the data into training and testing sets

In [8]:
X = df.drop(columns=['Is Fraud?'])
y = df['Is Fraud?']

### Perform stratified sampling

In [9]:
##sample_size = 0.1
##X_sample, _, y_sample, _ = train_test_split(X, y, test_size=1-sample_size, stratify=y, random_state=42)

In [10]:
X

Unnamed: 0,User,Card,Year,Month,Day,Amount,Hour,Minute,Payment Method_0,Payment Method_1,Category_0,Category_1,Category_2,Category_3
0,0,0,2002,9,1,134.09,6,21,0,1,0,0,0,1
1,0,0,2002,9,1,38.48,6,42,0,1,0,0,1,0
2,0,0,2002,9,2,120.34,6,22,0,1,0,0,1,0
3,0,0,2002,9,2,128.95,17,45,0,1,0,0,1,1
4,0,0,2002,9,3,104.71,6,23,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19384,0,4,2009,2,7,22.70,16,16,0,1,0,0,0,1
19385,0,4,2009,2,22,27.94,11,58,0,1,1,0,1,0
19386,0,4,2009,2,24,13.97,16,53,0,1,0,1,0,0
19387,0,4,2009,3,29,14.27,16,30,0,1,0,0,1,0


### Split the sample into training and testing sets

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

### Model Selection and Hyperparameter Tuning using Grid Search

In [12]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2, scoring='accuracy')

grid_search.fit(X_train, y_train)
#random_search = RandomizedSearchCV(estimator=rf, param_distributions=param_dist, n_iter=100, cv=3, n_jobs=-1, verbose=2, scoring='recall', random_state=42)

#random_search.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits


### Get the best parameters

In [13]:
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

Best parameters found:  {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}


### Print the results of each hyperparameter testing step

In [15]:
print("\nRandomized Search Results:")
for i in range(len(grid_search.cv_results_['params'])):
    mean_score = grid_search.cv_results_['mean_test_score'][i]
    std_score = grid_search.cv_results_['std_test_score'][i]
    params = grid_search.cv_results_['params'][i]
    print(f"Iteration {i+1}:")
    print(f"Parameters: {params}")
    print(f"Mean Accuracy: {mean_score:.4f}")
    print(f"Standard Deviation: {std_score:.4f}")
    print("-" * 30)


Randomized Search Results:
Iteration 1:
Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Mean Accuracy: 0.9986
Standard Deviation: 0.0002
------------------------------
Iteration 2:
Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Mean Accuracy: 0.9987
Standard Deviation: 0.0004
------------------------------
Iteration 3:
Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Mean Accuracy: 0.9986
Standard Deviation: 0.0002
------------------------------
Iteration 4:
Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Mean Accuracy: 0.9986
Standard Deviation: 0.0002
------------------------------
Iteration 5:
Parameters: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators':

### Train the model with the best parameters

In [16]:
best_rf = grid_search.best_estimator_

### Evaluation

In [17]:
y_pred = best_rf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")


Confusion Matrix:
[[3873    0]
 [   2    3]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3873
           1       1.00      0.60      0.75         5

    accuracy                           1.00      3878
   macro avg       1.00      0.80      0.87      3878
weighted avg       1.00      1.00      1.00      3878


Accuracy Score:
Accuracy: 0.9995

Precision Score:
Precision: 1.0000

Recall Score:
Recall: 0.6000


In [3]:
F_path='./Dataset/preprocessed_dataset_v2.csv'

In [4]:
chunk_size = 10**6
chunks = []
for chunk in pd.read_csv(F_path, chunksize=chunk_size, index_col=0):
    chunks.append(chunk)

In [5]:
data = pd.concat(chunks, axis=0)

In [6]:
categorical_features = ['Payment Method', 'Category']
encoder = ce.BinaryEncoder(cols=categorical_features)
data_encoded = encoder.fit_transform(data[categorical_features])

data = data.drop(columns=categorical_features)
data = pd.concat([data, data_encoded], axis=1)

data['Is Fraud?'] = data['Is Fraud?'].apply(lambda x: 1 if x == 'Yes' else 0)

In [7]:
x = data.drop(columns=['Is Fraud?'])
Y = data['Is Fraud?']

In [8]:
x

Unnamed: 0,Year,Month,Day,Hour,Minute,Amount,Payment Method_0,Payment Method_1,Category_0,Category_1,Category_2,Category_3
0,2002,9,1,6,21,134.09,0,1,0,0,0,1
1,2002,9,1,6,42,38.48,0,1,0,0,1,0
2,2002,9,2,6,22,120.34,0,1,0,0,1,0
3,2002,9,2,17,45,128.95,0,1,0,0,1,1
4,2002,9,3,6,23,104.71,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
23998444,2020,2,27,22,23,-54.00,1,1,0,1,0,1
23998445,2020,2,27,22,24,54.00,1,1,0,1,0,1
23998446,2020,2,28,7,43,59.15,1,1,0,1,0,1
23998447,2020,2,28,20,10,43.12,1,1,0,1,0,1


In [9]:
Y

0           0
1           0
2           0
3           0
4           0
           ..
23998444    0
23998445    0
23998446    0
23998447    0
23998448    0
Name: Is Fraud?, Length: 23998449, dtype: int64

In [10]:
sample_size = 0.6
x_sample, _, Y_sample, _ = train_test_split(x, Y, test_size=1-sample_size, stratify=Y, random_state=42)

In [11]:
x_train, x_test, Y_train, Y_test = train_test_split(x_sample, Y_sample, test_size=0.2, stratify=Y_sample, random_state=42)

In [None]:
{'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}

In [21]:
rf = RandomForestClassifier(bootstrap= False, max_depth= None, min_samples_leaf= 1, min_samples_split= 2, n_estimators= 50,random_state=42)
rf.fit(x_train, Y_train)

In [22]:
Y_pred = rf.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

print("\nModel Summary") 

print(rf.summary())


Confusion Matrix:
[[2876264     133]
 [   2738     679]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.84      0.20      0.32      3417

    accuracy                           1.00   2879814
   macro avg       0.92      0.60      0.66   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9990

Precision Score:
Precision: 0.8362

Recall Score:
Recall: 0.1987

Model Summary


AttributeError: 'RandomForestClassifier' object has no attribute 'summary'

In [None]:
rf.write()

In [11]:
from imblearn.combine import SMOTEENN

In [15]:
# Apply SMOTEENN to the training data
smote_enn = SMOTEENN(random_state=42)
x_train_res, Y_train_res = smote_enn.fit_resample(x_train, Y_train)

In [16]:
x_train_res

Unnamed: 0,Year,Month,Day,Hour,Minute,Amount,Payment Method_0,Payment Method_1,Category_0,Category_1,Category_2,Category_3
0,2014,2,11,4,30,53.340000,1,0,0,1,1,1
1,2010,12,13,3,54,7.620000,0,1,0,1,0,1
2,2004,10,3,7,18,37.400000,0,1,1,0,1,0
3,2007,2,24,8,59,73.460000,0,1,0,1,0,1
4,2011,7,30,21,37,12.400000,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
22822695,2018,5,6,12,48,-211.765681,0,0,0,0,0,0
22822696,2007,4,18,16,13,1.020815,1,0,0,0,0,1
22822697,2013,7,28,9,26,142.805401,1,0,1,0,0,0
22822698,2010,2,9,10,4,486.802005,0,0,0,0,0,0


In [17]:
Y_train_res

0           0
1           0
2           0
3           0
4           0
           ..
22822695    1
22822696    1
22822697    1
22822698    1
22822699    1
Name: Is Fraud?, Length: 22822700, dtype: int64

In [19]:
# Train a Random Forest Classifier
clf = RandomForestClassifier(bootstrap= False, max_depth= None, min_samples_leaf= 1, min_samples_split= 2, n_estimators= 50, random_state=42)
clf.fit(x_train_res, Y_train_res)

In [22]:
Y_pred = clf.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2874548    1849]
 [   2501     916]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.33      0.27      0.30      3417

    accuracy                           1.00   2879814
   macro avg       0.67      0.63      0.65   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9985

Precision Score:
Precision: 0.3313

Recall Score:
Recall: 0.2681

Precision-Recall AUC: 0.2416


In [23]:
from xgboost import XGBClassifier

In [None]:
xgb_clf = XGBClassifier(scale_pos_weight=len(Y_train[Y_train == 0]) / len(Y_train[Y_train == 1]))
xgb_clf.fit(x_train, Y_train)

Y_pred = xgb_clf.predict(x_test)
print(classification_report(Y_test, Y_pred))

In [1]:
import lightgbm as lgb

In [25]:
lgb_clf = lgb.LGBMClassifier(scale_pos_weight=len(Y_train[Y_train == 0]) / len(Y_train[Y_train == 1]))
lgb_clf.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.816499 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [26]:
Y_pred = lgb_clf.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2104736  771661]
 [    925    2492]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.73      0.84   2876397
           1       0.00      0.73      0.01      3417

    accuracy                           0.73   2879814
   macro avg       0.50      0.73      0.43   2879814
weighted avg       1.00      0.73      0.84   2879814


Accuracy Score:
Accuracy: 0.7317

Precision Score:
Precision: 0.0032

Recall Score:
Recall: 0.7293

Precision-Recall AUC: 0.2416


class_weight={0: 1, 1: 10}

In [27]:
lgb_cl = lgb.LGBMClassifier(scale_pos_weight=len(Y_train[Y_train == 0]) / len(Y_train[Y_train == 1]))
lgb_cl.fit(x_train_res, Y_train_res)

[LightGBM] [Info] Number of positive: 11505576, number of negative: 11317124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.726365 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 22822700, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504129 -> initscore=0.016515
[LightGBM] [Info] Start training from score 0.016515


In [28]:
Y_pred = lgb_cl.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[1325840 1550557]
 [    112    3305]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.46      0.63   2876397
           1       0.00      0.97      0.00      3417

    accuracy                           0.46   2879814
   macro avg       0.50      0.71      0.32   2879814
weighted avg       1.00      0.46      0.63   2879814


Accuracy Score:
Accuracy: 0.4615

Precision Score:
Precision: 0.0021

Recall Score:
Recall: 0.9672

Precision-Recall AUC: 0.2416


In [29]:
lgb_c = lgb.LGBMClassifier(random_state=42)
lgb_c.fit(x_train_res, Y_train_res)

[LightGBM] [Info] Number of positive: 11505576, number of negative: 11317124
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 1.335957 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 22822700, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.504129 -> initscore=0.016515
[LightGBM] [Info] Start training from score 0.016515


In [30]:
Y_pred = lgb_c.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2763050  113347]
 [   1056    2361]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98   2876397
           1       0.02      0.69      0.04      3417

    accuracy                           0.96   2879814
   macro avg       0.51      0.83      0.51   2879814
weighted avg       1.00      0.96      0.98   2879814


Accuracy Score:
Accuracy: 0.9603

Precision Score:
Precision: 0.0204

Recall Score:
Recall: 0.6910

Precision-Recall AUC: 0.2416


In [32]:
lgbC = lgb.LGBMClassifier(scale_pos_weight=100,random_state=42)
lgbC.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.628645 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [33]:
Y_pred = lgbC.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2769883  106514]
 [   1893    1524]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.96      0.98   2876397
           1       0.01      0.45      0.03      3417

    accuracy                           0.96   2879814
   macro avg       0.51      0.70      0.50   2879814
weighted avg       1.00      0.96      0.98   2879814


Accuracy Score:
Accuracy: 0.9624

Precision Score:
Precision: 0.0141

Recall Score:
Recall: 0.4460

Precision-Recall AUC: 0.2416


In [34]:
Lgb = lgb.LGBMClassifier(scale_pos_weight=50,random_state=42)
Lgb.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.733695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [35]:
Y_pred = Lgb.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2839682   36715]
 [   1991    1426]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99   2876397
           1       0.04      0.42      0.07      3417

    accuracy                           0.99   2879814
   macro avg       0.52      0.70      0.53   2879814
weighted avg       1.00      0.99      0.99   2879814


Accuracy Score:
Accuracy: 0.9866

Precision Score:
Precision: 0.0374

Recall Score:
Recall: 0.4173

Precision-Recall AUC: 0.2416


In [36]:
l_gb = lgb.LGBMClassifier(scale_pos_weight=300,random_state=42)
l_gb.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.785855 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [37]:
Y_pred = l_gb.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2660772  215625]
 [   1328    2089]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96   2876397
           1       0.01      0.61      0.02      3417

    accuracy                           0.92   2879814
   macro avg       0.50      0.77      0.49   2879814
weighted avg       1.00      0.92      0.96   2879814


Accuracy Score:
Accuracy: 0.9247

Precision Score:
Precision: 0.0096

Recall Score:
Recall: 0.6114

Precision-Recall AUC: 0.2416


In [38]:
li_gb = lgb.LGBMClassifier(scale_pos_weight=20,random_state=42)
li_gb.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.711337 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [39]:
Y_pred = li_gb.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2860223   16174]
 [   2298    1119]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00   2876397
           1       0.06      0.33      0.11      3417

    accuracy                           0.99   2879814
   macro avg       0.53      0.66      0.55   2879814
weighted avg       1.00      0.99      1.00   2879814


Accuracy Score:
Accuracy: 0.9936

Precision Score:
Precision: 0.0647

Recall Score:
Recall: 0.3275

Precision-Recall AUC: 0.2416


In [49]:
# Get the predicted probabilities
y_probs = lgb_c.predict_proba(x_test)[:, 1]

In [50]:
# Calculate precision-recall pairs for different thresholds
precision, recall, thresholds = precision_recall_curve(Y_test, y_probs)

In [51]:
# Find the threshold that gives the best balance between precision and recall
f1_scores = 2 * (precision * recall) / (precision + recall)
best_threshold = thresholds[np.argmax(f1_scores)]

In [52]:
print("Best Threshold:", best_threshold)

# Apply the best threshold to make final predictions
y_pred = (y_probs >= best_threshold).astype(int)


Best Threshold: 0.9762841233011991


In [55]:
ll_gb = lgb.LGBMClassifier(class_weight={0: 1, 1: 10},random_state=42)
ll_gb.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.772384 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.011738 -> initscore=-4.433092
[LightGBM] [Info] Start training from score -4.433092


In [56]:
Y_pred = ll_gb.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2871708    4689]
 [   1931    1486]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.24      0.43      0.31      3417

    accuracy                           1.00   2879814
   macro avg       0.62      0.72      0.65   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9977

Precision Score:
Precision: 0.2406

Recall Score:
Recall: 0.4349

Precision-Recall AUC: 0.2416


In [59]:
llg_b = lgb.LGBMClassifier(class_weight={0: 1, 1: 50},random_state=42)
llg_b.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.759842 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.056059 -> initscore=-2.823654
[LightGBM] [Info] Start training from score -2.823654


In [60]:
Y_pred = llg_b.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2859668   16729]
 [   1364    2053]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00   2876397
           1       0.11      0.60      0.18      3417

    accuracy                           0.99   2879814
   macro avg       0.55      0.80      0.59   2879814
weighted avg       1.00      0.99      1.00   2879814


Accuracy Score:
Accuracy: 0.9937

Precision Score:
Precision: 0.1093

Recall Score:
Recall: 0.6008

Precision-Recall AUC: 0.2416


In [62]:
lg = lgb.LGBMClassifier(class_weight={0: 1, 1: 40},random_state=42)
lg.fit(x_train, Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.643635 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.045356 -> initscore=-3.046798
[LightGBM] [Info] Start training from score -3.046798


In [63]:
Y_pred = lg.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2862581   13816]
 [   1447    1970]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.12      0.58      0.21      3417

    accuracy                           0.99   2879814
   macro avg       0.56      0.79      0.60   2879814
weighted avg       1.00      0.99      1.00   2879814


Accuracy Score:
Accuracy: 0.9947

Precision Score:
Precision: 0.1248

Recall Score:
Recall: 0.5765

Precision-Recall AUC: 0.2416


In [64]:

param_grid = {
    'num_leaves': [31, 50, 70],
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [100, 200, 500],
    'class_weight': [{0: 1, 1: 10}, {0: 1, 1: 20}, {0: 1, 1: 50}]
}

# Initialize the GridSearchCV

grid_search = GridSearchCV(estimator=lgb.LGBMClassifier(random_state=42,class_weight={0: 1, 1: 10}), param_grid=param_grid, cv=3, scoring='f1', verbose=2, n_jobs=-1)

In [65]:
grid_search.fit(x_train, Y_train)

Fitting 3 folds for each of 81 candidates, totalling 243 fits


In [None]:
Y_pred = llg_b.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, clf.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")

In [18]:
light=lgb.LGBMClassifier(random_state=42,class_weight={0: 1, 1: 20}, max_depth= None, min_samples_leaf= 1, n_estimators= 50)
light.fit(x_train,Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.852585 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.023204 -> initscore=-3.739945
[LightGBM] [Info] Start training from score -3.739945


In [19]:
Y_pred = light.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2869326    7071]
 [   1855    1562]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.18      0.46      0.26      3417

    accuracy                           1.00   2879814
   macro avg       0.59      0.73      0.63   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9969

Precision Score:
Precision: 0.1809

Recall Score:
Recall: 0.4571

Precision-Recall AUC: 0.2626


In [22]:
l=lgb.LGBMClassifier(random_state=42, max_depth= None, min_samples_leaf= 1, n_estimators= 50)
l.fit(x_train,Y_train)

[LightGBM] [Info] Number of positive: 13666, number of negative: 11505589
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.684749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 427
[LightGBM] [Info] Number of data points in the train set: 11519255, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.001186 -> initscore=-6.735677
[LightGBM] [Info] Start training from score -6.735677


In [23]:
Y_pred = l.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2875882     515]
 [   2843     574]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.53      0.17      0.25      3417

    accuracy                           1.00   2879814
   macro avg       0.76      0.58      0.63   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9988

Precision Score:
Precision: 0.5271

Recall Score:
Recall: 0.1680

Precision-Recall AUC: 0.2626


In [24]:
rf_cl = RandomForestClassifier(bootstrap= False, max_depth= None, min_samples_leaf= 1, min_samples_split= 2, n_estimators= 50,random_state=42,class_weight={0: 1, 1: 10})
rf_cl.fit(x_train, Y_train)

In [25]:
Y_pred = rf_cl.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2876291     106]
 [   2782     635]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.86      0.19      0.31      3417

    accuracy                           1.00   2879814
   macro avg       0.93      0.59      0.65   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9990

Precision Score:
Precision: 0.8570

Recall Score:
Recall: 0.1858

Precision-Recall AUC: 0.2626


In [26]:
rnd = RandomForestClassifier(random_state=42,class_weight={0: 1, 1: 10})
rnd.fit(x_train, Y_train)

In [27]:
Y_pred = rnd.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2876330      67]
 [   2816     601]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   2876397
           1       0.90      0.18      0.29      3417

    accuracy                           1.00   2879814
   macro avg       0.95      0.59      0.65   2879814
weighted avg       1.00      1.00      1.00   2879814


Accuracy Score:
Accuracy: 0.9990

Precision Score:
Precision: 0.8997

Recall Score:
Recall: 0.1759

Precision-Recall AUC: 0.2626


In [28]:
from xgboost import XGBClassifier

In [31]:
xgb_clf = XGBClassifier(random_state=42,scale_pos_weight=len(Y_train[Y_train == 0]) / len(Y_train[Y_train == 1]))
xgb_clf.fit(x_train, Y_train)

In [32]:
Y_pred = xgb_clf.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2676671  199726]
 [    462    2955]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.93      0.96   2876397
           1       0.01      0.86      0.03      3417

    accuracy                           0.93   2879814
   macro avg       0.51      0.90      0.50   2879814
weighted avg       1.00      0.93      0.96   2879814


Accuracy Score:
Accuracy: 0.9305

Precision Score:
Precision: 0.0146

Recall Score:
Recall: 0.8648

Precision-Recall AUC: 0.2626


In [33]:
xgb_cl = XGBClassifier(max_depth= None, n_estimators= 50,random_state=42,scale_pos_weight=len(Y_train[Y_train == 0]) / len(Y_train[Y_train == 1]))
xgb_cl.fit(x_train, Y_train)

Parameters: { "bootstrap", "min_samples_leaf", "min_samples_split" } are not used.



In [35]:
Y_pred = xgb_cl.predict(x_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)

print("\nConfusion Matrix:")
print(confusion_matrix(Y_test, Y_pred))

print("\nClassification Report:")
print(classification_report(Y_test, Y_pred))

print("\nAccuracy Score:")
print(f"Accuracy: {accuracy:.4f}")

print("\nPrecision Score:")
print(f"Precision: {precision:.4f}")

print("\nRecall Score:")
print(f"Recall: {recall:.4f}")

# Calculate Precision-Recall AUC
precision, recall, _ = precision_recall_curve(Y_test, light.predict_proba(x_test)[:, 1])
pr_auc = auc(recall, precision)
print(f"\nPrecision-Recall AUC: {pr_auc:.4f}")


Confusion Matrix:
[[2644394  232003]
 [    441    2976]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.92      0.96   2876397
           1       0.01      0.87      0.02      3417

    accuracy                           0.92   2879814
   macro avg       0.51      0.90      0.49   2879814
weighted avg       1.00      0.92      0.96   2879814


Accuracy Score:
Accuracy: 0.9193

Precision Score:
Precision: 0.0127

Recall Score:
Recall: 0.8709

Precision-Recall AUC: 0.2626
