In [1]:
# Imports
import pandas as pd
import pickle
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

In [2]:
# Loading preprocessed data
with open('train_test_splits.pkl', 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

In [3]:
# Creating pipeline
model_pipeline = Pipeline(steps=[('classifier', GradientBoostingClassifier())])

In [4]:
# Training & fitting model
model_pipeline.fit(X_train, y_train)

In [5]:
# Making predictions on test set
y_pred = model_pipeline.predict(X_test)

In [6]:
# Displaying & evaluating model
print("Gradient Boosting Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("="*60)

Gradient Boosting Classifier:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[20000]]




In [7]:
from xgboost import XGBClassifier

In [8]:
# XGBoost Classifier
xgb_model = XGBClassifier()
xgb_model.fit(X_train, y_train)

# Predict on the test set
xgb_predictions = xgb_model.predict(X_test)

# Evaluation
print("Gradient Boosting Classifier:")
print("Accuracy:", accuracy_score(y_test, xgb_predictions))
print("Classification Report:\n", classification_report(y_test, xgb_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, xgb_predictions))
print("="*60)

Gradient Boosting Classifier:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[20000]]




In [10]:
from lightgbm import LGBMClassifier

In [11]:
# LightGBM Classifier
lgbm_model = LGBMClassifier()
lgbm_model.fit(X_train, y_train)

# Predict on the test set
lgbm_predictions = lgbm_model.predict(X_test)

# Evaluation
print("LightGBM Classifier:")
print("Accuracy:", accuracy_score(y_test, lgbm_predictions))
print("Classification Report:\n", classification_report(y_test, lgbm_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, lgbm_predictions))
print("="*60)

[LightGBM] [Info] Number of positive: 79999, number of negative: 1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003540 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1338
[LightGBM] [Info] Number of data points in the train set: 80000, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.999988 -> initscore=11.289769
[LightGBM] [Info] Start training from score 11.289769
LightGBM Classifier:
Accuracy: 0.99995
Classification Report:
               precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         0
         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       0.50      0.50      0.50     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[    0     0]
 [    1 19999]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
from sklearn.svm import SVC

In [13]:
# SVC Classifier
svc_model = SVC()
svc_model.fit(X_train, y_train)

# Predict on the test set
svc_predictions = svc_model.predict(X_test)

# Evaluation
print("SVC Classifier:")
print("Accuracy:", accuracy_score(y_test, svc_predictions))
print("Classification Report:\n", classification_report(y_test, svc_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, svc_predictions))
print("="*60)

SVC Classifier:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[20000]]




In [14]:
from sklearn.tree import DecisionTreeClassifier

In [15]:
# Decision Tree Classifier
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# Predict on the test set
dt_predictions = dt_model.predict(X_test)

# Evaluation
print("Decision Tree Classifier:")
print("Accuracy:", accuracy_score(y_test, dt_predictions))
print("Classification Report:\n", classification_report(y_test, dt_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, dt_predictions))
print("="*60)

Decision Tree Classifier:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[20000]]




In [16]:
from sklearn.ensemble import ExtraTreesClassifier

In [17]:
# Extra Trees Classifier
et_model = ExtraTreesClassifier()
et_model.fit(X_train, y_train)

# Predict on the test set
et_predictions = et_model.predict(X_test)

# Evaluation
print("Extra Trees Classifier:")
print("Accuracy:", accuracy_score(y_test, et_predictions))
print("Classification Report:\n", classification_report(y_test, et_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, et_predictions))
print("="*60)

Extra Trees Classifier:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     20000

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Confusion Matrix:
 [[20000]]




In [19]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [20]:
from catboost import CatBoostClassifier

In [21]:
# Cat Boost Classifier
cat_model = CatBoostClassifier()
cat_model.fit(X_train, y_train)

# Predict on the test set
cat_predictions = cat_model.predict(X_test)

# Evaluation
print("Cat Boost Classifier:")
print("Accuracy:", accuracy_score(y_test, cat_predictions))
print("Classification Report:\n", classification_report(y_test, cat_predictions))
print("Confusion Matrix:\n", confusion_matrix(y_test, cat_predictions))
print("="*60)

Learning rate set to 0.06692
0:	learn: 0.4491382	total: 122ms	remaining: 2m 1s
1:	learn: 0.2887396	total: 171ms	remaining: 1m 25s
2:	learn: 0.1835346	total: 215ms	remaining: 1m 11s
3:	learn: 0.1202389	total: 237ms	remaining: 59s
4:	learn: 0.0789765	total: 262ms	remaining: 52.1s
5:	learn: 0.0503205	total: 291ms	remaining: 48.2s
6:	learn: 0.0343002	total: 319ms	remaining: 45.2s
7:	learn: 0.0245528	total: 343ms	remaining: 42.5s
8:	learn: 0.0174286	total: 365ms	remaining: 40.2s
9:	learn: 0.0129198	total: 405ms	remaining: 40.1s
10:	learn: 0.0087536	total: 430ms	remaining: 38.6s
11:	learn: 0.0066195	total: 452ms	remaining: 37.2s
12:	learn: 0.0051255	total: 474ms	remaining: 36s
13:	learn: 0.0040251	total: 510ms	remaining: 35.9s
14:	learn: 0.0033022	total: 573ms	remaining: 37.6s
15:	learn: 0.0026886	total: 633ms	remaining: 38.9s
16:	learn: 0.0022263	total: 713ms	remaining: 41.2s
17:	learn: 0.0018846	total: 763ms	remaining: 41.6s
18:	learn: 0.0016190	total: 811ms	remaining: 41.9s
19:	learn: 0.0

