# Business Client Segment Prediction Model

## Objective
Develop a predictive model to classify business clients into RFMT-based segments using demographic information collected at sign-up, enabling better engagement and retention strategies.

## Setup & Data Loading
### Import Dependencies

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random as python_random
import re
import itertools
import pickle

# Set environment variables for optimized performance
os.environ["OMP_NUM_THREADS"] = '4'  # Improve parallel processing efficiency

# Scikit-learn utilities
from sklearn.decomposition import PCA, FastICA
from sklearn.model_selection import (
    train_test_split, KFold, StratifiedKFold, RepeatedKFold, ShuffleSplit, StratifiedShuffleSplit, GridSearchCV
)
from sklearn.feature_selection import SelectFromModel, RFECV, RFE
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, PolynomialFeatures, Normalizer
from sklearn.metrics import (
    confusion_matrix, f1_score, roc_auc_score, mean_squared_error, accuracy_score, log_loss, classification_report
)

# Machine Learning Models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression, ElasticNet, RidgeClassifier, Lasso
from sklearn.ensemble import (
    IsolationForest, RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier, 
    AdaBoostClassifier, VotingClassifier, StackingClassifier
)
from catboost import CatBoostClassifier, Pool
from xgboost import XGBRFClassifier, XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Display working directory
print("Current Directory:", os.getcwd())


### Loading & Exploration
Load Datasets

In [None]:
# Load labeled corporate client segmentation data
labels_data = pd.read_csv(r"Machine Learning\Customer Segmentation\artifacts\Business_Clients_Clusters.csv")

# Load demographic data of corporate clients
demo_data = pd.read_csv(r"Machine Learning\Customer Segmentation\data\3_business_clients_demographics.csv")

# Make copies for processing
labels_data_copy = labels_data.copy()
demo_data_copy = demo_data.copy()

# Display dataset shapes
print(f"Labels Data Shape: {labels_data.shape}")
print(f"Demographic Data Shape: {demo_data.shape}")


Merge Datasets

In [None]:
# Merge demographic data with labeled segmentation dataset
data = pd.merge(left=labels_data, right=demo_data, on='cid', how='left')

# Display merged dataset shape
print(f"Merged Data Shape: {data.shape}")

# Display first few records
data.head()


Check for Missing Values & Duplicates

In [None]:
# Count missing values per column
missing_values = data.isnull().sum()
print("Missing Values:\n", missing_values[missing_values > 0])

# Check for duplicate client IDs
duplicate_counts = data['cid'].duplicated().sum()
print(f"Duplicate Client IDs: {duplicate_counts}")


Data Summary

In [None]:
# Display summary statistics
data.describe(include='all')

# Check dataset distribution
data.info()


## Data Preprocessing
### **Removing Unnecessary Columns**

In [None]:
# Display column names
data.columns

# Retain only columns that do not contain '2'
col_without_2 = [col for col in data.columns if '2' not in col]
final_data_1 = data[col_without_2]

# Selecting relevant columns for analysis
final_data = final_data_1[['cid', 'Cluster', 'created', 'account_type', 'is_synced_wb',
                           'is_afex_broker', 'is_kyc_complete', 'user_account_type', 
                           'used_referral_code', 'country', 'region', 'subregion', 
                           'state', 'rc_number', 'company_website', 'politically_exposed',
                           'date_of_incorporation', 'nature_of_business', 'city', 
                           'political_experience', 'place_of_incorporation']]

# Display dataset shape and sample records
final_data.shape
final_data.head()


### **Exploartory Data Analysis (EDA)**

In [None]:
# Unique values in 'used_referral_code'
set(final_data['used_referral_code'])

# Uncomment for additional EDA
# pd.DataFrame(final_data['nature_of_business'].value_counts().sort_values(ascending=False)).head(30)
# final_data['company_website'].count()
# final_data[final_data['rc_number'].astype(str).str.startswith('RC')]


### **Feature Engineering**

#### Handling Missing & Inconsistent Data

In [None]:
# Fill missing values in 'politically_exposed'
final_data['politically_exposed'] = final_data['politically_exposed'].fillna(False)

#### Creating New Features

**Website Presence**

In [None]:
import re

def is_website_address(x):
    pattern = re.compile(r'^(http[s]?://|www)[^\s]+[.]\w{2,}[/]?$')
    return "has website" if re.match(pattern, str(x)) else "does not have website"

final_data['has_website'] = final_data['company_website'].apply(is_website_address)

# Display results
final_data[['company_website', 'has_website']].iloc[150:180].head()


**RC Status**

In [None]:
def referral_null(x):
    return 'referred' if str(x).startswith(('COMX', 'comx')) else 'not referred'

final_data['is_referred'] = final_data['used_referral_code'].apply(referral_null)

# Display results
final_data[['used_referral_code', 'is_referred']].head(20)


**Company Age Calculation**

In [None]:
now = pd.to_datetime('now')
final_data['age'] = (now - pd.to_datetime(final_data['date_of_incorporation'], errors='coerce')).dt.total_seconds() / (60 * 60 * 24 * 365.25)

# Round age values for clarity
final_data['age'] = final_data['age'].apply(lambda x: round(x, 2))


**Datetime Feature Extraction**

final_data['created'] = pd.to_datetime(final_data['created'])

final_data['created_year'] = final_data['created'].dt.year
final_data['created_month'] = final_data['created'].dt.month
final_data['created_dayofweek'] = final_data['created'].dt.dayofweek
final_data['created_dayofmonth'] = final_data['created'].dt.day


#### Dropping Unnecessary Columns After Feature Engineering

In [None]:
to_drop = ['used_referral_code', 'subregion', 'city', 'rc_number', 'company_website', 
           'nature_of_business', 'political_experience', 'date_of_incorporation',
           'place_of_incorporation', 'cid']

final_processed = final_data.drop(to_drop, axis=1)

# Display dataset after dropping
final_processed.head()


### **Data Cleaning**

In [None]:
# Check data types and missing values
final_processed.info()
final_processed.isnull().sum()


#### Categorical, Numeric, and Boolean Columns Extraction

In [None]:
cat_column = final_processed.select_dtypes(include=['O']).columns.values
num_column = final_processed.select_dtypes(include=['float64', 'int64', 'int32', 'float32']).columns.values
bool_column = final_processed.select_dtypes(include=['bool']).columns.values


#### Handling Missing Values

In [None]:
# Fill missing values in categorical columns with mode
for col in cat_column:
    final_processed[col] = final_processed[col].fillna(final_processed[col].mode().values[0])

# Fill missing values in numerical columns with median
for column in num_column:
    final_processed[column] = final_processed[column].fillna(final_processed[column].median())


### **Encoding Categorical & Boolean Variables**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Separate 'created' before encoding
final_processed_created = final_processed['created']
final_processed_others = final_processed.drop('created', axis=1)

# Label encode categorical and boolean columns
for column in cat_column:
    label_e = LabelEncoder()
    final_processed_others[column] = label_e.fit_transform(final_processed_others[column])

for column in bool_column:
    label_e = LabelEncoder()
    final_processed_others[column] = label_e.fit_transform(final_processed_others[column])

# Merge back 'created' column
tot_data = pd.concat([final_processed_created, final_processed_others], axis=1)
tot_data.head()


### **Splitting Data into Training & Testing Sets**

In [None]:
# Verify time range for splitting
tot_data.created.min(), tot_data.created.max()
tot_data.created.sort_values()

# Split based on date
training_data = tot_data[tot_data.created < '2023-01-01']
testing_data = tot_data[tot_data.created >= '2023-01-01']

# Drop 'created' column post-split
training_data.drop('created', axis=1, inplace=True)
testing_data.drop('created', axis=1, inplace=True)

# Display dataset shapes
training_data.shape, testing_data.shape


#### Feature & Traget Separation

In [None]:
test_features = testing_data.drop('Cluster', axis=1)
test_target = testing_data['Cluster']


### **Creating Train & Validation Sets**

In [None]:
from sklearn.model_selection import train_test_split

# Define features & target
X = training_data.drop('Cluster', axis=1)
y = training_data['Cluster']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Display shapes
X.shape, y.shape, X_train.shape, y_train.shape, X_test.shape, y_test.shape


### **Feature Scaling**

In [None]:
from sklearn.preprocessing import StandardScaler

# Initialize StandardScaler
scaler = StandardScaler()

# Standardize the features
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)


#### Class Distribution Check

In [None]:
pd.DataFrame(y_train).value_counts(normalize=True) * 100
pd.DataFrame(y_test).value_counts(normalize=True) * 100

## Model Development

### **Modeling with Scaled Dataset**

#### **Linear Models**

##### LogisticRegression

In [None]:
model_lr = LogisticRegression(random_state = 42)
model_lr.fit(X_train_scaled,y_train)
pred_lr = model_lr.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lr))
print('\nf1 score ::::',f1_score(y_test,pred_lr, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lr))


 Accuracy :::: 0.7428571428571429

f1 score :::: 0.6691147942367455

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.09      0.15        32
           1       0.77      0.97      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.37      0.35      0.33       175
weighted avg       0.64      0.74      0.67       175



##### RidgeClassifier

In [None]:
model_rc = RidgeClassifier(random_state = 42)
model_rc.fit(X_train_scaled,y_train)
pred_rc = model_rc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rc))
print('\nf1 score ::::',f1_score(y_test,pred_rc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rc))


 Accuracy :::: 0.7542857142857143

f1 score :::: 0.6740563784042044

classification_report ::::
                precision    recall  f1-score   support

           0       0.43      0.09      0.15        32
           1       0.77      0.98      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.40      0.36      0.34       175
weighted avg       0.65      0.75      0.67       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### GaussianNaïveBayes (GaussianNB)

In [None]:
model_gnb = GaussianNB()
model_gnb.fit(X_train_scaled,y_train)
pred_gnb = model_gnb.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_gnb))
print('\nf1 score ::::',f1_score(y_test,pred_gnb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_gnb))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_gnb))


 Accuracy :::: 0.2

f1 score :::: 0.10015808303519427

classification_report ::::
                precision    recall  f1-score   support

           0       0.18      0.97      0.31        32
           1       0.80      0.03      0.06       131
           2       0.00      0.00      0.00        12

    accuracy                           0.20       175
   macro avg       0.33      0.33      0.12       175
weighted avg       0.63      0.20      0.10       175

\confusion_matrix ::::
  [[ 31   1   0]
 [127   4   0]
 [ 12   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### **Tree Model**
##### DecisionTreeClassifier

In [None]:
model_dtc = DecisionTreeClassifier(random_state = 42)
model_dtc.fit(X_train_scaled,y_train)
pred_dtc = model_dtc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_dtc))
print('\nf1 score ::::',f1_score(y_test,pred_dtc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_dtc))


 Accuracy :::: 0.6514285714285715

f1 score :::: 0.6442434404742224

classification_report ::::
                precision    recall  f1-score   support

           0       0.32      0.38      0.35        32
           1       0.77      0.78      0.78       131
           2       0.00      0.00      0.00        12

    accuracy                           0.65       175
   macro avg       0.37      0.38      0.37       175
weighted avg       0.64      0.65      0.64       175



#### **Bagging-Based Models (Parallel Ensemble)**
##### RandomForestClassifier

In [None]:
# 1

model_rfc = RandomForestClassifier(random_state = 42)
model_rfc.fit(X_train_scaled,y_train)
pred_rfc = model_rfc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rfc))
print('\nf1 score ::::',f1_score(y_test,pred_rfc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rfc))


 Accuracy :::: 0.7314285714285714

f1 score :::: 0.6939929627023221

classification_report ::::
                precision    recall  f1-score   support

           0       0.38      0.31      0.34        32
           1       0.79      0.90      0.84       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.39      0.40      0.40       175
weighted avg       0.66      0.73      0.69       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

model_rfc2 = RandomForestClassifier(n_estimators = 350,random_state = 42)
model_rfc2.fit(X_train_scaled,y_train)
pred_rfc2 = model_rfc2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rfc2))
print('\nf1 score ::::',f1_score(y_test,pred_rfc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rfc2))


 Accuracy :::: 0.7371428571428571

f1 score :::: 0.6935479543930247

classification_report ::::
                precision    recall  f1-score   support

           0       0.41      0.28      0.33        32
           1       0.78      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.40      0.40      0.39       175
weighted avg       0.66      0.74      0.69       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### BaggingClassifier

In [None]:
# 1

model_bc = BaggingClassifier(random_state = 42)
model_bc.fit(X_train_scaled,y_train)
pred_bc = model_bc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc))
print('\nf1 score ::::',f1_score(y_test,pred_bc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc))


 Accuracy :::: 0.6628571428571428

f1 score :::: 0.6443984160828511

classification_report ::::
                precision    recall  f1-score   support

           0       0.26      0.31      0.29        32
           1       0.77      0.81      0.79       131
           2       0.00      0.00      0.00        12

    accuracy                           0.66       175
   macro avg       0.35      0.37      0.36       175
weighted avg       0.63      0.66      0.64       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

model_bc2 = BaggingClassifier(n_estimators  =7 ,random_state = 42)
model_bc2.fit(X_train_scaled,y_train)
pred_bc2 = model_bc2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc2))
print('\nf1 score ::::',f1_score(y_test,pred_bc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc2))


 Accuracy :::: 0.6857142857142857

f1 score :::: 0.6622144942131724

classification_report ::::
                precision    recall  f1-score   support

           0       0.31      0.34      0.33        32
           1       0.78      0.83      0.80       131
           2       0.00      0.00      0.00        12

    accuracy                           0.69       175
   macro avg       0.36      0.39      0.38       175
weighted avg       0.64      0.69      0.66       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 3

model_bc3 = BaggingClassifier(estimator =RidgeClassifier(random_state=  42), n_estimators  =10 ,random_state = 42)
model_bc3.fit(X_train_scaled,y_train)
pred_bc3 = model_bc3.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc3))
print('\nf1 score ::::',f1_score(y_test,pred_bc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc3))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.670496644295302

classification_report ::::
                precision    recall  f1-score   support

           0       0.38      0.09      0.15        32
           1       0.77      0.98      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.38      0.36      0.34       175
weighted avg       0.64      0.75      0.67       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### ExtraTreesClassifier

In [None]:
model_etc = ExtraTreesClassifier(random_state = 42)
model_etc.fit(X_train_scaled,y_train)
pred_etc = model_etc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_etc))
print('\nf1 score ::::',f1_score(y_test,pred_etc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_etc))


 Accuracy :::: 0.7085714285714285

f1 score :::: 0.6612712914361375

classification_report ::::
                precision    recall  f1-score   support

           0       0.29      0.19      0.23        32
           1       0.77      0.90      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.71       175
   macro avg       0.35      0.36      0.35       175
weighted avg       0.63      0.71      0.66       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### **Boosting-Based Models (Sequential Ensemble)**
##### GradientBoostingClassifier (GBM)

In [None]:
model_gbc = GradientBoostingClassifier(random_state = 42)
model_gbc.fit(X_train_scaled,y_train)
pred_gbc = model_gbc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_gbc))
print('\nf1 score ::::',f1_score(y_test,pred_gbc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_gbc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_gbc))


 Accuracy :::: 0.68

f1 score :::: 0.6558381186765108

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.22      0.26        32
           1       0.77      0.85      0.81       131
           2       0.00      0.00      0.00        12

    accuracy                           0.68       175
   macro avg       0.37      0.36      0.36       175
weighted avg       0.64      0.68      0.66       175

\confusion_matrix ::::
  [[  7  23   2]
 [ 12 112   7]
 [  2  10   0]]


##### LightGBMClassifier (LGBM)

In [None]:
# 1

model_lgbmc = LGBMClassifier(random_state = 42,is_unbalance = True)
model_lgbmc.fit(X_train_scaled,y_train)
pred_lgbmc = model_lgbmc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 200
[LightGBM] [Info] Number of data points in the train set: 354, number of used features: 10
[LightGBM] [Info] Start training from score -1.592631
[LightGBM] [Info] Start training from score -0.297143
[LightGBM] [Info] Start training from score -2.924858

 Accuracy :::: 0.6914285714285714

f1 score :::: 0.6614940773173132

classification_report ::::
                precision    recall  f1-score   support

           0       0.31      0.28      0.30        32
           1       0.77      0.85      0.81       131
           2       0.00      0.00      0.00        12

    accuracy                           0.69       175
   macro avg       0.36      0.38      0.37       175
weighted avg       0.63      0.69      0.66       175

\confusi

In [None]:
# 2

model_lgbmc2 = LGBMClassifier(num_leaves= 200,
          feature_fraction= 0.1,
          bagging_freq= 2,
          bagging_fraction= 0.475,
          min_data_in_leaf= 10,
          objective= 'multiclass',
          num_class= 3,
          max_bin= 255,
          max_depth= -1,
          learning_rate= 0.1,
          scale_pos_weight= 25,
          boosting_type= 'gbdt',
          bagging_seed= 42,
          metric= 'multi_logloss',
          verbosity= -1,
          random_state= 42)
model_lgbmc2.fit(X_train_scaled,y_train)
pred_lgbmc2 = model_lgbmc2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc2))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc2))
print('\nconfusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc2))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.6886020590487534

classification_report ::::
                precision    recall  f1-score   support

           0       0.43      0.19      0.26        32
           1       0.78      0.95      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.40      0.38      0.37       175
weighted avg       0.66      0.75      0.69       175


confusion_matrix ::::
  [[  6  26   0]
 [  6 125   0]
 [  2  10   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 3 ***

model_lgbmc3 = LGBMClassifier(num_leaves= 200,
          feature_fraction= 0.2,
          bagging_freq= 50,
          bagging_fraction= 0.5,
          min_data_in_leaf= 40,
          objective= 'multiclass',
          num_class= 3,
          max_bin= 255,
          max_depth= -1,
          learning_rate= 0.02,
          scale_pos_weight= 25,
          boosting_type= 'gbdt',
          bagging_seed= 11,
          metric= 'multi_logloss',
          verbosity= -1,
          random_state= 42)
model_lgbmc3.fit(X_train_scaled,y_train)
pred_lgbmc3 = model_lgbmc3.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc3))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc3))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc3))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.6409337068160597

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.75      1.00      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.25      0.33      0.29       175
weighted avg       0.56      0.75      0.64       175

\confusion_matrix ::::
  [[  0  32   0]
 [  0 131   0]
 [  0  12   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### CatBoostClassifier

In [None]:
# 1

model_catb = CatBoostClassifier(random_state = 42,verbose = False)
model_catb.fit(X_train_scaled,y_train)
pred_catb = model_catb.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_catb))
print('\nf1 score ::::',f1_score(y_test,pred_catb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_catb))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_catb))


 Accuracy :::: 0.6971428571428572

f1 score :::: 0.6729480580309619

classification_report ::::
                precision    recall  f1-score   support

           0       0.28      0.25      0.26        32
           1       0.78      0.86      0.82       131
           2       0.50      0.08      0.14        12

    accuracy                           0.70       175
   macro avg       0.52      0.40      0.41       175
weighted avg       0.67      0.70      0.67       175

\confusion_matrix ::::
  [[  8  24   0]
 [ 17 113   1]
 [  4   7   1]]


In [None]:
# 2

model_catb2 = CatBoostClassifier(iterations=100, random_state = 42, learning_rate=0.1, verbose = False, depth=7, class_weights=[1,1,1])
model_catb2.fit(X_train_scaled,y_train)
pred_catb2 = model_catb2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_catb2))
print('\nf1 score ::::',f1_score(y_test,pred_catb2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_catb2))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_catb2))


 Accuracy :::: 0.7085714285714285

f1 score :::: 0.6612712914361375

classification_report ::::
                precision    recall  f1-score   support

           0       0.29      0.19      0.23        32
           1       0.77      0.90      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.71       175
   macro avg       0.35      0.36      0.35       175
weighted avg       0.63      0.71      0.66       175

\confusion_matrix ::::
  [[  6  26   0]
 [ 13 118   0]
 [  2  10   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### XGBoostClassifier

In [None]:
# 1

model_xgbc = XGBClassifier(random_state = 42,scale_pos_weight = 5)
model_xgbc.fit(X_train_scaled,y_train)
pred_xgbc = model_xgbc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc))

Parameters: { "scale_pos_weight" } are not used.




 Accuracy :::: 0.72

f1 score :::: 0.6445964331678617

classification_report ::::
                precision    recall  f1-score   support

           0       0.20      0.06      0.10        32
           1       0.75      0.95      0.84       131
           2       0.00      0.00      0.00        12

    accuracy                           0.72       175
   macro avg       0.32      0.34      0.31       175
weighted avg       0.60      0.72      0.64       175

\confusion_matrix ::::
  [[  2  30   0]
 [  7 124   0]
 [  1  11   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

model_xgbc2 = XGBClassifier(learning_rate = 0.02,random_state = 42,scale_pos_weight = 8)
model_xgbc2.fit(X_train_scaled,y_train)
pred_xgbc2 = model_xgbc2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc2))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc2))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc2))

Parameters: { "scale_pos_weight" } are not used.




 Accuracy :::: 0.7314285714285714

f1 score :::: 0.6513058485139022

classification_report ::::
                precision    recall  f1-score   support

           0       0.25      0.06      0.10        32
           1       0.75      0.96      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.33      0.34      0.32       175
weighted avg       0.61      0.73      0.65       175

\confusion_matrix ::::
  [[  2  30   0]
 [  5 126   0]
 [  1  11   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 3

model_xgbc3 = XGBClassifier(random_state = 42,)
model_xgbc3.fit(X_train_scaled,y_train)
pred_xgbc3 = model_xgbc3.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc3))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc3))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc3))


 Accuracy :::: 0.72

f1 score :::: 0.6445964331678617

classification_report ::::
                precision    recall  f1-score   support

           0       0.20      0.06      0.10        32
           1       0.75      0.95      0.84       131
           2       0.00      0.00      0.00        12

    accuracy                           0.72       175
   macro avg       0.32      0.34      0.31       175
weighted avg       0.60      0.72      0.64       175

\confusion_matrix ::::
  [[  2  30   0]
 [  7 124   0]
 [  1  11   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### **Stacking-Based Model (Meta-Learning)**
##### VotingClassifier

In [None]:
# 1

clf1 = GaussianNB()
clf2 = LGBMClassifier(num_leaves= 200,
          feature_fraction= 0.1,
          bagging_freq= 2,
          bagging_fraction= 0.475,
          min_data_in_leaf= 10,
          objective= 'multiclass',
          num_class= 3,
          max_bin= 255,
          max_depth= -1,
          learning_rate= 0.1,
          scale_pos_weight= 25,
          boosting_type= 'gbdt',
          bagging_seed= 42,
          metric= 'multi_logloss',#'precision',
          verbosity= -1,
          random_state= 42)
clf3 =  GradientBoostingClassifier(random_state = 42)
clf4 = CatBoostClassifier(random_state = 42,verbose = False)
# clf5 = XGBClassifier(random_state = 42,scale_pos_weight = 5)
model_vc = VotingClassifier(estimators=[ ('gaussian', clf1), ('lgbm', clf2), ('gradient', clf3), ('catboost', clf4),
                                            #('xgb', clf5)
                                            ], weights = [1,1,1,1], voting='soft')

model_vc.fit(X_train_scaled,y_train)
pred_vc = model_vc.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc))
print('\nf1 score ::::',f1_score(y_test,pred_vc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.6685714285714286

f1 score :::: 0.6510312813208841

classification_report ::::
                precision    recall  f1-score   support

           0       0.28      0.34      0.31        32
           1       0.78      0.81      0.79       131
           2       0.00      0.00      0.00        12

    accuracy                           0.67       175
   macro avg       0.35      0.38      0.37       175
weighted avg       0.64      0.67      0.65       175

\confusion_matrix ::::
  [[ 11  21   0]
 [ 25 106   0]
 [  3   9   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

model_vc2 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [1,1], voting='hard')

model_vc2.fit(X_train_scaled,y_train)
pred_vc2 = model_vc2.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc2))
print('\nf1 score ::::',f1_score(y_test,pred_vc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc2))


 Accuracy :::: 0.2

f1 score :::: 0.10015808303519427

classification_report ::::
                precision    recall  f1-score   support

           0       0.18      0.97      0.31        32
           1       0.80      0.03      0.06       131
           2       0.00      0.00      0.00        12

    accuracy                           0.20       175
   macro avg       0.33      0.33      0.12       175
weighted avg       0.63      0.20      0.10       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 3

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

model_vc3 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [2,4], voting='hard')

model_vc3.fit(X_train_scaled,y_train)
pred_vc3 = model_vc3.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc3))
print('\nf1 score ::::',f1_score(y_test,pred_vc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc3))


 Accuracy :::: 0.68

f1 score :::: 0.6558381186765108

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.22      0.26        32
           1       0.77      0.85      0.81       131
           2       0.00      0.00      0.00        12

    accuracy                           0.68       175
   macro avg       0.37      0.36      0.36       175
weighted avg       0.64      0.68      0.66       175



In [None]:
# 4

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

model_vc4 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [2,4], voting='soft')

model_vc4.fit(X_train_scaled,y_train)
pred_vc4 = model_vc4.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc4))
print('\nf1 score ::::',f1_score(y_test,pred_vc4, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc4))


 Accuracy :::: 0.6

f1 score :::: 0.6157539353224001

classification_report ::::
                precision    recall  f1-score   support

           0       0.29      0.53      0.38        32
           1       0.80      0.67      0.73       131
           2       0.00      0.00      0.00        12

    accuracy                           0.60       175
   macro avg       0.36      0.40      0.37       175
weighted avg       0.65      0.60      0.62       175



In [None]:
# 5

clf1 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf2 =  GradientBoostingClassifier(random_state = 42)

model_vc5 = VotingClassifier(estimators=[ ('random_forest', clf1), ('gradient', clf2)], weights = [1,4], voting='soft')

model_vc5.fit(X_train_scaled,y_train)
pred_vc5 = model_vc5.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc5))
print('\nf1 score ::::',f1_score(y_test,pred_vc5, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc5))


 Accuracy :::: 0.6914285714285714

f1 score :::: 0.6631670487785596

classification_report ::::
                precision    recall  f1-score   support

           0       0.35      0.22      0.27        32
           1       0.78      0.87      0.82       131
           2       0.00      0.00      0.00        12

    accuracy                           0.69       175
   macro avg       0.38      0.36      0.36       175
weighted avg       0.64      0.69      0.66       175



In [None]:
# 6

clf1 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf2 = GradientBoostingClassifier(random_state = 42)
clf3 = XGBClassifier(random_state = 42)

model_vc6 = VotingClassifier(estimators=[ ('random_forest', clf1), ('gradient', clf2), ('xgb', clf3)], weights = [3,50,8], voting='soft')

model_vc6.fit(X_train_scaled,y_train)
pred_vc6 = model_vc6.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc6))
print('\nf1 score ::::',f1_score(y_test,pred_vc6, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc6))


 Accuracy :::: 0.6914285714285714

f1 score :::: 0.6587836734693877

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.19      0.24        32
           1       0.77      0.88      0.82       131
           2       0.00      0.00      0.00        12

    accuracy                           0.69       175
   macro avg       0.37      0.36      0.35       175
weighted avg       0.64      0.69      0.66       175



In [None]:
# 7

clf1 = GaussianNB()
clf2 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf3 = GradientBoostingClassifier(random_state = 42, loss= 'log_loss', learning_rate = 10.0, n_estimators= 100)
model_vc7 = VotingClassifier(estimators=[ ('gau', clf1),('rand', clf2), ('grad', clf3)], weights= [3, 17,30],voting='hard')

model_vc7.fit(X_train_scaled,y_train)
pred_vc7 = model_vc7.predict(X_test_scaled)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc7))
print('\nf1 score ::::',f1_score(y_test,pred_vc7, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc7))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc7))


 Accuracy :::: 0.14857142857142858

f1 score :::: 0.12144645326692735

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.28      0.31        32
           1       1.00      0.04      0.07       131
           2       0.08      1.00      0.15        12

    accuracy                           0.15       175
   macro avg       0.47      0.44      0.18       175
weighted avg       0.82      0.15      0.12       175

\confusion_matrix ::::
  [[  9   0  23]
 [ 18   5 108]
 [  0   0  12]]


### **Test Best Performing Scaled Dataset Models**

#### LightGBMClassifier (LGBM)

In [None]:
model_final = LGBMClassifier(random_state = 42,is_unbalance = True)
model_final.fit(X_train_scaled,y_train)
pred_final = model_final.predict(X_test_scaled)
# print('\n Accuracy ::::', accuracy_score(y_test,pred_final))
# print('\nf1 score ::::',f1_score(y_test,pred_final))
# print('\nclassification_report ::::\n ',classification_report(y_test,pred_final))
# print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_final))

test_pred_final = model_final.predict(test_features)
print('\n Accuracy ::::', accuracy_score(test_target,test_pred_final))
print('\nf1 score ::::',f1_score(test_target,test_pred_final, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(test_target,test_pred_final))
print('\confusion_matrix ::::\n ',confusion_matrix(test_target,test_pred_final))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000399 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 200
[LightGBM] [Info] Number of data points in the train set: 354, number of used features: 10
[LightGBM] [Info] Start training from score -1.592631
[LightGBM] [Info] Start training from score -0.297143
[LightGBM] [Info] Start training from score -2.924858

 Accuracy :::: 0.9198717948717948

f1 score :::: 0.8814798167886649

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        19
           1       0.92      1.00      0.96       287
           2       0.00      0.00      0.00         6

    accuracy                           0.92       312
   macro avg       0.31      0.33      0.32       312
weighted avg       0.85      0.92      0.88       312

\confusi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### GaussianNB

In [None]:
model_finalgnb = GaussianNB()
model_finalgnb.fit(X_train_scaled,y_train)
pred_finalgnb = model_finalgnb.predict(X_test_scaled)
# print('\n Accuracy ::::', accuracy_score(y_test,pred_finalgnb))
# print('\nf1 score ::::',f1_score(y_test,pred_finalgnb))
# print('\nclassification_report ::::\n ',classification_report(y_test,pred_finalgnb))
# print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_finalgnb))

test_pred_finalgnb = model_finalgnb.predict(test_features)
print('\n Accuracy ::::', accuracy_score(test_target,test_pred_finalgnb))
print('\nf1 score ::::',f1_score(test_target,test_pred_finalgnb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(test_target,test_pred_finalgnb))
print('\confusion_matrix ::::\n ',confusion_matrix(test_target,test_pred_finalgnb))


 Accuracy :::: 0.09935897435897435

f1 score :::: 0.0810901031463827

classification_report ::::
                precision    recall  f1-score   support

           0       0.06      1.00      0.12        19
           1       1.00      0.04      0.08       287
           2       0.00      0.00      0.00         6

    accuracy                           0.10       312
   macro avg       0.35      0.35      0.07       312
weighted avg       0.92      0.10      0.08       312

\confusion_matrix ::::
  [[ 19   0   0]
 [275  12   0]
 [  6   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### **Modeling with Unscaled Dataset**

#### **Linear Models**

##### LogisticRegression

In [None]:
unscaled_model_lr = LogisticRegression(random_state = 42)
unscaled_model_lr.fit(X_train,y_train)
pred_lr = unscaled_model_lr.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lr))
print('\nf1 score ::::',f1_score(y_test,pred_lr, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lr))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.6430351288056207

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.75      1.00      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.25      0.33      0.29       175
weighted avg       0.56      0.75      0.64       175



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### RidgeClassifier

In [None]:
unscaled_model_rc = RidgeClassifier(random_state = 42)
unscaled_model_rc.fit(X_train,y_train)
pred_rc = unscaled_model_rc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rc))
print('\nf1 score ::::',f1_score(y_test,pred_rc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rc))


 Accuracy :::: 0.7542857142857143

f1 score :::: 0.6740563784042044

classification_report ::::
                precision    recall  f1-score   support

           0       0.43      0.09      0.15        32
           1       0.77      0.98      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.40      0.36      0.34       175
weighted avg       0.65      0.75      0.67       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### GaussianNaïveBayes (GaussianNB)

In [None]:
unscaled_model_gnb = GaussianNB()
unscaled_model_gnb.fit(X_train,y_train)
pred_gnb = unscaled_model_gnb.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_gnb))
print('\nf1 score ::::',f1_score(y_test,pred_gnb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_gnb))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_gnb))


 Accuracy :::: 0.13714285714285715

f1 score :::: 0.13346958414480467

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.86      0.09      0.17       131
           2       0.07      1.00      0.14        12

    accuracy                           0.14       175
   macro avg       0.31      0.36      0.10       175
weighted avg       0.65      0.14      0.13       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


#### **Tree-Based Model (Single)**

##### DecisionTreeClassifier

In [None]:
unscaled_model_dtc = DecisionTreeClassifier(random_state = 42)
unscaled_model_dtc.fit(X_train,y_train)
pred_dtc = unscaled_model_dtc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_dtc))
print('\nf1 score ::::',f1_score(y_test,pred_dtc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_dtc))


 Accuracy :::: 0.6514285714285715

f1 score :::: 0.6461420221002537

classification_report ::::
                precision    recall  f1-score   support

           0       0.34      0.38      0.36        32
           1       0.77      0.78      0.78       131
           2       0.00      0.00      0.00        12

    accuracy                           0.65       175
   macro avg       0.37      0.38      0.38       175
weighted avg       0.64      0.65      0.65       175



#### **Bagging-Based Models (Parallel Ensemble)**
##### RandomForestClassifier

In [None]:
# 1

unscaled_model_rfc = RandomForestClassifier(random_state = 42)
unscaled_model_rfc.fit(X_train,y_train)
pred_rfc = unscaled_model_rfc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rfc))
print('\nf1 score ::::',f1_score(y_test,pred_rfc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rfc))


 Accuracy :::: 0.7542857142857143

f1 score :::: 0.7065767735665696

classification_report ::::
                precision    recall  f1-score   support

           0       0.53      0.28      0.37        32
           1       0.78      0.94      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.44      0.41      0.41       175
weighted avg       0.68      0.75      0.71       175



In [None]:
# 2

unscaled_model_rfc2 = RandomForestClassifier(n_estimators = 350,random_state = 42)
unscaled_model_rfc2.fit(X_train,y_train)
pred_rfc2 = unscaled_model_rfc2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_rfc2))
print('\nf1 score ::::',f1_score(y_test,pred_rfc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_rfc2))


 Accuracy :::: 0.7428571428571429

f1 score :::: 0.6960083054200701

classification_report ::::
                precision    recall  f1-score   support

           0       0.42      0.25      0.31        32
           1       0.79      0.93      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.40      0.39      0.39       175
weighted avg       0.67      0.74      0.70       175



##### BaggingClassifier

In [None]:
# 1

unscaled_model_bc = BaggingClassifier(random_state = 42)
unscaled_model_bc.fit(X_train,y_train)
pred_bc = unscaled_model_bc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc))
print('\nf1 score ::::',f1_score(y_test,pred_bc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc))


 Accuracy :::: 0.6628571428571428

f1 score :::: 0.6409929553840993

classification_report ::::
                precision    recall  f1-score   support

           0       0.26      0.28      0.27        32
           1       0.76      0.82      0.79       131
           2       0.00      0.00      0.00        12

    accuracy                           0.66       175
   macro avg       0.34      0.37      0.35       175
weighted avg       0.62      0.66      0.64       175



In [None]:
# 2

unscaled_model_bc2 = BaggingClassifier(n_estimators  =7 ,random_state = 42)
unscaled_model_bc2.fit(X_train,y_train)
pred_bc2 = unscaled_model_bc2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc2))
print('\nf1 score ::::',f1_score(y_test,pred_bc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc2))


 Accuracy :::: 0.6857142857142857

f1 score :::: 0.6656211927150043

classification_report ::::
                precision    recall  f1-score   support

           0       0.33      0.38      0.35        32
           1       0.78      0.82      0.80       131
           2       0.00      0.00      0.00        12

    accuracy                           0.69       175
   macro avg       0.37      0.40      0.39       175
weighted avg       0.65      0.69      0.67       175



In [None]:
# 3

unscaled_model_bc3 = BaggingClassifier(estimator =RidgeClassifier(random_state=  42), n_estimators  =10 ,random_state = 42)
unscaled_model_bc3.fit(X_train,y_train)
pred_bc3 = unscaled_model_bc3.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_bc3))
print('\nf1 score ::::',f1_score(y_test,pred_bc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_bc3))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.683191094619666

classification_report ::::
                precision    recall  f1-score   support

           0       0.42      0.16      0.23        32
           1       0.77      0.96      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.40      0.37      0.36       175
weighted avg       0.65      0.75      0.68       175



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### ExtraTreesClassifier

In [None]:
unscaled_model_etc = ExtraTreesClassifier(random_state = 42)
unscaled_model_etc.fit(X_train,y_train)
pred_etc = unscaled_model_etc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_etc))
print('\nf1 score ::::',f1_score(y_test,pred_etc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_etc))


 Accuracy :::: 0.7028571428571428

f1 score :::: 0.6633858914590571

classification_report ::::
                precision    recall  f1-score   support

           0       0.26      0.16      0.20        32
           1       0.77      0.89      0.83       131
           2       0.25      0.08      0.12        12

    accuracy                           0.70       175
   macro avg       0.43      0.38      0.38       175
weighted avg       0.64      0.70      0.66       175



#### **Boosting-Based Models (Sequential Ensemble)**
##### GradientBoostingClassifier

In [None]:
unscaled_model_gbc = GradientBoostingClassifier(random_state = 42)
unscaled_model_gbc.fit(X_train,y_train)
pred_gbc = unscaled_model_gbc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_gbc))
print('\nf1 score ::::',f1_score(y_test,pred_gbc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_gbc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_gbc))


 Accuracy :::: 0.7428571428571429

f1 score :::: 0.701978021978022

classification_report ::::
                precision    recall  f1-score   support

           0       0.56      0.28      0.38        32
           1       0.78      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.45      0.40      0.41       175
weighted avg       0.69      0.74      0.70       175

\confusion_matrix ::::
  [[  9  22   1]
 [  7 121   3]
 [  0  12   0]]


##### LightGBM Classifier (LGBM)

In [None]:
# 1

unscaled_model_lgbmc = LGBMClassifier(random_state = 42,is_unbalance = True)
unscaled_model_lgbmc.fit(X_train,y_train)
pred_lgbmc = unscaled_model_lgbmc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000510 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 194
[LightGBM] [Info] Number of data points in the train set: 354, number of used features: 10
[LightGBM] [Info] Start training from score -1.592631
[LightGBM] [Info] Start training from score -0.297143
[LightGBM] [Info] Start training from score -2.924858

 Accuracy :::: 0.72

f1 score :::: 0.6799305254016501

classification_report ::::
                precision    recall  f1-score   support

           0       0.38      0.28      0.32        32
           1       0.77      0.89      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.72       175
   macro avg       0.38      0.39      0.38       175
weighted avg       0.65      0.72      0.68       175

\confusion_matrix ::::

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

unscaled_model_lgbmc2 = LGBMClassifier(num_leaves= 200,
          feature_fraction= 0.1,
          bagging_freq= 2,
          bagging_fraction= 0.475,
          min_data_in_leaf= 10,
          objective= 'multiclass',
          num_class= 3,
          max_bin= 255,
          max_depth= -1,
          learning_rate= 0.1,
          scale_pos_weight= 25,
          boosting_type= 'gbdt',
          bagging_seed= 42,
          metric= 'multi_logloss',
          verbosity= -1,
          random_state= 42)
unscaled_model_lgbmc2.fit(X_train,y_train)
pred_lgbmc2 = unscaled_model_lgbmc2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc2))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc2))
print('\nconfusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc2))


 Accuracy :::: 0.7314285714285714

f1 score :::: 0.6583618312189741

classification_report ::::
                precision    recall  f1-score   support

           0       0.30      0.09      0.14        32
           1       0.76      0.95      0.84       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.35      0.35      0.33       175
weighted avg       0.62      0.73      0.66       175


confusion_matrix ::::
  [[  3  29   0]
 [  6 125   0]
 [  1  11   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 3 ***

unscaled_model_lgbmc3 = LGBMClassifier(num_leaves= 200,
          feature_fraction= 0.2,
          bagging_freq= 50,
          bagging_fraction= 0.5,
          min_data_in_leaf= 40,
          objective= 'multiclass',
          num_class= 3,
          max_bin= 255,
          max_depth= -1,
          learning_rate= 0.02,
          scale_pos_weight= 25,
          boosting_type= 'gbdt',
          bagging_seed= 11,
          metric= 'multi_logloss',
          verbosity= -1,
          random_state= 42)
unscaled_model_lgbmc3.fit(X_train,y_train)
pred_lgbmc3 = unscaled_model_lgbmc3.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_lgbmc3))
print('\nf1 score ::::',f1_score(y_test,pred_lgbmc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_lgbmc3))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_lgbmc3))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.6409337068160597

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.75      1.00      0.86       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.25      0.33      0.29       175
weighted avg       0.56      0.75      0.64       175

\confusion_matrix ::::
  [[  0  32   0]
 [  0 131   0]
 [  0  12   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### CatBoostClassifier

In [None]:
# 1

unscaled_model_catb = CatBoostClassifier(random_state = 42,verbose = False)
unscaled_model_catb.fit(X_train,y_train)
pred_catb = unscaled_model_catb.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_catb))
print('\nf1 score ::::',f1_score(y_test,pred_catb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_catb))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_catb))


 Accuracy :::: 0.7085714285714285

f1 score :::: 0.6842707743869034

classification_report ::::
                precision    recall  f1-score   support

           0       0.30      0.28      0.29        32
           1       0.79      0.87      0.83       131
           2       1.00      0.08      0.15        12

    accuracy                           0.71       175
   macro avg       0.70      0.41      0.42       175
weighted avg       0.72      0.71      0.68       175

\confusion_matrix ::::
  [[  9  23   0]
 [ 17 114   0]
 [  4   7   1]]


In [None]:
# 2

unscaled_model_catb2 = CatBoostClassifier(iterations=100, random_state = 42, learning_rate=0.1, verbose = False, depth=7, class_weights=[1,1,1])
unscaled_model_catb2.fit(X_train,y_train)
pred_catb2 = unscaled_model_catb2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_catb2))
print('\nf1 score ::::',f1_score(y_test,pred_catb2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_catb2))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_catb2))


 Accuracy :::: 0.7142857142857143

f1 score :::: 0.6651348651348652

classification_report ::::
                precision    recall  f1-score   support

           0       0.30      0.19      0.23        32
           1       0.77      0.91      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.71       175
   macro avg       0.36      0.37      0.35       175
weighted avg       0.63      0.71      0.67       175

\confusion_matrix ::::
  [[  6  26   0]
 [ 12 119   0]
 [  2  10   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


##### XGBoostClassifier

In [None]:
# 1

unscaled_model_xgbc = XGBClassifier(random_state = 42,scale_pos_weight = 5)
unscaled_model_xgbc.fit(X_train,y_train)
pred_xgbc = unscaled_model_xgbc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc))

Parameters: { "scale_pos_weight" } are not used.




 Accuracy :::: 0.7257142857142858

f1 score :::: 0.6896045038705138

classification_report ::::
                precision    recall  f1-score   support

           0       0.42      0.34      0.38        32
           1       0.78      0.89      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.40      0.41      0.40       175
weighted avg       0.66      0.73      0.69       175

\confusion_matrix ::::
  [[ 11  21   0]
 [ 15 116   0]
 [  0  12   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# 2

unscaled_model_xgbc2 = XGBClassifier(learning_rate = 0.02,random_state = 42,scale_pos_weight = 8)
unscaled_model_xgbc2.fit(X_train,y_train)
pred_xgbc2 = unscaled_model_xgbc2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc2))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc2))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc2))

Parameters: { "scale_pos_weight" } are not used.




 Accuracy :::: 0.7428571428571429

f1 score :::: 0.702555761213005

classification_report ::::
                precision    recall  f1-score   support

           0       0.45      0.31      0.37        32
           1       0.79      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.41      0.41      0.41       175
weighted avg       0.67      0.74      0.70       175

\confusion_matrix ::::
  [[ 10  22   0]
 [ 10 120   1]
 [  2  10   0]]


In [None]:
# 3

unscaled_model_xgbc3 = XGBClassifier(random_state = 42,)
unscaled_model_xgbc3.fit(X_train,y_train)
pred_xgbc3 = unscaled_model_xgbc3.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_xgbc3))
print('\nf1 score ::::',f1_score(y_test,pred_xgbc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_xgbc3))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_xgbc3))


 Accuracy :::: 0.7257142857142858

f1 score :::: 0.6896045038705138

classification_report ::::
                precision    recall  f1-score   support

           0       0.42      0.34      0.38        32
           1       0.78      0.89      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.40      0.41      0.40       175
weighted avg       0.66      0.73      0.69       175

\confusion_matrix ::::
  [[ 11  21   0]
 [ 15 116   0]
 [  0  12   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### **Stacking-Based Model (Meta-Learning)**
##### VotingClassifier

In [None]:
# 1

clf1 = unscaled_model_gnb
clf2 = unscaled_model_lgbmc3
clf3 = unscaled_model_gbc


unscaled_model_vc = VotingClassifier(estimators=[ ('gaussian', clf1), ('lgbm', clf2), ('gradientboosting', clf3)], weights = [4,1,2], voting='hard')

unscaled_model_vc.fit(X_train,y_train)
pred_vc = unscaled_model_vc.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc))
print('\nf1 score ::::',f1_score(y_test,pred_vc, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.13714285714285715

f1 score :::: 0.13346958414480467

classification_report ::::
                precision    recall  f1-score   support

           0       0.00      0.00      0.00        32
           1       0.86      0.09      0.17       131
           2       0.07      1.00      0.14        12

    accuracy                           0.14       175
   macro avg       0.31      0.36      0.10       175
weighted avg       0.65      0.14      0.13       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 2

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

unscaled_model_vc2 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [1,1], voting='hard')

unscaled_model_vc2.fit(X_train,y_train)
pred_vc2 = unscaled_model_vc2.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc2))
print('\nf1 score ::::',f1_score(y_test,pred_vc2, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc2))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.7371428571428571

f1 score :::: 0.6975479515114316

classification_report ::::
                precision    recall  f1-score   support

           0       0.53      0.28      0.37        32
           1       0.78      0.92      0.84       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.44      0.40      0.40       175
weighted avg       0.68      0.74      0.70       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 3

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

unscaled_model_vc3 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [2,4], voting='hard')

unscaled_model_vc3.fit(X_train,y_train)
pred_vc3 = unscaled_model_vc3.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc3))
print('\nf1 score ::::',f1_score(y_test,pred_vc3, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc3))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.7428571428571429

f1 score :::: 0.701978021978022

classification_report ::::
                precision    recall  f1-score   support

           0       0.56      0.28      0.38        32
           1       0.78      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.45      0.40      0.41       175
weighted avg       0.69      0.74      0.70       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 4

clf1 = GaussianNB()
clf2 =  GradientBoostingClassifier(random_state = 42)

unscaled_model_vc4 = VotingClassifier(estimators=[ ('gaussian', clf1), ('gradient', clf2)], weights = [2,4], voting='soft')

unscaled_model_vc4.fit(X_train,y_train)
pred_vc4 = unscaled_model_vc4.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc4))
print('\nf1 score ::::',f1_score(y_test,pred_vc4, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc4))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.72

f1 score :::: 0.7019555927943025

classification_report ::::
                precision    recall  f1-score   support

           0       0.58      0.22      0.32        32
           1       0.80      0.88      0.84       131
           2       0.21      0.33      0.26        12

    accuracy                           0.72       175
   macro avg       0.53      0.48      0.47       175
weighted avg       0.72      0.72      0.70       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 5

clf1 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf2 =  GradientBoostingClassifier(random_state = 42)

unscaled_model_vc5 = VotingClassifier(estimators=[ ('random_forest', clf1), ('gradient', clf2)], weights = [1,4], voting='soft')

unscaled_model_vc5.fit(X_train,y_train)
pred_vc5 = unscaled_model_vc5.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc5))
print('\nf1 score ::::',f1_score(y_test,pred_vc5, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc5))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.7542857142857143

f1 score :::: 0.7160862155388471

classification_report ::::
                precision    recall  f1-score   support

           0       0.61      0.34      0.44        32
           1       0.79      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.47      0.42      0.43       175
weighted avg       0.70      0.75      0.72       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 6

clf1 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf2 = GradientBoostingClassifier(random_state = 42)
clf3 = XGBClassifier(random_state = 42)

unscaled_model_vc6 = VotingClassifier(estimators=[ ('random_forest', clf1), ('gradient', clf2), ('xgb', clf3)], weights = [3,50,8], voting='soft')

unscaled_model_vc6.fit(X_train,y_train)
pred_vc6 = unscaled_model_vc6.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc6))
print('\nf1 score ::::',f1_score(y_test,pred_vc6, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc6))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.7485714285714286

f1 score :::: 0.711475125261372

classification_report ::::
                precision    recall  f1-score   support

           0       0.58      0.34      0.43        32
           1       0.78      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.75       175
   macro avg       0.45      0.42      0.43       175
weighted avg       0.69      0.75      0.71       175

\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


In [None]:
# 7

clf1 = GaussianNB()
clf2 = RandomForestClassifier(n_estimators = 500,random_state = 42)
clf3 = GradientBoostingClassifier(random_state = 42, loss= 'log_loss', learning_rate = 10.0, n_estimators= 100)
unscaled_model_vc7 = VotingClassifier(estimators=[ ('gau', clf1),('rand', clf2), ('grad', clf3)], weights= [3, 17,30],voting='hard')

unscaled_model_vc7.fit(X_train,y_train)
pred_vc7 = unscaled_model_vc7.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_vc7))
print('\nf1 score ::::',f1_score(y_test,pred_vc7, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_vc7))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc7))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_vc))


 Accuracy :::: 0.14857142857142858

f1 score :::: 0.12234023582907964

classification_report ::::
                precision    recall  f1-score   support

           0       0.35      0.28      0.31        32
           1       1.00      0.04      0.07       131
           2       0.08      1.00      0.15        12

    accuracy                           0.15       175
   macro avg       0.48      0.44      0.18       175
weighted avg       0.82      0.15      0.12       175

\confusion_matrix ::::
  [[  9   0  23]
 [ 17   5 109]
 [  0   0  12]]
\confusion_matrix ::::
  [[  0   2  30]
 [  1  12 118]
 [  0   0  12]]


### **Test Best Performing Unscaled Dataset Models**

#### GradientBoostingClassifier

In [None]:
unscaled_model_final = unscaled_model_gbc
unscaled_model_final.fit(X_train,y_train)
pred_final = unscaled_model_final.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_final))
print('\nf1 score ::::',f1_score(y_test,pred_final, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_final))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_final))

test_pred_final = unscaled_model_final.predict(test_features)
print('\n Accuracy ::::', accuracy_score(test_target,test_pred_final))
print('\nf1 score ::::',f1_score(test_target,test_pred_final, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(test_target,test_pred_final))
print('\confusion_matrix ::::\n ',confusion_matrix(test_target,test_pred_final))


 Accuracy :::: 0.7428571428571429

f1 score :::: 0.701978021978022

classification_report ::::
                precision    recall  f1-score   support

           0       0.56      0.28      0.38        32
           1       0.78      0.92      0.85       131
           2       0.00      0.00      0.00        12

    accuracy                           0.74       175
   macro avg       0.45      0.40      0.41       175
weighted avg       0.69      0.74      0.70       175

\confusion_matrix ::::
  [[  9  22   1]
 [  7 121   3]
 [  0  12   0]]

 Accuracy :::: 0.6634615384615384

f1 score :::: 0.7446395496549553

classification_report ::::
                precision    recall  f1-score   support

           0       0.07      0.11      0.09        19
           1       0.92      0.71      0.80       287
           2       0.02      0.17      0.03         6

    accuracy                           0.66       312
   macro avg       0.34      0.33      0.31       312
weighted avg       0.85  

#### XGBClassifier

In [None]:
unscaled_model_finalgnb = unscaled_model_xgbc
unscaled_model_finalgnb.fit(X_train,y_train)
pred_finalgnb = unscaled_model_finalgnb.predict(X_test)
print('\n Accuracy ::::', accuracy_score(y_test,pred_finalgnb))
print('\nf1 score ::::',f1_score(y_test,pred_finalgnb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(y_test,pred_finalgnb))
print('\confusion_matrix ::::\n ',confusion_matrix(y_test,pred_finalgnb))

test_pred_finalgnb = unscaled_model_finalgnb.predict(test_features)
print('\n Accuracy ::::', accuracy_score(test_target,test_pred_finalgnb))
print('\nf1 score ::::',f1_score(test_target,test_pred_finalgnb, average='weighted'))
print('\nclassification_report ::::\n ',classification_report(test_target,test_pred_finalgnb))
print('\confusion_matrix ::::\n ',confusion_matrix(test_target,test_pred_finalgnb))

Parameters: { "scale_pos_weight" } are not used.




 Accuracy :::: 0.7257142857142858

f1 score :::: 0.6896045038705138

classification_report ::::
                precision    recall  f1-score   support

           0       0.42      0.34      0.38        32
           1       0.78      0.89      0.83       131
           2       0.00      0.00      0.00        12

    accuracy                           0.73       175
   macro avg       0.40      0.41      0.40       175
weighted avg       0.66      0.73      0.69       175

\confusion_matrix ::::
  [[ 11  21   0]
 [ 15 116   0]
 [  0  12   0]]

 Accuracy :::: 0.7532051282051282

f1 score :::: 0.8020052596975673

classification_report ::::
                precision    recall  f1-score   support

           0       0.09      0.26      0.13        19
           1       0.93      0.80      0.86       287
           2       0.00      0.00      0.00         6

    accuracy                           0.75       312
   macro avg       0.34      0.35      0.33       312
weighted avg       0.87 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Save The Model

The GradientBoosting Classifier model, trained on the unscaled dataset, was ultimately selected.

In [None]:
pickle.dump(unscaled_model_final, open('models/business_clients_rfmt_cluster_model.pkl', 'wb'))