# Weighting voting, stacking meta learning, majority voting

In [5]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


train_path = "train_data.csv"
test_path = "test_data.csv"

df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)

X_train, y_train = df_train.iloc[:, 1:], df_train['class']
X_test, y_test = df_test.iloc[:, 1:], df_test['class']

# Boosting Model (XGBoost)
xgb_model = XGBClassifier(n_estimators=300, learning_rate=0.1, max_depth=5, eval_metric="mlogloss", use_label_encoder=False, random_state=42)
xgb_model.fit(X_train, y_train)

# Bagging Model (Random Forest)
rf_model = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_split=5, random_state=42)
rf_model.fit(X_train, y_train)

# Neural Network (MLP)
mlp_model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)

# Majority Voting Classifier
voting_majority = VotingClassifier(
    estimators=[("xgb", xgb_model), ("rf", rf_model), ("mlp", mlp_model)],
    voting="hard"
)
voting_majority.fit(X_train, y_train)

# Weighted Voting Classifier
voting_weighted = VotingClassifier(
    estimators=[("xgb", xgb_model), ("rf", rf_model), ("mlp", mlp_model)],
    voting="soft",
    weights=[1.5, 1.2, 1.0]  # XGBoost given more weight
)
voting_weighted.fit(X_train, y_train)

# Stacking Meta-Learning Classifier
stacking_model = StackingClassifier(
    estimators=[("xgb", xgb_model), ("rf", rf_model), ("mlp", mlp_model)],
    final_estimator=LogisticRegression(max_iter=500, random_state=42)
)
stacking_model.fit(X_train, y_train)

# Evaluation**
models = {
    "Majority Voting": voting_majority,
    "Weighted Voting": voting_weighted,
    "Stacking (Meta-Learning)": stacking_model
}

# Store results
fusion_results = {}

# Evaluate each fusion method
for name, model in models.items():
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    fusion_results[name] = acc

    print(f"\n{name} Accuracy: {acc:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Identify the best fusion approach
best_fusion = max(fusion_results, key=fusion_results.get)
best_accuracy = fusion_results[best_fusion]

print(f"\n🔹 Best High-Level Fusion Approach: {best_fusion} with Accuracy: {best_accuracy:.4f}")


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.




Majority Voting Accuracy: 0.8356

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.80      0.86        15
           1       0.83      0.67      0.74        15
           2       0.83      1.00      0.91        10
           3       0.80      0.80      0.80         5
           4       0.93      1.00      0.96        13
           5       0.83      0.83      0.83        12
           6       0.40      0.67      0.50         3

    accuracy                           0.84        73
   macro avg       0.79      0.82      0.80        73
weighted avg       0.85      0.84      0.84        73


Confusion Matrix:
 [[12  1  1  0  1  0  0]
 [ 1 10  0  0  0  2  2]
 [ 0  0 10  0  0  0  0]
 [ 0  0  0  4  0  0  1]
 [ 0  0  0  0 13  0  0]
 [ 0  0  1  1  0 10  0]
 [ 0  1  0  0  0  0  2]]

Weighted Voting Accuracy: 0.8219

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.80      0.

# Boosting

In [6]:
boosting_models = {
    "XGBoost": XGBClassifier(eval_metric="mlogloss", use_label_encoder=False, random_state=42),
    "LightGBM": LGBMClassifier(random_state=42),
    "CatBoost": CatBoostClassifier(verbose=0, random_state=42),
    "AdaBoost": AdaBoostClassifier(random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
}

boosting_results = {}

for name, model in boosting_models.items():
    print(f"🚀 Training {name}...")
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    boosting_results[name] = {"model": model, "accuracy": acc}

# Compare Boosting Models**
print("\n🔹 Boosting Model Performance:")
for name, result in boosting_results.items():
    print(f"{name} Accuracy: {result['accuracy']:.4f}")

# Identify the best boosting model
best_boosting = max(boosting_results, key=lambda k: boosting_results[k]["accuracy"])
best_accuracy = boosting_results[best_boosting]["accuracy"]

print(f"\n✅ Best Boosting Approach: {best_boosting} with Accuracy: {best_accuracy:.4f}")

# Evaluate the Best Boosting Model**
best_model = boosting_results[best_boosting]["model"]
y_pred_best = best_model.predict(X_test)

print("\n🔹 Final Best Model Evaluation:")
print("\nClassification Report:\n", classification_report(y_test, y_pred_best))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_best))


🚀 Training XGBoost...


Parameters: { "use_label_encoder" } are not used.



🚀 Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003886 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 102897
[LightGBM] [Info] Number of data points in the train set: 441, number of used features: 997
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
🚀 Training CatBoost...
🚀 Training AdaBoost...




🚀 Training GradientBoosting...

🔹 Boosting Model Performance:
XGBoost Accuracy: 0.8219
LightGBM Accuracy: 0.8356
CatBoost Accuracy: 0.8630
AdaBoost Accuracy: 0.3151
GradientBoosting Accuracy: 0.8219

✅ Best Boosting Approach: CatBoost with Accuracy: 0.8630

🔹 Final Best Model Evaluation:

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.87      0.93        15
           1       0.85      0.73      0.79        15
           2       0.77      1.00      0.87        10
           3       1.00      0.80      0.89         5
           4       1.00      1.00      1.00        13
           5       0.83      0.83      0.83        12
           6       0.40      0.67      0.50         3

    accuracy                           0.86        73
   macro avg       0.84      0.84      0.83        73
weighted avg       0.88      0.86      0.87        73


Confusion Matrix:
 [[13  1  1  0  0  0  0]
 [ 0 11  0  0  0  2  2]
 [ 0  0 10  0  0  0  

# Bagging

In [7]:
# Define Bagging Models with Default Parameters**
bagging_models = {
    "RandomForest": RandomForestClassifier(random_state=42),
    "Bagging (SVM)": BaggingClassifier(estimator=SVC(probability=True, kernel="rbf", random_state=42), random_state=42),
    "Bagging (MLP)": BaggingClassifier(estimator=MLPClassifier(max_iter=500, random_state=42), random_state=42),
    "Bagging (Logistic)": BaggingClassifier(estimator=LogisticRegression(max_iter=500, random_state=42), random_state=42),
    "Bagging (KNN)": BaggingClassifier(estimator=KNeighborsClassifier(), random_state=42),
    "Bagging (DecisionTree)": BaggingClassifier(estimator=DecisionTreeClassifier(random_state=42), random_state=42)
}

# Train Each Bagging Model and Evaluate**
bagging_results = {}

for name, model in bagging_models.items():
    print(f"🚀 Training {name}...")
    model.fit(X_train, y_train)  
    
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    
    bagging_results[name] = {"model": model, "accuracy": acc}

# Compare Bagging Models**
print("\n🔹 Bagging Model Performance:")
for name, result in bagging_results.items():
    print(f"{name} Accuracy: {result['accuracy']:.4f}")

# Identify the best bagging model
best_bagging = max(bagging_results, key=lambda k: bagging_results[k]["accuracy"])
best_accuracy = bagging_results[best_bagging]["accuracy"]

print(f"\n✅ Best Bagging Approach: {best_bagging} with Accuracy: {best_accuracy:.4f}")

# Evaluate the Best Bagging Model**
best_model = bagging_results[best_bagging]["model"]
y_pred_best = best_model.predict(X_test)

print("\n🔹 Final Best Model Evaluation:")
print("\nClassification Report:\n", classification_report(y_test, y_pred_best))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_best))


🚀 Training RandomForest...
🚀 Training Bagging (SVM)...
🚀 Training Bagging (MLP)...
🚀 Training Bagging (Logistic)...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🚀 Training Bagging (KNN)...
🚀 Training Bagging (DecisionTree)...

🔹 Bagging Model Performance:
RandomForest Accuracy: 0.7945
Bagging (SVM) Accuracy: 0.3562
Bagging (MLP) Accuracy: 0.2192
Bagging (Logistic) Accuracy: 0.5068
Bagging (KNN) Accuracy: 0.5205
Bagging (DecisionTree) Accuracy: 0.8493

✅ Best Bagging Approach: Bagging (DecisionTree) with Accuracy: 0.8493

🔹 Final Best Model Evaluation:

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.87      0.90        15
           1       0.91      0.67      0.77        15
           2       0.90      0.90      0.90        10
           3       0.62      1.00      0.77         5
           4       1.00      1.00      1.00        13
           5       0.85      0.92      0.88        12
           6       0.25      0.33      0.29         3

    accuracy                           0.85        73
   macro avg       0.78      0.81      0.79        73
weighted avg       0.87      0.85   