**Objective:** Build multiple classifier combinations—hard voting, soft voting, stacking—and compare them with AdaBoost (decision stumps).

**Problem Statement:**

A messaging platform wants to classify messages as Spam or Ham. You must implement and compare:

  1. individual base models,
  2. combined classifiers (voting + stacking), and
  3. a boosting model using decision stumps (AdaBoost with max_depth=1 trees).

### **GITHUB LINK:** https://github.com/PRASHIRAWAL/ML-ISA-Assignments/tree/main/Assignment%206

In [1]:
from google.colab import files
uploaded = files.upload()

Saving SMSSpamCollection to SMSSpamCollection


In [2]:
import pandas as pd

# Load raw file
df = pd.read_csv("SMSSpamCollection", sep='\t', header=None)

# Rename columns
df.columns = ["label", "message"]

df.head()

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [3]:
df["label"] = df["label"].map({"ham":0, "spam":1})

TF-IDF Preprocessing

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    max_features=5000,
    ngram_range=(1,2),
    stop_words='english'
)

X = vectorizer.fit_transform(df["message"])
y = df["label"]

Stratified K-Fold

In [5]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

Base Models

In [6]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, StackingClassifier, AdaBoostClassifier

nb = MultinomialNB()
lr = LogisticRegression(max_iter=1000)
svm = SVC(kernel='linear', probability=True)

Voting Classifiers

In [7]:
hard_voting = VotingClassifier(
    estimators=[('nb', nb), ('lr', lr), ('svm', svm)],
    voting='hard'
)

soft_voting = VotingClassifier(
    estimators=[('nb', nb), ('lr', lr), ('svm', svm)],
    voting='soft'
)

Stacking Classifier

In [8]:
stacking = StackingClassifier(
    estimators=[('nb', nb), ('lr', lr), ('svm', svm)],
    final_estimator=LogisticRegression()
)

AdaBoost with Decision Stumps

In [10]:
stump = DecisionTreeClassifier(max_depth=1)

adaboost = AdaBoostClassifier(
    estimator=stump,
    n_estimators=100,
    learning_rate=1.0
)

Evaluation

In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

def evaluate_model(model, X, y):
    precision, recall, f1, roc = [], [], [], []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_prob = model.predict_proba(X_test)[:,1]

        precision.append(precision_score(y_test, y_pred))
        recall.append(recall_score(y_test, y_pred))
        f1.append(f1_score(y_test, y_pred))
        roc.append(roc_auc_score(y_test, y_prob))

    return {
        "Precision": np.mean(precision),
        "Recall": np.mean(recall),
        "F1": np.mean(f1),
        "ROC-AUC": np.mean(roc)
    }

Running all the models


In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

def evaluate_model(model, X, y):
    precision, recall, f1, roc = [], [], [], []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        if hasattr(model, "predict_proba"):
            y_prob = model.predict_proba(X_test)[:,1]
            roc.append(roc_auc_score(y_test, y_prob))
        else:
            roc.append(np.nan)
        precision.append(precision_score(y_test, y_pred))
        recall.append(recall_score(y_test, y_pred))
        f1.append(f1_score(y_test, y_pred))

    return {
        "Precision": np.mean(precision),
        "Recall": np.mean(recall),
        "F1": np.mean(f1),
        "ROC-AUC": np.nanmean(roc) if any(~np.isnan(roc)) else np.nan
    }

models = {
    "Naive Bayes": nb,
    "Logistic Regression": lr,
    "Linear SVM": svm,
    "Hard Voting": hard_voting,
    "Soft Voting": soft_voting,
    "Stacking": stacking,
    "AdaBoost (Stumps)": adaboost
}

results = {}

for name, model in models.items():
    print(f"Evaluating {name}...")
    results[name] = evaluate_model(model, X, y)

results_df = pd.DataFrame(results).T
results_df

Evaluating Naive Bayes...
Evaluating Logistic Regression...
Evaluating Linear SVM...
Evaluating Hard Voting...
Evaluating Soft Voting...
Evaluating Stacking...
Evaluating AdaBoost (Stumps)...


Unnamed: 0,Precision,Recall,F1,ROC-AUC
Naive Bayes,0.998425,0.832635,0.907937,0.989672
Logistic Regression,0.988868,0.706774,0.824127,0.991116
Linear SVM,0.982473,0.896904,0.937665,0.992245
Hard Voting,0.993701,0.848707,0.915462,
Soft Voting,0.986467,0.884832,0.93278,0.992617
Stacking,0.973391,0.92502,0.94846,0.992356
AdaBoost (Stumps),0.954503,0.421754,0.584726,0.926743


In [14]:
results_df.to_csv("ensemble_comparison.csv")

In [15]:
files.download("ensemble_comparison.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Final Model Predictions

In [16]:
best_model = stacking
best_model.fit(X, y)

predictions = best_model.predict(X)
probabilities = best_model.predict_proba(X)[:,1]

final_df = pd.DataFrame({
    "MessageId": range(len(df)),
    "Actual": y,
    "Predicted": predictions,
    "Probability": probabilities
})

final_df.to_csv("final_model_predictions.csv", index=False)
files.download("final_model_predictions.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>