#  Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import GridSearchCV
import joblib  # To save the best model


In [2]:
cd C:\Users\soumy\OneDrive\Documents\Semester 4\AML

C:\Users\soumy\OneDrive\Documents\Semester 4\AML


# Loading and preprocessing.

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the pre-split train / validation / test CSV files from the working directory.
train_df, val_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "validation.csv", "test.csv")
)

# Fit a new TF-IDF vectorizer on the training text only, then reuse that fitted
# vocabulary to transform the validation and test text.
vectorizer = TfidfVectorizer(stop_words="english")
X_train = vectorizer.fit_transform(train_df['text'])
X_val, X_test = (vectorizer.transform(frame['text']) for frame in (val_df, test_df))

# Label column of each split.
y_train, y_val, y_test = (frame['label'] for frame in (train_df, val_df, test_df))


# Method for model training.

In [4]:
def train_model(model, X_train, y_train):
    """Fit ``model`` on the training data and hand the same object back.

    Args:
        model: Object exposing a ``fit(X, y)`` method.
        X_train: Training features passed straight through to ``fit``.
        y_train: Training labels passed straight through to ``fit``.

    Returns:
        The very ``model`` object that was passed in, after ``fit`` was called on it.
    """
    estimator = model
    # Whatever fit() itself returns is not used; the caller gets the input object.
    estimator.fit(X_train, y_train)
    return estimator


# Method for model evaluation

In [5]:
def score_model(model, X, y):
    """Predict on ``X`` with ``model`` and score the predictions against ``y``.

    Args:
        model: Fitted object exposing ``predict(X)``.
        X: Features to predict on.
        y: True labels compared against the predictions.

    Returns:
        dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"`` and
        ``"f1_score"``, each holding the value of the corresponding
        scikit-learn metric called with its default arguments.
    """
    predictions = model.predict(X)

    # (result key, metric function) pairs, evaluated in this order.
    metric_table = (
        ("accuracy", accuracy_score),
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1_score", f1_score),
    )
    return {key: metric(y, predictions) for key, metric in metric_table}


# Classification report function

In [6]:
def evaluate_predictions(model, X, y):
    """Print scikit-learn's classification report for ``model`` on ``(X, y)``.

    Args:
        model: Fitted object exposing ``predict(X)``.
        X: Features to predict on.
        y: True labels the predictions are compared with.

    Returns:
        None. The report is written to standard output.
    """
    report = classification_report(y, model.predict(X))
    print("Classification Report:\n", report)


# Training on 3 benchmark models.

In [9]:
from sklearn.svm import SVC

# Candidate classifiers, keyed by the display name used in the printed output.
models = {}
models["Naïve Bayes"] = MultinomialNB()
models["Logistic Regression"] = LogisticRegression(max_iter=200)
models["SVM"] = SVC(kernel='linear', probability=True)  # linear kernel for text classification

# Fit each candidate on the training split, then report its metrics on the
# training and validation splits followed by the validation classification report.
for name, model in models.items():
    print(f"\nTraining {name}...")

    try:
        model = train_model(model, X_train, y_train)

        # Both score dicts are computed before anything is printed.
        train_scores = score_model(model, X_train, y_train)
        val_scores = score_model(model, X_val, y_val)
        print(f"{name} Train Scores:", train_scores)
        print(f"{name} Validation Scores:", val_scores)

        print(f"\n{name} Validation Evaluation:")
        evaluate_predictions(model, X_val, y_val)

    except Exception as exc:
        # Best-effort: report the failure and move on to the next candidate.
        print(f"⚠️ Error in training {name}: {exc}")



Training Naïve Bayes...
Naïve Bayes Train Scores: {'accuracy': 0.9823076923076923, 'precision': 1.0, 'recall': 0.8693181818181818, 'f1_score': 0.9300911854103344}
Naïve Bayes Validation Scores: {'accuracy': 0.9748803827751196, 'precision': 1.0, 'recall': 0.8037383177570093, 'f1_score': 0.8911917098445596}

Naïve Bayes Validation Evaluation:
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       729
           1       1.00      0.80      0.89       107

    accuracy                           0.97       836
   macro avg       0.99      0.90      0.94       836
weighted avg       0.98      0.97      0.97       836


Training Logistic Regression...
Logistic Regression Train Scores: {'accuracy': 0.9669230769230769, 'precision': 0.9950372208436724, 'recall': 0.759469696969697, 'f1_score': 0.8614393125671321}
Logistic Regression Validation Scores: {'accuracy': 0.9533492822966507, 'precision': 0.9857142857142858, 'recal

# Naïve Bayes Validation

## Accuracy: 0.9749
## Precision: 1.00
## Recall: 0.80
## F1-score: 0.89

# Logistic Regression Validation

## Accuracy: 0.9533
## Precision: 0.99
## Recall: 0.64
## F1-score: 0.78
# SVM Validation

## Accuracy: 0.9833
## Precision: 0.99
## Recall: 0.88
## F1-score: 0.93

# Which model performed best?
## Highest accuracy: SVM (0.9833)
## Highest recall: SVM (0.88)
## Highest F1-score: SVM (0.93)
### Although Naïve Bayes shows perfect precision (1.00), its recall (0.80) is notably lower than SVM’s recall (0.88). Overall, SVM has the best balance across accuracy, precision, recall, and F1-score, making it the top performer among the three.

# Hyperparameter tuning 

In [11]:
from sklearn.model_selection import GridSearchCV

# Search space for the SVM: only the regularization strength C varies; the
# kernel is pinned to 'linear'.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear'],
)

# 5-fold cross-validated grid search on the training split, ranked by accuracy.
grid_search = GridSearchCV(
    estimator=SVC(probability=True),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
)
grid_search.fit(X_train, y_train)

print("Best SVM Parameters:", grid_search.best_params_)


Best SVM Parameters: {'C': 10, 'kernel': 'linear'}


# Retraining the best model from scratch on the combined train + validation set.

In [12]:
import numpy as np
from scipy import sparse

# Combine train and validation data.
# The TF-IDF matrices are sparse; stack them row-wise with scipy.sparse.vstack
# instead of converting each to a dense array first, which would allocate a full
# (n_samples x vocabulary_size) float array for no benefit. SVC accepts sparse input,
# and a model fitted on sparse data still accepts dense arrays at predict time.
X_combined = sparse.vstack((X_train, X_val), format="csr")
y_combined = np.hstack((y_train.values, y_val.values))

# Train the final SVM model with the C selected by the grid search.
final_model = SVC(C=grid_search.best_params_['C'], kernel='linear', probability=True)
final_model.fit(X_combined, y_combined)


# Displaying the final metrics

In [14]:
# Densify the sparse test features before prediction: an SVC that was fitted on
# dense data rejects sparse input, so dense input is the safe choice here.
X_test_dense = X_test.toarray()
test_inputs = (final_model, X_test_dense, y_test)

# Metric dict on the held-out test split.
test_scores = score_model(*test_inputs)
print("\nFinal Model Test Scores:", test_scores)

# Per-class classification report on the same split.
print("\nFinal Model Test Evaluation:")
evaluate_predictions(*test_inputs)



Final Model Test Scores: {'accuracy': 0.9832535885167464, 'precision': 0.9711538461538461, 'recall': 0.9017857142857143, 'f1_score': 0.9351851851851851}

Final Model Test Evaluation:
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       724
           1       0.97      0.90      0.94       112

    accuracy                           0.98       836
   macro avg       0.98      0.95      0.96       836
weighted avg       0.98      0.98      0.98       836



# SMS Spam Classification Report 📊

## 🔹 Model Performance Summary
### The final SVM (Support Vector Machine) model achieved high accuracy on the test set:

| Metric      | Score  |
|------------|--------|
| Accuracy  | 98.33% |
| Precision | 97.11% |
| Recall    | 90.18% |
| F1-score  | 93.52% |

## 🔹 Key Observations
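- ### The high test accuracy (98.33%) indicates the model generalizes well to unseen messages. Note that the test set is imbalanced (724 ham vs. 112 spam), so always predicting "ham" would already reach about 86.6% accuracy; precision, recall, and F1 are therefore the more informative metrics.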
- ### Precision (97.11%) suggests a low false-positive rate (few ham messages misclassified as spam).
- ### Recall (90.18%) shows that most spam messages are correctly identified, but about 1 in 10 (11 of the 112 spam messages in the test set) is still missed.
- ### F1-score (93.52%) balances precision and recall, confirming strong overall performance.



