# SVM Model Training and Evaluation
This notebook trains and evaluates a Support Vector Machine (SVM) classifier for sentiment classification on two feature representations, TF-IDF and Bag-of-Words (BoW), both loaded as pre-computed, reduced feature matrices.

In [None]:
# Import required libraries
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import joblib
from google.colab import drive
# Mount Google Drive at /content/drive
drive.mount("/content/drive")

# Directories holding the reduced feature matrices and the label arrays.
# NOTE(review): both paths are relative, not under /content/drive -- confirm
# the working directory is set so that these resolve to the intended data.
pca_dir = "pca_data"
label_dir = "split_data"

# Reduced feature matrices, loaded in order: TF-IDF train/test, then BoW train/test
X_tfidf_train, X_tfidf_test, X_bow_train, X_bow_test = (
    np.load(f"{pca_dir}/X_{split}_reduced.npy")
    for split in ("tfidf_train", "tfidf_test", "bow_train", "bow_test")
)

# Label arrays, loaded in the same order as the features above
y_tfidf_train, y_tfidf_test, y_bow_train, y_bow_test = (
    np.load(f"{label_dir}/y_{split}.npy")
    for split in ("tfidf_train", "tfidf_test", "bow_train", "bow_test")
)
# Define helper for training & evaluating
def train_evaluate_model(model, X_train, X_test, y_train, y_test, model_name, feature_type, color='Blues'):
    """Fit ``model``, save it to disk, and report its test-set performance.

    The fitted model is written with joblib to the current working directory.
    Accuracy and the classification report are then printed, and the
    confusion matrix is drawn as an annotated heatmap.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training labels passed to ``model.fit``.
        y_test: True labels for ``X_test``.
        model_name: Display name used in printed output and the plot title;
            lower-cased with spaces removed it forms the first part of the
            saved file name.
        feature_type: Feature-set name used in printed output and the plot
            title; lower-cased it forms the second part of the saved file name.
        color: Colormap name handed to the heatmap as ``cmap``.

    Returns:
        None.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Save model
    # NOTE(review): there is no separator between the two name parts, so
    # ("SVM", "TF-IDF") yields "svmtf-idf.pkl". Kept unchanged because other
    # code may load the file by this exact name -- confirm before renaming.
    filename = f"{model_name.lower().replace(' ', '')}{feature_type.lower()}.pkl"
    joblib.dump(model, filename)
    print(f"Saved: {filename}\n")

    acc = accuracy_score(y_test, y_pred)
    print(f"{model_name} using {feature_type}")
    print("Accuracy:", acc)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Confusion matrix heatmap: this is what the `color` argument is for.
    # The label order is fixed explicitly so the tick labels are guaranteed
    # to line up with the matrix rows (true) and columns (predicted).
    labels = np.unique(np.concatenate((np.ravel(y_test), np.ravel(y_pred))))
    cm = confusion_matrix(y_test, y_pred, labels=labels)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt="d", cmap=color,
                xticklabels=labels, yticklabels=labels)
    plt.title(f"{model_name} Confusion Matrix ({feature_type})")
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.tight_layout()
    plt.show()


# Single SVC instance (probability estimates on, fixed seed); the same object
# is handed to every run below, and each run fits it on that run's features.
svm_model = SVC(probability=True, random_state=42)

# One row per run:
# (train features, test features, train labels, test labels, feature name, color)
feature_runs = (
    (X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test, "TF-IDF", "Blues"),
    (X_bow_train, X_bow_test, y_bow_train, y_bow_test, "BoW", "Greens"),
)

# Evaluate on TF-IDF first, then BoW
for train_X, test_X, train_y, test_y, feature_name, color_name in feature_runs:
    train_evaluate_model(
        svm_model, train_X, test_X, train_y, test_y,
        "SVM", feature_name, color=color_name,
    )


## Results Analysis
The cell above prints the accuracy score and a per-class classification report for the SVM on each feature representation. Comparing these results between TF-IDF and Bag-of-Words shows which representation works better for our sentiment analysis task with SVM.