## Logistic Regression, Random Forest, and SVM

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the dataset.
# NOTE: `df` is an alias of `shuffled_df` (no copy is made), so the
# `category_int` column added below is also visible through `shuffled_df`.
df = shuffled_df

# Encode the string category labels as integer class ids.
category_mapping = {'AC': 0, 'PC': 1, 'TC': 2, 'NC': 3}
df['category_int'] = df['category'].map(category_mapping)

# `Series.map` produces NaN for every label that is missing from the mapping.
# Fail right here, naming the offending labels, instead of letting the NaN
# targets reach the classifiers and surface as an opaque error inside `fit`.
unmapped_rows = df['category_int'].isna()
if unmapped_rows.any():
    unknown_labels = sorted(df.loc[unmapped_rows, 'category'].astype(str).unique())
    raise ValueError(
        f"Unmapped category labels: {unknown_labels}; "
        f"expected one of {sorted(category_mapping)}"
    )

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['response'],
    df['category_int'],
    test_size=0.2,
    random_state=42,
)

# Convert text to TF-IDF vectors over unigrams and bigrams, capped at 5000
# features. The vocabulary is fitted on the training split only, so nothing
# from the test split leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train classifiers.
# The estimators that use randomness are seeded so that repeated runs report
# the same numbers: the forest's bootstrap/feature sampling, and the internal
# cross-validation SVC performs when `probability=True`.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(probability=True, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Fit each model on the training vectors and report held-out accuracy.
for name, model in models.items():
    model.fit(X_train_tfidf, y_train)
    y_pred = model.predict(X_test_tfidf)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")

# Example prediction: the text must go through the already-fitted vectorizer
# so it lands in the same feature space the models were trained on.
# The printed value is the integer class id from `category_mapping`.
example_text = ["I am nervous about the exams."]
example_tfidf = vectorizer.transform(example_text)
pred_class = models["Logistic Regression"].predict(example_tfidf)
print(f"Predicted Class: {pred_class[0]}")


Logistic Regression Accuracy: 0.7907
SVM Accuracy: 0.7914
Random Forest Accuracy: 0.7914
Predicted Class: 0
