In [None]:
# Cell 6: Evaluation helpers
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

def evaluate_model(name, model, X_train, X_test, y_train, y_test,
                   class_names=('FAKE', 'REAL')):
    """Fit ``model``, print its test-set metrics and plot a confusion matrix.

    Parameters
    ----------
    name : str
        Label used in the printed summary and in the figure title.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's instance is trained after this call.
    X_train, y_train
        Data passed to ``model.fit``.
    X_test, y_test
        Data used for ``model.predict`` and for every reported metric.
    class_names : sequence of str, optional
        Tick labels for the confusion-matrix axes, in the same order as the
        fitted model's ``classes_``. Defaults to ``('FAKE', 'REAL')``. When
        the number of names does not match the number of matrix rows, the
        class values themselves are shown instead.

    Returns
    -------
    The value returned by ``accuracy_score(y_test, y_pred)``.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    print(f"Model: {name}")
    print(f"Accuracy: {acc:.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    # Pin the matrix rows/columns to the classes the model was trained on.
    # Without this, the matrix is built only from labels present in
    # y_test/y_pred, so a test split missing one class yields a smaller
    # matrix that no longer lines up with the tick labels below.
    labels = getattr(model, "classes_", None)
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    tick_labels = list(class_names) if class_names is not None else []
    if len(tick_labels) != len(cm):
        # Wrong number of display names: show the real class values rather
        # than attach misleading names to the axes.
        tick_labels = [str(c) for c in labels] if labels is not None else "auto"

    fig, ax = plt.subplots(figsize=(4, 3))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels,
                yticklabels=tick_labels,
                ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title(f'Confusion Matrix - {name}')
    plt.show()
    # Release the figure so repeated evaluations do not accumulate open figures.
    plt.close(fig)

    return acc

In [None]:
# Cell 7: Train multiple models on TF-IDF features
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier

# Build every estimator up front and keep the original variable names so the
# fitted models stay reachable from later cells.
log_reg = LogisticRegression(max_iter=2000)
nb = MultinomialNB()
# LinearSVC accepts a random_state for its solver; seed it with the same value
# as the Random Forest so repeated runs of this cell give the same scores.
svm_clf = LinearSVC(random_state=42)
rf_clf = RandomForestClassifier(n_estimators=200, random_state=42)

# (key in results_tfidf, title shown in the report/plot, estimator)
tfidf_models = [
    ('LogisticRegression', "Logistic Regression (TF-IDF)", log_reg),
    ('MultinomialNB', "Multinomial Naive Bayes (TF-IDF)", nb),
    ('LinearSVC', "Linear SVM (TF-IDF)", svm_clf),
    ('RandomForest', "Random Forest (TF-IDF)", rf_clf),
]

# Maps model key -> test accuracy returned by evaluate_model.
results_tfidf = {}
for result_key, display_name, estimator in tfidf_models:
    results_tfidf[result_key] = evaluate_model(
        display_name,
        estimator,
        X_train_tfidf, X_test_tfidf, y_train, y_test
    )

results_tfidf

In [None]:
# Cell 12: Train models on Word2Vec features
# Build every estimator up front and keep the original variable names so the
# fitted models stay reachable from later cells.
log_reg_w2v = LogisticRegression(max_iter=2000)
# LinearSVC accepts a random_state for its solver; seed it with the same value
# as the Random Forest so repeated runs of this cell give the same scores.
svm_w2v = LinearSVC(random_state=42)
rf_w2v = RandomForestClassifier(n_estimators=200, random_state=42)

# (key in results_w2v, title shown in the report/plot, estimator)
w2v_models = [
    ('LogisticRegression', "Logistic Regression (Word2Vec)", log_reg_w2v),
    ('LinearSVC', "Linear SVM (Word2Vec)", svm_w2v),
    ('RandomForest', "Random Forest (Word2Vec)", rf_w2v),
]

# Maps model key -> test accuracy returned by evaluate_model.
results_w2v = {}
for result_key, display_name, estimator in w2v_models:
    results_w2v[result_key] = evaluate_model(
        display_name,
        estimator,
        X_train_w2v, X_test_w2v, y_train_w2v, y_test_w2v
    )

results_w2v

In [None]:
# Cell 13: Compare model performances
def _print_accuracies(heading, scores):
    """Print ``heading`` followed by one ``name: accuracy`` line per entry of ``scores``."""
    print(heading)
    for model_name, accuracy in scores.items():
        print(f"{model_name}: {accuracy:.4f}")


# Same two sections, same order: TF-IDF first, then Word2Vec after a blank line.
_print_accuracies("=== TF-IDF Models ===", results_tfidf)
_print_accuracies("\n=== Word2Vec Models ===", results_w2v)
