In [2]:
import pandas as pd
import ast
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score


# Load cleaned data.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, at the cost of a higher peak memory use while
# parsing. NOTE(review): the recorded run shows a warning raised on this line,
# presumably the mixed-dtype DtypeWarning -- confirm it is gone after this change.
df = pd.read_csv("cleaned_file.csv", low_memory=False)

# Work on a reproducible random subset. The sample size is capped at the number
# of available rows so that a smaller file does not make sample() raise.
SAMPLE_SIZE = 50000
df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)

# Keep only rows whose 'type' is one of the categories used for the binary
# classification (compared case-insensitively).
accepted_types = ['fake', 'satire',  'bias', 'conspiracy', 'clickbait','reliable', 'political']
df = df[df['type'].notna()]
df = df[df['type'].str.lower().isin(accepted_types)]

# Convert stringified lists back to lists.
# This runs after the type filter so rows that are about to be discarded are
# never parsed, and rows with missing content are dropped first because
# ast.literal_eval raises on NaN. The .copy() detaches the filtered frame
# before a column is assigned on it.
df = df[df['content'].notna()].copy()
df['content'] = df['content'].apply(ast.literal_eval)

def map_type(x):
    """Map an article 'type' string to a binary label, ignoring case.

    Returns 0 for 'fake', 'satire', 'conspiracy' and 'bias', 1 for
    'reliable', 'political' and 'clickbait', and None for any other value.
    """
    label_by_type = {
        'fake': 0,
        'satire': 0,
        'conspiracy': 0,
        'bias': 0,
        'reliable': 1,
        'political': 1,
        'clickbait': 1,
    }
    # dict.get yields None for unrecognised types.
    return label_by_type.get(x.lower())

# Create label column
df['label'] = df['type'].apply(map_type)

# map_type returns None for any type it does not recognise; drop such rows so
# the classifier never receives a missing label.
df = df[df['label'].notna()]

# Remove rows with empty tokens
df = df[df['content'].map(len) > 0]

# Split data (80% train / 20% test, fixed seed for reproducibility)
X = df['content']
y = df['label'].astype(int)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def identity_tokens(tokens):
    """Return an already-tokenized document unchanged.

    Used as both tokenizer and preprocessor so TfidfVectorizer consumes the
    token lists as-is. A named function is used instead of a lambda because
    lambdas cannot be pickled by the standard pickle module, which would
    prevent saving the fitted pipeline.
    """
    return tokens


# Define TF-IDF vectorizer for tokenized input
tfidf = TfidfVectorizer(
    tokenizer=identity_tokens,
    preprocessor=identity_tokens,
    token_pattern=None,  # not used when a tokenizer is supplied
    max_df=0.60,         # ignore terms present in more than 60% of documents
    min_df=5,            # ignore terms present in fewer than 5 documents
    ngram_range=(1, 3)   # unigrams, bigrams and trigrams
)

# === SVM Pipeline ===
# NOTE(review): probability=True is not needed by anything in this cell (only
# predict() is called) and it slows down fit(); it is kept in case a later
# cell calls predict_proba -- drop it if that is not the case.
svm_pipeline = Pipeline([
    ('tfidf', tfidf),
    ('clf', SVC(kernel='linear', C=1.0, probability=True, class_weight='balanced'))
])

svm_pipeline.fit(X_train, y_train)
y_pred_svm = svm_pipeline.predict(X_test)

# Recall and F1 below use scikit-learn's default positive class, i.e. label 1.
print("=== SVM Model Results ===")
print(classification_report(y_test, y_pred_svm))
print("Recall:", recall_score(y_test, y_pred_svm))
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("F1 Score:", f1_score(y_test, y_pred_svm))


  df = pd.read_csv("cleaned_file.csv")


=== SVM Model Results ===
              precision    recall  f1-score   support

           0       0.85      0.85      0.85      3535
           1       0.88      0.88      0.88      4404

    accuracy                           0.87      7939
   macro avg       0.86      0.86      0.86      7939
weighted avg       0.87      0.87      0.87      7939

Recall: 0.8814713896457765
Accuracy: 0.8662300037788134
F1 Score: 0.8796736913664174
