# SVM Model

In [30]:
import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

## 1. Load CSV data

In [31]:
# Read the balanced fake-news dataset from the processed-data folder
# (path is relative to the notebook's working directory).
dataset_path = "../data/processed/balanced_fake_news_dataset.csv"
svm_df = pd.read_csv(dataset_path)

In [32]:
# Preview the first rows to sanity-check the loaded columns before modelling.
svm_df.head()

Unnamed: 0,text,label,original_label,dataset,clean_text,tokens,processed_text,sentiment
0,Says comprehensive immigration reform will add...,0,half-true,LIAR,says comprehensive immigration reform will add...,"['say', 'comprehensive', 'immigration', 'refor...",say comprehensive immigration reform add billi...,0.25
1,Ellen DeGeneres makes joke about Jennifer Anis...,0,0,FakeNewsNet_Minimal,ellen degeneres makes joke about jennifer anis...,"['ellen', 'degeneres', 'make', 'joke', 'jennif...",ellen degeneres make joke jennifer aniston mar...,0.296
2,"When we lower tax rates, we generate more in r...",0,half-true,LIAR,when we lower tax rates we generate more in re...,"['lower', 'tax', 'rate', 'generate', 'revenue'...",lower tax rate generate revenue happened reaga...,-0.296
3,Karma it s a beautiful thing A massive makeshi...,0,0,ISOT,karma it s a beautiful thing a massive makeshi...,"['karma', 'beautiful', 'thing', 'massive', 'ma...",karma beautiful thing massive makeshift refuge...,0.5719
4,Ellen DeGeneres' wife Portia de Rossi makes he...,0,0,FakeNewsNet_Minimal,ellen degeneres wife portia de rossi makes her...,"['ellen', 'degeneres', 'wife', 'portia', 'de',...",ellen degeneres wife portia de rossi make cry ...,-0.0516


## 2. Select text and label columns

In [33]:
# Model input is the 'processed_text' column ('clean_text' is the alternative
# text column); the target is the 'label' column.
X_text, y = svm_df['processed_text'], svm_df['label']

## 3. Vectorize the text using TF-IDF

In [34]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(X_text)


## 4. Split the data


In [35]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## 5. Train the SVM

In [36]:
# Support vector classifier with a linear kernel and regularization parameter
# C=1.0, fitted on the training features and labels.
model = SVC(C=1.0, kernel='linear')
model.fit(X_train, y_train)

## 6. Evaluate

In [37]:

# Predict on the held-out split and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
# Concatenate instead of passing two print() arguments: the default separator
# would insert a space in front of the report and shift its header row by one
# column relative to the rows below it.
print("Classification Report:\n" + classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.86      0.85      6091
           1       0.86      0.85      0.86      6315

    accuracy                           0.85     12406
   macro avg       0.85      0.85      0.85     12406
weighted avg       0.85      0.85      0.85     12406



## 7. Save the model and vectorizer

In [38]:
# Persist the trained classifier together with the fitted vectorizer; both are
# needed to score new text. Create the target directory first so this cell also
# works on a fresh checkout where ../models does not exist yet.
os.makedirs('../models', exist_ok=True)
joblib.dump(model, '../models/svm_text_model.pkl')
joblib.dump(vectorizer, '../models/tfidf_vectorizer.pkl')

['../models/tfidf_vectorizer.pkl']

In [39]:
# Reload the persisted vectorizer and model from the same ../models/ paths they
# are written to by the joblib.dump calls, so a clean "Restart & Run All" finds
# the files instead of looking for them in the notebook's working directory.
# NOTE: joblib.load unpickles the file; only load artifacts you produced or trust.
vectorizer = joblib.load('../models/tfidf_vectorizer.pkl')
model = joblib.load('../models/svm_text_model.pkl')

# Example prediction on a single new headline.
# NOTE(review): the model is trained on the 'processed_text' column, while this
# raw string is vectorized directly — confirm whether the same text
# preprocessing should be applied before calling transform().
new_texts = ["Breaking news: Elvis spotted on Mars, aliens confirm his hit single!"]
X_new = vectorizer.transform(new_texts)
prediction = model.predict(X_new)

In [40]:
# Show the predicted label array for the example text. The values use the same
# encoding as the dataset's 'label' column — TODO confirm which value means fake.
print(prediction)

[0]
