In [1]:
import pandas as pd

# Load data
# Both CSVs are read relative to the notebook's working directory.
true_df = pd.read_csv("True.csv")
fake_df = pd.read_csv("Fake.csv")

# Add labels
true_df['label'] = 1  # real
fake_df['label'] = 0  # fake

# Combine datasets
data = pd.concat([true_df, fake_df], axis=0).reset_index(drop=True)

# Drop unnecessary columns
# errors='ignore' turns the drop into a no-op when 'date' is absent instead of
# raising KeyError, which is what the original "if date exists" note intended.
data = data.drop(columns=['date'], errors='ignore')

# Shuffle data
# A fixed seed keeps the row order (and everything derived from it) identical
# across Restart Kernel -> Run All.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Input text. Empty CSV fields are parsed as NaN by read_csv, and
# TfidfVectorizer raises on NaN documents, so missing text becomes ''.
X = data['text'].fillna('')
y = data['label']    # Labels

# Convert text to numerical data
# stop_words='english' removes common English function words; max_df=0.7 drops
# terms that occur in more than 70% of the documents.
# NOTE(review): the vectorizer is fitted on the full corpus here, before the
# train/test split in the following cell, so the vocabulary and IDF weights
# also see the held-out rows. Consider fitting on the training portion only.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_vec = vectorizer.fit_transform(X)


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Split into train and test
# 80/20 hold-out split; random_state pins which rows land in each partition.
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.2, random_state=42)

# Train a model
# The classifier shuffles the training data every epoch; seeding it makes the
# fitted weights, and therefore the metrics printed below, repeatable.
model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
# Rows are true labels and columns are predicted labels, both in the explicit
# order [0 (fake), 1 (real)].
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=[0, 1]))


Accuracy: 0.9942093541202672
Confusion Matrix:
 [[4664   35]
 [  17 4264]]


In [5]:
import pickle

# Persist the trained classifier and the fitted vectorizer as separate pickle
# files in the working directory; both are needed to score new text later.
# Only unpickle these files from a trusted source: loading a pickle can
# execute arbitrary code.
artifacts = {
    'model.pkl': model,
    'vectorizer.pkl': vectorizer,
}
for filename, artifact in artifacts.items():
    with open(filename, 'wb') as handle:
        pickle.dump(artifact, handle)
