In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from my_model import PassiveAggressiveClassifier



# Load the cleaned news dataset and keep only rows that have both the
# article text and a label.
df = pd.read_csv("cleaned_news.csv")
df.dropna(subset=['content', 'label'], inplace=True)

X_raw = df['content'].values
y_original = df['label'].values

# Flip the binary labels (0 <-> 1).
# NOTE(review): presumably the CSV encodes REAL=1 / FAKE=0 and the model is
# meant to see REAL=0 / FAKE=1 -- confirm against the dataset.
y = 1 - y_original

# Split the RAW documents before vectorizing. Fitting TF-IDF on the full
# corpus would let the vocabulary and IDF weights be learned from the test
# documents (data leakage), which makes the reported test accuracy
# optimistic. The same test_size/random_state on the same number of rows
# selects the same train/test rows as splitting the vectorized matrix did.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
# Learn vocabulary/IDF from the training split only; the test split is
# merely projected onto that fitted vocabulary.
# Dense arrays are kept because the original script fed dense input to the
# custom classifier.
X_train = vectorizer.fit_transform(X_train_raw).toarray()
X_test = vectorizer.transform(X_test_raw).toarray()

# Train the project's Passive-Aggressive classifier on the training split.
model = PassiveAggressiveClassifier(max_iter=100, C=0.1, mode="PA-I")
model.fit(X_train, y_train)

# Persist the fitted model and the fitted vectorizer; both are needed later
# to turn new text into features and classify it.
for artifact, target_path in ((model, "pa_model.pkl"), (vectorizer, "vectorizer.pkl")):
    with open(target_path, "wb") as handle:
        pickle.dump(artifact, handle)

print("Model and vectorizer saved successfully.")
print("Label mapping: REAL=0, FAKE=1")

# Report accuracy on the training split first, then on the held-out split.
train_accuracy = model.score(X_train, y_train)
print("Train Accuracy:", train_accuracy)
test_accuracy = model.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)


Model and vectorizer saved successfully.
Label mapping: REAL=0, FAKE=1
Train Accuracy: 1.0
Test Accuracy: 0.9172633253778838
