In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# Expects 'news.csv' in the working directory with at least a 'text' column
# (article body) and a 'label' column (class name, FAKE or REAL).
df = pd.read_csv('news.csv')  # Make sure you have the dataset
print("Dataset shape:", df.shape)
print("Columns:", df.columns)

# Display first few rows
print(df.head())

# Drop rows with a missing text or label before modelling: a NaN document
# makes TfidfVectorizer raise, and a NaN label cannot be fitted.
# When nothing is missing this is a no-op and the split below is unchanged.
rows_before = len(df)
df = df.dropna(subset=['text', 'label'])
if len(df) != rows_before:
    print("Dropped rows with missing text/label:", rows_before - len(df))

# Split the data
X = df['text']  # news content
y = df['label']  # FAKE or REAL

# Train-test split
# 80/20 split; the fixed random_state keeps the partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

# TF-IDF Vectorization
# English stop words are removed and terms appearing in more than 70% of the
# documents are ignored (max_df=0.7). The vectorizer is fitted on the training
# texts only and then reused for the test texts, so no test-set vocabulary or
# document frequencies leak into training.
tfidf = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = tfidf.fit_transform(X_train)
tfidf_test = tfidf.transform(X_test)

# Train classifier
# The classifier shuffles the training data between epochs by default, so
# without a seed the reported accuracy drifts from run to run. Seed it with
# the same value used for the train/test split to make results reproducible.
model = PassiveAggressiveClassifier(max_iter=50, random_state=7)
model.fit(tfidf_train, y_train)

# Predict on test set
y_pred = model.predict(tfidf_test)

# Overall accuracy on the held-out test set, printed as a percentage.
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc*100:.2f}%")

# Confusion matrix: rows are the actual class, columns the predicted class,
# both ordered FAKE then REAL so the tick labels below line up with the cells.
cm = confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL'])

# Draw the matrix as an annotated heatmap and label it through the returned
# Axes object.
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Fake', 'Real'],
    yticklabels=['Fake', 'Real'],
)
heatmap_ax.set_xlabel('Predicted')
heatmap_ax.set_ylabel('Actual')
heatmap_ax.set_title('Confusion Matrix')
plt.show()
