In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
import string

# Load dataset (make sure to replace with your actual path).
# read_csv infers the compression from the ".zip" extension, so the archive
# does not need to be unpacked first.
dataset_path = 'C:\\Users\\sarav\\FakeNewsDetector\\FakeNewsDetector\\.ipynb_checkpoints\\fake_or_real_news.csv.zip'
df = pd.read_csv(dataset_path)

# Preprocess the dataset
# Keep the stop words in a set: the membership test below runs once per token
# of every article, and a list would be scanned linearly each time.
stop_words = set(stopwords.words('english'))
# Drop tokens that are stop words (case-insensitive) or that occur as a
# substring of string.punctuation (e.g. a lone "," or "-"); everything else
# is re-joined with single spaces.
df['text'] = df['text'].apply(
    lambda x: ' '.join(
        word for word in x.split()
        if word.lower() not in stop_words and word not in string.punctuation
    )
)

# Split the dataset: 80% train / 20% test, seeded so the split is repeatable.
X = df['text']
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

# Transform the text data using TfidfVectorizer.
# max_df=0.7 ignores terms that appear in more than 70% of the documents.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that same fitted vocabulary.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

# Train the PassiveAggressiveClassifier.
# The classifier shuffles the training data between epochs, so it is seeded
# as well; otherwise the accuracy and confusion matrix below change from run
# to run even though the split itself is fixed.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Predict on the test set
y_pred = pac.predict(tfidf_test)

# Calculate accuracy
score = accuracy_score(y_test, y_pred)
print(f'Accuracy: {round(score*100, 2)}%')

# Confusion matrix: rows are the true labels and columns the predicted
# labels, both in the order given by `labels` (FAKE first, then REAL).
cm = confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL'])
print(cm)

# Function to predict if a news article is real or fake
def predict_news(news):
    """Classify a single news article with the trained model.

    The text is filtered token by token, vectorized with the already-fitted
    module-level ``tfidf_vectorizer``, and scored by the module-level ``pac``
    classifier.

    Args:
        news: Raw article text as a single string.

    Returns:
        The first (and only) element of ``pac.predict`` for this article --
        presumably 'FAKE' or 'REAL', the labels used for the confusion
        matrix; confirm against the dataset's ``label`` column.
    """
    kept_tokens = []
    for token in news.split():
        # Skip stop words (compared in lower case) and tokens that occur as a
        # substring of string.punctuation, such as a lone "," or "-".
        if token.lower() in stop_words or token in string.punctuation:
            continue
        kept_tokens.append(token)

    # transform() expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([' '.join(kept_tokens)])
    return pac.predict(features)[0]

# Example usage: read one article from the user and report the predicted label.
news_article = input("Enter a news article to check if it is real or fake: ")
if news_article.strip():
    result = predict_news(news_article)
    print(f'The news article is: {result}')
else:
    # A blank submission contains no tokens to score, so any label printed
    # for it would be meaningless; report that instead of predicting.
    result = None
    print('No text entered; nothing to classify.')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sarav\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Accuracy: 92.74%
[[586  52]
 [ 40 589]]
