In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Configuration: everything a reader might want to tune lives here.
# DATA_PATH is machine-specific -- point it at your own copy of the CSV.
DATA_PATH = "C:\\Users\\gudis\\Downloads\\fake_or_real_news.csv"
LABEL_ENCODING = {'REAL': 1, 'FAKE': 0}  # string label -> integer class
TEST_SIZE = 0.2      # fraction of rows held out for evaluation
RANDOM_STATE = 42    # fixed seed so the split is reproducible
MAX_DF = 0.7         # ignore terms appearing in more than 70% of documents

# Step 1: Load the data
news_raw = pd.read_csv(DATA_PATH)

# Step 2: Data Preprocessing
# Keep only the necessary columns and drop rows with a missing text or label:
# TfidfVectorizer cannot vectorize NaN documents and NaN labels cannot be
# stratified on.
news_subset = news_raw[['text', 'label']].dropna(subset=['text', 'label'])
dropped_rows = len(news_raw) - len(news_subset)
if dropped_rows:
    print(f"Dropped {dropped_rows} rows with missing text or label.")

# Fail early on label values the encoding does not know about; otherwise
# .map() would silently turn them into NaN and the failure would surface much
# later with an unrelated error message.
unknown_labels = set(news_subset['label']) - set(LABEL_ENCODING)
if unknown_labels:
    raise ValueError(
        f"Unexpected label values {unknown_labels!r}; "
        f"expected only {list(LABEL_ENCODING)!r}."
    )

# Encode the labels ("REAL" -> 1, "FAKE" -> 0).
# .assign() builds a new frame instead of writing into a slice of the raw
# frame, which avoids pandas' SettingWithCopyWarning.
news_data = news_subset.assign(label=news_subset['label'].map(LABEL_ENCODING))

# Step 3: Split the data into training and testing sets
X = news_data['text']  # Features (news text)
y = news_data['label']  # Target variable (label)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,  # keep the REAL/FAKE ratio the same in both splits
)

# Step 4: Convert text to numerical data using TF-IDF
# The vectorizer is fitted on the training split only and then reused on the
# test split, so no test-set vocabulary leaks into training.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=MAX_DF)
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Step 5: Build and train the Naive Bayes model
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

# Step 6: Make predictions and evaluate the model
y_pred = nb_model.predict(X_test_tfidf)

# Calculate and print accuracy and classification report
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n")
print(classification_rep)

# Step 7: Add an input feature to predict whether news is REAL or FAKE
def predict_news(text=None):
    """Classify one news article as REAL or FAKE and print the verdict.

    Uses the module-level ``tfidf_vectorizer`` and ``nb_model``, which must
    already be fitted when this function is called.

    Args:
        text: Article text to classify. When omitted (``None``) the article is
            read interactively with ``input()``, exactly as before. Passing
            the text directly lets the function run without a keyboard prompt,
            e.g. under "Restart & Run All" or from other code.

    Returns:
        None. The verdict -- or a request for valid input when the text is
        empty or whitespace-only -- is printed to stdout.
    """
    if text is None:
        print("\nEnter a news article to check if it's REAL or FAKE:")
        user_input = input("News: ")
    else:
        user_input = text

    # Nothing to classify: bail out instead of vectorizing an empty document.
    if user_input.strip() == "":
        print("Please provide a valid input.")
        return

    # Transform the input using the trained TF-IDF vectorizer
    # (transform expects an iterable of documents, hence the one-item list).
    user_input_tfidf = tfidf_vectorizer.transform([user_input])

    # Make a prediction; predict returns one label per document.
    prediction = nb_model.predict(user_input_tfidf)[0]

    # Output the result: class 1 is reported as REAL, anything else as FAKE.
    result = "REAL" if prediction == 1 else "FAKE"
    print(f"\nThe news article is predicted to be: {result}")

# Run the interactive check once. This prompts on stdin (via input()) and
# blocks until an article is entered, then prints the predicted label.
predict_news()


Accuracy: 84.14%
Classification Report:

              precision    recall  f1-score   support

           0       0.98      0.70      0.81       633
           1       0.77      0.98      0.86       634

    accuracy                           0.84      1267
   macro avg       0.87      0.84      0.84      1267
weighted avg       0.87      0.84      0.84      1267


Enter a news article to check if it's REAL or FAKE:


News:    virat kohli is new prime minister



The news article is predicted to be: REAL
