In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the two source files: one holds fake articles, the other real ones.
fake_df = pd.read_csv("Fake.csv")
true_df = pd.read_csv("True.csv")

# Attach the target label to every row: 0 = fake, 1 = real.
fake_df["label"] = 0
true_df["label"] = 1

# Stack both sources and shuffle the rows. The shuffle is seeded so the row
# order -- and therefore the train/test split and the reported metrics
# further down -- is the same on every run. Without a seed here, the fixed
# random_state used by the later split would still pick different articles
# each time, because it would be applied to a differently ordered frame.
data = pd.concat([fake_df, true_df], axis=0)
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Keep only the columns the model uses, then drop rows with missing values.
# dropna() is assigned back instead of using inplace=True: mutating a frame
# obtained by column selection in place can trigger pandas'
# SettingWithCopyWarning, while the assigning form is unambiguous.
data = data[['text', 'label']].dropna()

# Separate the article text (model input) from the 0/1 label (target).
X, y = data['text'], data['label']

# Hold out 20% of the rows for evaluation. random_state=42 fixes the
# splitter's own shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Turn raw text into TF-IDF features. English stop words are removed and
# max_df=0.7 is passed as the document-frequency cutoff. The vectorizer is
# fitted on the training split only; the test split is merely transformed
# with the already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a logistic regression classifier with default settings (fit() returns
# the estimator itself), then predict labels for the held-out articles.
model = LogisticRegression().fit(X_train_tfidf, y_train)
y_pred = model.predict(X_test_tfidf)

# Report held-out performance: overall accuracy, the per-class
# classification report, and the confusion matrix. Each metric is called
# with the true test labels first and the predictions second, and printed
# after its heading.
for heading, metric in (
    (" Accuracy:", accuracy_score),
    ("\n Classification Report:\n", classification_report),
    ("\n Confusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))

def predict_news(news_text):
    """Classify a single piece of news text as real or fake.

    The text is converted with the module-level ``vectorizer`` and passed
    to the module-level ``model``; both must already be fitted.

    Args:
        news_text: The article text to classify.

    Returns:
        ``" Real News"`` (with its leading space, kept as-is) when the
        model predicts label 1, otherwise ``"Fake News"``.
    """
    # The vectorizer expects a collection of documents, so wrap the single
    # text in a one-element list.
    features = vectorizer.transform([news_text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return " Real News"
    return "Fake News"

# Demo: classify one hand-written sentence and show the verdict.
sample = "NASA is planning a new moon mission in 2025."
print("\nSample Text:", sample)
verdict = predict_news(sample)
print("Prediction:", verdict)


 Accuracy: 0.9850779510022272

 Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.99      4660
           1       0.98      0.99      0.98      4320

    accuracy                           0.99      8980
   macro avg       0.98      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980


 Confusion Matrix:
 [[4583   77]
 [  57 4263]]

Sample Text: NASA is planning a new moon mission in 2025.
Prediction: Fake News
