<a href="https://colab.research.google.com/github/JMandal02/Fake_News_Detection/blob/main/Fake_News_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the two source CSVs from the /content directory.
fake_df = pd.read_csv("/content/Fake.csv")
true_df = pd.read_csv("/content/True.csv")

# Add the binary target: 0 = fake article, 1 = real article.
fake_df["label"] = 0  # Fake
true_df["label"] = 1  # Real

# Stack both frames with a fresh 0..n-1 index, keep only the columns the
# model uses, then shuffle. The shuffle is seeded so that a restart + run-all
# produces the same row order (and therefore the same downstream results).
df = pd.concat([fake_df, true_df], ignore_index=True)
df = df[["text", "label"]]
df = df.sample(frac=1, random_state=42).reset_index(drop=True)



# **Data Preprocessing**

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import re

# Download the NLTK resources used by preprocess_text.
# 'punkt_tab' is the tokenizer data that nltk.word_tokenize loads on recent
# NLTK releases; 'punkt' is kept for older releases. Requesting a package an
# older NLTK index does not know only logs an error and returns False.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# A set gives O(1) membership checks when filtering tokens.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize one raw news article into a space-joined string of lemmas.

    Steps, in order: lowercase; remove ``[...]`` spans, URLs and ``<...>``
    tags; replace every non-letter character with a space; tokenize; drop
    English stop words; lemmatize the remaining tokens.

    Args:
        text: Raw article text. Any non-string value (for example the float
            NaN that pandas uses for a missing cell) is treated as empty.

    Returns:
        str: The cleaned tokens joined by single spaces; ``""`` when the
        input is not a string or nothing survives the filtering.
    """
    # Missing cells arrive as float NaN, which has no .lower(); treat as empty.
    if not isinstance(text, str):
        return ""

    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)                 # bracketed spans
    text = re.sub(r'https?://\S+|www\.\S+', '', text)   # URLs
    text = re.sub(r'<.*?>+', '', text)                  # HTML-like tags
    text = re.sub(r'[^a-zA-Z]', ' ', text)              # everything but letters
    tokens = nltk.word_tokenize(text)
    # Only letters and whitespace remain after the substitution above, so no
    # token can be a punctuation mark; filtering on stop words is sufficient.
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )

# Run the cleaning pipeline over every article; the raw "text" column is kept.
df["clean_text"] = df["text"].map(preprocess_text)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# **Text Vectorization (TF-IDF)**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Target vector (0 = fake, 1 = real).
y = df["label"]

# TF-IDF features with the vocabulary capped at 5000 terms. fit_transform
# learns the vocabulary/IDF weights and builds the document-term matrix
# from the whole cleaned corpus in one call.
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df["clean_text"])


# **Model Training**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

# Split the cleaned text rather than the already-vectorized matrix, so that
# the TF-IDF vocabulary and IDF weights can be learned from training rows
# only. Fitting the vectorizer on the full corpus before splitting lets
# held-out documents influence the features and inflates the test scores.
text_train, text_test, y_train, y_test = train_test_split(
    df["clean_text"], y, test_size=0.2, random_state=42
)

# Re-fit the existing vectorizer on the training split only, then project the
# held-out split with that fitted vocabulary. Later cells (pickling and
# user-input prediction) therefore use the train-only vectorizer.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Logistic Regression
log_model = LogisticRegression()
log_model.fit(X_train, y_train)

# Naive Bayes (optional baseline for comparison)
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)


# **Model Evaluation**

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict the held-out split with both classifiers.
log_preds = log_model.predict(X_test)
nb_preds = nb_model.predict(X_test)

# Report accuracy for each model in the same order as before.
for heading, predictions in (
    ("Logistic Regression Accuracy:", log_preds),
    ("Naive Bayes Accuracy:", nb_preds),
):
    print(heading, accuracy_score(y_test, predictions))

# Per-class precision/recall/F1 for the logistic regression model only.
print("\nClassification Report:\n", classification_report(y_test, log_preds))



Logistic Regression Accuracy: 0.9838530066815144
Naive Bayes Accuracy: 0.9269487750556793

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      4713
           1       0.98      0.98      0.98      4267

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



# **Save Trained Model (Pickle)**

In [None]:
import pickle

# Save the fitted classifier and vectorizer. The context managers guarantee
# each file is flushed and closed even if pickling raises.
with open("model.pkl", "wb") as model_file:
    pickle.dump(log_model, model_file)
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)


# **User Input (CLI Version)**

In [None]:
# Interactive step: blocks until the reader types or pastes an article and
# presses Enter. The raw string is stored for the prediction cells below.
user_input = input("Enter news text to check if it's Fake or Real:\n")

Enter news text to check if it's Fake or Real:
Actor Aamir Khan placated his daughter Ira when she approached him with her insecurities about not being self-reliant in her 20s. Ira said that she felt useless for not earning for herself even as an adult. Aamir said in an interview that he doesn’t judge people on how much money they have, and told Ira that she needn’t worry about it. He said that she would not be utilising her full potential if she were working a 9-5 job. “I’m here to support you, why are you worried?” he said. Aamir and Ira participated in a joint interview with Pinkvilla, where they spoke about her foundation, Agatsu.  “Main 26-27 saal ki hoon, mere maa baap ne mere upar bahut paise kharch kiye hain, aur main duniya mein bekar insaan hoon, main kuch nahi kar rahi hoon. (My parents have spent a lot of money on me. I am 26–27 years old, and I feel like a useless person in this world. I am not doing anything),” she said in the interview, confessing her fears. Aamir interj

# **User Input (CLI Version)**

In [None]:
# NOTE(review): this cell repeats the prompt from the preceding
# "User Input" cell; running both asks twice and this answer overwrites the
# earlier value of user_input. Consider removing one of the two cells.
user_input = input("Enter news text to check if it's Fake or Real:\n")

# **Preprocess + Vectorize Input**

In [None]:
# Clean the user's text with the same preprocess_text pipeline that produced
# the "clean_text" training column.
input_clean = preprocess_text(user_input)
# transform() takes an iterable of documents, hence the one-element list;
# the result is a single-row feature matrix.
input_vect = vectorizer.transform([input_clean])

# **Load Model & Predict**

In [None]:
# Reload the classifier saved earlier in this notebook. The context manager
# closes the file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you created
# or otherwise trust.
with open("model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

# predict() returns one label per input row; a single row was passed in.
prediction = loaded_model.predict(input_vect)[0]

# **Result: Fake or Real**

In [None]:
# Label 1 was assigned to the real-news rows; anything else is reported as fake.
print("✅ This news is REAL." if prediction == 1 else "❌ This news is FAKE.")


❌ This news is FAKE.


In [None]:
from google.colab import files

# Trigger a browser download for each saved artifact, model first.
for artifact_name in ('model.pkl', 'vectorizer.pkl'):
    files.download(artifact_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>