In [1]:
import pandas as pd

# Load data from the 'data' folder
fake = pd.read_csv("data/Fake.csv")
true = pd.read_csv("data/True.csv")

# Add labels
fake['label'] = 'fake'
true['label'] = 'true'

# Combine the datasets
data = pd.concat([fake, true], axis=0)
data = data[['text', 'label']]  # Keep only useful columns

# Show the first 5 rows
data.head()

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,fake
1,House Intelligence Committee Chairman Devin Nu...,fake
2,"On Friday, it was revealed that former Milwauk...",fake
3,"On Christmas day, Donald Trump announced that ...",fake
4,Pope Francis used his annual Christmas Day mes...,fake


In [2]:
import re
import string

# Function to clean the text
def clean_text(text):
    text = text.lower()  # Lowercase
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)  # Remove punctuation
    text = re.sub(r'\n', ' ', text)  # Remove new lines
    text = re.sub(r'\w*\d\w*', '', text)  # Remove words with numbers
    return text

# Apply to dataset
data['text'] = data['text'].apply(clean_text)

# Show cleaned sample
data.head()

Unnamed: 0,text,label
0,donald trump just couldn t wish all americans ...,fake
1,house intelligence committee chairman devin nu...,fake
2,on friday it was revealed that former milwauke...,fake
3,on christmas day donald trump announced that h...,fake
4,pope francis used his annual christmas day mes...,fake


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)

# Features (X) and Labels (y)
X = vectorizer.fit_transform(data['text'])
y = data['label']

# Split dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Model Initialization
model = LogisticRegression()

# Training the model
model.fit(X_train, y_train)

# Predicting on test data
y_pred = model.predict(X_test)

# Evaluating the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test,y_pred))

Accuracy: 0.9837416481069042

Classification Report:
               precision    recall  f1-score   support

        fake       0.98      0.99      0.98      4733
        true       0.98      0.98      0.98      4247

    accuracy                           0.98      8980
   macro avg       0.98      0.98      0.98      8980
weighted avg       0.98      0.98      0.98      8980



In [5]:
def predict_news(news):
    # Clean the input
    cleaned = clean_text(news)
    # Transform the input using the same vectorizer
    vectorized = vectorizer.transform([cleaned])
    # Predict
    prediction = model.predict(vectorized)
    return prediction[0]

# Example usage
sample_news_1 = "India has successfully launched its latest satellite for communication into orbit."
sample_news_2 = "Donald Trump caught building a rocket to escape the FBI investigation."

print("Sample 1 Prediction:", predict_news(sample_news_1))
print("Sample 2 Prediction:", predict_news(sample_news_2))

Sample 1 Prediction: fake
Sample 2 Prediction: fake


In [6]:
import pandas as pd

# Load datasets
df_fake = pd.read_csv("data/Fake.csv")
df_true = pd.read_csv("data/True.csv")

# Add labels
df_fake['label'] = 'fake'
df_true['label'] = 'true'

# Combine datasets
df = pd.concat([df_fake[['title', 'text', 'label']], df_true[['title', 'text', 'label']]], axis=0).reset_index(drop=True)
df.head()

Unnamed: 0,title,text,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,fake
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,fake
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",fake
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",fake
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,fake


In [7]:
df['text'] = df['text'].apply(clean_text)
df['title'] = df['title'].apply(clean_text)

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Combine title and text
df['combined'] = df['title'] + " " + df['text']

# Vectorize
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['combined'])
y = df['label']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict and check accuracy
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test,y_pred))

Accuracy: 0.987750556792873

Classification Report:
               precision    recall  f1-score   support

        fake       0.99      0.99      0.99      4733
        true       0.99      0.99      0.99      4247

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [9]:
def predict_news(news):
    cleaned = clean_text(news)
    vectorized = vectorizer.transform([cleaned])
    prediction = model.predict(vectorized)
    return prediction[0]

print("Sample 1 Prediction:", predict_news(sample_news_1))
print("Sample 2 Prediction:", predict_news(sample_news_2))

Sample 1 Prediction: fake
Sample 2 Prediction: fake


In [20]:
import tkinter as tkinter
from tkinter import messagebox
import pickle
#load model and vectorizer
model=pickle.load(open("model.pkl","rb"))
vectorizer=pickle.load(open("vectorizer.pkl","rb"))
#clean text function
def clean_text(text):
    import string
    import re
    from nlkt.corpus import stopwords
    text=text.lower()
    text=re.sub(r"http\S+|www\S+|https\S+",'',text)
    text = re.sub(r"http\S+|www\S+|https\S+", '', text)
    text = re.sub(r'\@w+|\#','', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = ''.join([word for word in text if word not in string.punctuation])
    text = ' '.join([word for word in text.split() if word not in stopwords.words('english')])
    return text

# Prediction function
def predict_news():
    user_input = entry.get("1.0", tk.END).strip()
    if not user_input:
        messagebox.showwarning("Input Error", "Please enter some news text!")
        return
    cleaned = clean_text(user_input)
    vectorized = vectorizer.transform([cleaned])
    prediction = model.predict(vectorized)[0]
    result_label.config(text=f"Prediction: {prediction.upper()}", fg="green" if prediction=="true" else "red")

# GUI Window
root = tk.Tk()
root.title("Fake News Detector")
root.geometry("500x400")
root.config(bg="white")

tk.Label(root, text="Enter the News Article:", font=("Arial", 14), bg="white").pack(pady=10)
entry = tk.Text(root, height=10, width=55, wrap=tk.WORD, font=("Arial", 12))
entry.pack(padx=10)

tk.Button(root, text="Check", command=predict_news, font=("Arial", 12), bg="blue", fg="white").pack(pady=10)

result_label = tk.Label(root, text="Prediction: ", font=("Arial", 14), bg="white")
result_label.pack(pady=10)

root.mainloop()

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.13_3.13.1520.0_x64__qbz5n2kfra8p0\Lib\tkinter\__init__.py", line 2068, in __call__
    return self.func(*args)
           ~~~~~~~~~^^^^^^^
  File "C:\Users\ASUS\AppData\Local\Temp\ipykernel_16124\1097108303.py", line 27, in predict_news
    cleaned = clean_text(user_input)
  File "C:\Users\ASUS\AppData\Local\Temp\ipykernel_16124\1097108303.py", line 11, in clean_text
    from nlkt.corpus import stopwords
ModuleNotFoundError: No module named 'nlkt'
Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.13_3.13.1520.0_x64__qbz5n2kfra8p0\Lib\tkinter\__init__.py", line 2068, in __call__
    return self.func(*args)
           ~~~~~~~~~^^^^^^^
  File "C:\Users\ASUS\AppData\Local\Temp\ipykernel_16124\1097108303.py", line 27, in predict_news
    cleaned = clean_text(user_input)
  

In [18]:
import tkinter as tk
from tkinter import filedialog

In [16]:
import pickle

# Save the model
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)

# Save the vectorizer
with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer,f)