In [None]:
# Import all required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import re
import string
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Build a tiny labelled corpus in memory (works directly inside Colab).
# Each headline is stored next to its label (1 = fake/spam, 0 = real) so the
# two cannot drift out of alignment when rows are added, removed or reordered.
labelled_headlines = [
    ("Breaking! Government announces new education policy for all schools.", 0),
    ("Scientists discover cure for cancer! Click here to read more.", 1),
    ("Local elections to be held next month as per official sources.", 0),
    ("Actor found dead! Shocking truth revealed, visit our site to know!", 1),
    ("New highway project to connect rural areas approved by government.", 0),
    ("You won a free iPhone! Claim now by visiting our link.", 1),
    ("Weather department predicts heavy rainfall this weekend.", 0),
    ("This one trick can make you rich overnight — must watch!", 1),
    ("National sports event to begin from Monday with 20 teams.", 0),
    ("Earn $5000 a week from home easily, no experience needed!", 1),
]

# Column-oriented view of the same rows, in the shape pd.DataFrame expects
data = {
    'text': [headline for headline, _ in labelled_headlines],
    'label': [label for _, label in labelled_headlines],
}

df = pd.DataFrame(data)
df.to_csv('news.csv', index=False)  # persisted so the next cell can reload it
print("✅ Sample dataset created and saved as news.csv")
df.head()


✅ Sample dataset created and saved as news.csv


Unnamed: 0,text,label
0,Breaking! Government announces new education p...,0
1,Scientists discover cure for cancer! Click her...,1
2,Local elections to be held next month as per o...,0
3,"Actor found dead! Shocking truth revealed, vis...",1
4,New highway project to connect rural areas app...,0


In [None]:
# Reload the dataset from the CSV file written by the previous cell
data = pd.read_csv('news.csv')
print("✅ Dataset loaded successfully!")
print(data.head())


✅ Dataset loaded successfully!
                                                text  label
0  Breaking! Government announces new education p...      0
1  Scientists discover cure for cancer! Click her...      1
2  Local elections to be held next month as per o...      0
3  Actor found dead! Shocking truth revealed, vis...      1
4  New highway project to connect rural areas app...      0


In [None]:
# Report how many nulls each column holds before any rows are removed
null_counts = data.isnull().sum()
print("Missing values before cleaning:\n", null_counts)

# Discard incomplete rows (leaves the frame unchanged when nothing is missing)
data = data.dropna()

# Shared NLP helpers consumed by the preprocessing function
stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))

# 🔹 Define preprocessing function
def preprocess(text):
    """Normalise raw text into a space-separated string of stemmed tokens.

    Steps, in order: lowercase; drop ``[...]`` spans, URLs and HTML tags;
    turn punctuation into spaces; drop tokens containing a digit; remove
    English stop words; stem what is left.

    Args:
        text: The raw string to clean.

    Returns:
        The surviving stemmed tokens joined by single spaces ('' when
        nothing survives).
    """
    text = text.lower()                                 # lowercase
    text = re.sub(r'\[.*?\]', '', text)                 # remove text in brackets
    text = re.sub(r'https?://\S+|www\.\S+', '', text)   # remove links
    text = re.sub(r'<.*?>+', '', text)                  # remove HTML tags
    # Replace (rather than delete) every character that is neither a word
    # character nor whitespace. Matching on [^\w\s] also covers Unicode
    # punctuation such as em dashes, curly quotes and currency signs, which
    # the ASCII-only string.punctuation misses. Substituting a space keeps
    # "home,easily" as two tokens instead of fusing it into "homeeasily".
    # The underscore counts as \w, so it is listed explicitly.
    text = re.sub(r'[^\w\s]|_', ' ', text)
    text = re.sub(r'\w*\d\w*', '', text)                # remove tokens containing digits
    # split() breaks on any whitespace, newlines included, so no separate
    # newline substitution is needed.
    tokens = text.split()
    filtered = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return ' '.join(filtered)

# Clean every headline with preprocess().
# NOTE: this overwrites the raw text column in place, so running the cell a
# second time would feed already-stemmed text through preprocess() again;
# reload the CSV first if the cell needs to be re-run.
data['text'] = data['text'].apply(preprocess)
print("✅ Text preprocessing completed!")
data.head()


Missing values before cleaning:
 text     0
label    0
dtype: int64
✅ Text preprocessing completed!


Unnamed: 0,text,label
0,break govern announc new educ polici school,0
1,scientist discov cure cancer click read,1
2,local elect held next month per offici sourc,0
3,actor found dead shock truth reveal visit site...,1
4,new highway project connect rural area approv ...,0


In [None]:
# Separate features (X) and labels (y)
X = data['text']
y = data['label']   # 1 = fake/spam, 0 = real

# Hold out 20% of the rows for testing. stratify=y keeps the class ratio the
# same in both splits; without it, a corpus this small can end up with a
# class missing from the training or the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Convert text into TF-IDF features. The vocabulary is learned from the
# training split only (fit_transform) and reused as-is on the test split
# (transform), so no test information leaks into the features.
# NOTE(review): stop_words='english' is applied to text that preprocess() has
# already stop-word-filtered and stemmed - confirm the second filter is wanted.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

print(" TF-IDF vectorization done!")


 TF-IDF vectorization done!


In [None]:
# Fit a logistic-regression classifier on the TF-IDF training matrix.
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X_train_tfidf, y_train)

print("Model training completed!")


Model training completed!


In [None]:
# Predict labels for the held-out test rows
y_pred = model.predict(X_test_tfidf)

# Evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
print(" Model Evaluation Results:")
print("Accuracy:", accuracy)
# labels=[0, 1] pins the matrix to 2x2 (rows = true class, columns = predicted
# class) even when one class is absent from y_test or y_pred.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=[0, 1]))
# zero_division=0 reports 0.0 for a class that was never predicted instead of
# raising an undefined-metric warning once per metric.
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))


 Model Evaluation Results:
Accuracy: 0.5

Confusion Matrix:
 [[0 1]
 [0 1]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Classify one piece of text with the trained pipeline
def detect_fake_news(news):
    """Print whether `news` is predicted to be fake/spam or real.

    The text is cleaned with preprocess(), vectorised with the fitted
    `vectorizer`, and scored by `model`. A prediction of 1 is reported as
    fake/spam; anything else is reported as real. Nothing is returned.
    """
    features = vectorizer.transform([preprocess(news)])
    predicted = model.predict(features)
    verdict = (
        "The given news is FAKE or SPAM!"
        if predicted == 1
        else "The given news is REAL and TRUSTWORTHY!"
    )
    print(verdict)

# Interactive check: classify whatever the user types at the prompt
prompt_text = "Enter a news headline or paragraph:\n"
sample = input(prompt_text)
detect_fake_news(sample)


Enter a news headline or paragraph:
Government giving free laptops to everyone—register now!
The given news is REAL and TRUSTWORTHY!


In [None]:
# Try another headline. detect_fake_news() is already defined in the cell
# above; redefining it here only shadowed an identical function, so the
# duplicate definition has been removed.
sample = input("Enter a news headline or paragraph:\n")
detect_fake_news(sample)

Enter a news headline or paragraph:
You can earn ₹10,000 daily from home by clicking this secret link
The given news is FAKE or SPAM!
