In [1]:
!pip install requests



In [2]:
import pandas as pd
import requests
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Load real and fake news datasets
fake = pd.read_csv("Fake.csv")
real = pd.read_csv("True.csv")

In [4]:
# Label fake as 0, real as 1
fake["label"] = 0
real["label"] = 1

# Combine and shuffle
data = pd.concat([fake, real])
data = data.sample(frac=1).reset_index(drop=True)

In [5]:
texts = data['text']
labels = data['label']

vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X = vectorizer.fit_transform(texts)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

# Optional: Check model accuracy
y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))

✅ Accuracy: 0.9866369710467706

📊 Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4690
           1       0.99      0.99      0.99      4290

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [7]:
def predict_news(news_text):
    input_data = vectorizer.transform([news_text])
    prediction = model.predict(input_data)
    return "🟢 Real News" if prediction[0] == 1 else "🔴 Fake News"

In [8]:
# Replace with your NewsAPI key
api_key = 'your_api_key_here'

# Get top headlines from India
url = f'https://newsapi.org/v2/top-headlines?country=in&apiKey=a46056ca938d44ceac5338a6cdaa8ba5'
response = requests.get(url)
news_data = response.json()

# Check if API call worked
if news_data["status"] != "ok":
    print("❌ Failed to fetch news:", news_data)
else:
    print("✅ Live news fetched successfully!\n")

    # Predict on top 5 news headlines
    for i, article in enumerate(news_data['articles'][:5]):
        title = article['title']
        print(f"📰 News {i+1}: {title}")
        print("🔍 Prediction:", predict_news(title))
        print("-" * 50)

✅ Live news fetched successfully!



In [9]:
print(predict_news("ISRO launches Gaganyaan test vehicle successfully."))
print(predict_news("Supreme Court upholds 10% EWS quota in education and jobs."))
print(predict_news("India's GDP expected to grow by 7.2% in FY25, says RBI report."))
print(predict_news("Government announces scheme for free solar panels to farmers."))
print(predict_news("CBSE Class 12 results declared, over 90% students pass."))
print(predict_news("Union Budget 2025 focuses on clean energy and rural development."))
print(predict_news("India to host G20 summit with leaders from 20 major economies."))

🔴 Fake News
🔴 Fake News
🔴 Fake News
🔴 Fake News
🔴 Fake News
🟢 Real News
🔴 Fake News


In [12]:
print(predict_news("R Praggnanandhaa surpasses D Gukesh in FIDE live chess rankings today"))
print(predict_news("PM Modi lays foundation stones for projects worth ₹5,900 Cr in Siwan, Bihar"))
print(predict_news("Air India cancels nine flights due to a bird hit and maintenance checks"))

NotFittedError: The TF-IDF vectorizer is not fitted

In [13]:
print(predict_news("R Praggnanandhaa surpasses D Gukesh in FIDE live chess rankings today"))
print(predict_news("PM Modi lays foundation stones for projects worth ₹5,900 Cr in Siwan, Bihar"))
print(predict_news("Air India cancels nine flights due to a bird hit and maintenance checks"))

NotFittedError: The TF-IDF vectorizer is not fitted

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Use full article texts for training
texts = data['text']
labels = data['label']

# Create and fit the TF-IDF vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7, ngram_range=(1, 2))
X = vectorizer.fit_transform(texts)

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

In [16]:
def predict_news(news_text):
    input_data = vectorizer.transform([news_text])
    prediction = model.predict(input_data)
    return "🟢 Real News" if prediction[0] == 1 else "🔴 Fake News"

In [17]:
print(predict_news("R Praggnanandhaa surpasses D Gukesh in FIDE live chess rankings today"))
print(predict_news("PM Modi lays foundation stones for projects worth ₹5,900 Cr in Siwan, Bihar"))
print(predict_news("Air India cancels nine flights due to a bird hit and maintenance checks"))

🔴 Fake News
🔴 Fake News
🔴 Fake News


In [18]:
print(predict_news("R Praggnanandhaa surpasses D Gukesh in FIDE live chess rankings today"))
print(predict_news("PM Modi lays foundation stones for projects worth ₹5,900 Cr in Siwan, Bihar"))
print(predict_news("Air India cancels nine flights due to a bird hit and maintenance checks"))

🔴 Fake News
🔴 Fake News
🔴 Fake News


In [19]:
# Use titles instead of full articles
texts = pd.concat([fake['title'], real['title']])
labels = pd.concat([fake['label'], real['label']])

In [20]:
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7, ngram_range=(1, 2))
X = vectorizer.fit_transform(texts)

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

In [21]:
def predict_news(news_text):
    input_data = vectorizer.transform([news_text])
    prediction = model.predict(input_data)
    return "🟢 Real News" if prediction[0] == 1 else "🔴 Fake News"

In [22]:
print(predict_news("R Praggnanandhaa surpasses D Gukesh in FIDE live chess rankings today"))
print(predict_news("PM Modi lays foundation stones for projects worth ₹5,900 Cr in Siwan, Bihar"))
print(predict_news("Air India cancels nine flights due to a bird hit and maintenance checks"))

🔴 Fake News
🟢 Real News
🟢 Real News


In [1]:
pip install streamlit


Note: you may need to restart the kernel to use updated packages.


In [2]:
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load and prepare data
fake = pd.read_csv("Fake.csv")
real = pd.read_csv("True.csv")

fake["label"] = 0
real["label"] = 1

texts = pd.concat([fake['title'], real['title']])
labels = pd.concat([fake['label'], real['label']])

vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7, ngram_range=(1, 2))
X = vectorizer.fit_transform(texts)

X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

# Predict function
def predict_news(news_text):
    input_data = vectorizer.transform([news_text])
    prediction = model.predict(input_data)
    return "🟢 Real News" if prediction[0] == 1 else "🔴 Fake News"

# Streamlit UI
st.set_page_config(page_title="Fake News Detector", page_icon="📰")

st.title("📰 Fake News Detector")
st.write("Enter a news headline below to check if it's Real or Fake.")

# User input
user_input = st.text_area("📝 Enter News Headline")

if st.button("🔍 Check"):
    if user_input.strip() == "":
        st.warning("Please enter a headline to analyze.")
    else:
        result = predict_news(user_input)
        st.subheader("Result:")
        st.success(result)

2025-06-21 16:09:50.894 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-06-21 16:09:50.894 Session state does not function when running a script without `streamlit run`


In [3]:
streamlit run app.py

SyntaxError: invalid syntax (3737097518.py, line 1)