<a href="https://colab.research.google.com/github/Qudsia-jabeen20/News-Detection/blob/main/News.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai pandas scikit-learn xgboost

import openai
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score


In [None]:
import csv

fake_data = []          # header row first, then every accepted data row
problematic_rows = []   # 1-based record numbers (header = 1) that were skipped

# newline='' hands newline handling to the csv module, as its documentation
# requires, so line breaks inside quoted fields are parsed as field content.
with open("Fake.csv", 'r', encoding='utf-8', errors='replace', newline='') as infile:
    reader = csv.reader(infile)
    header = next(reader, None)  # Read the header row
    if header is None:
        raise ValueError("Fake.csv is empty: no header row found")
    fake_data.append(header)  # Add header to the data list

    row_number = 1  # the header counts as record 1, so data starts at 2
    while True:
        row_number += 1
        try:
            # Parsing happens inside next(), so this is the call that can
            # actually raise on a malformed record.
            row = next(reader)
        except StopIteration:
            break
        except csv.Error as e:
            print(f"Error processing row {row_number}: {e}")
            problematic_rows.append(row_number)  # Store problematic row numbers
            continue

        # A record with the wrong number of fields would either break the
        # DataFrame construction below or be silently padded; skip and report it.
        if len(row) != len(header):
            print(f"Error processing row {row_number}: expected {len(header)} fields, got {len(row)}")
            problematic_rows.append(row_number)
            continue

        fake_data.append(row)

# Convert the processed data to a DataFrame
fake_df_processed = pd.DataFrame(fake_data[1:], columns=fake_data[0])

print(f"Successfully read {len(fake_df_processed)} rows from Fake.csv")
if problematic_rows:
    print(f"Skipped or had issues with rows: {problematic_rows}")

display(fake_df_processed.head())

In [None]:
import csv

real_data = []               # header row first, then every accepted data row
problematic_rows_real = []   # 1-based record numbers (header = 1) that were skipped

# newline='' hands newline handling to the csv module, as its documentation
# requires, so line breaks inside quoted fields are parsed as field content.
with open("True.csv", 'r', encoding='utf-8', errors='replace', newline='') as infile:
    reader = csv.reader(infile)
    header_real = next(reader, None)  # Read the header row
    if header_real is None:
        raise ValueError("True.csv is empty: no header row found")
    real_data.append(header_real)  # Add header to the data list

    row_number = 1  # the header counts as record 1, so data starts at 2
    while True:
        row_number += 1
        try:
            # Parsing happens inside next(), so this is the call that can
            # actually raise on a malformed record.
            row = next(reader)
        except StopIteration:
            break
        except csv.Error as e:
            print(f"Error processing row {row_number} in True.csv: {e}")
            problematic_rows_real.append(row_number)  # Store problematic row numbers
            continue

        # A record with the wrong number of fields would either break the
        # DataFrame construction below or be silently padded; skip and report it.
        if len(row) != len(header_real):
            print(f"Error processing row {row_number} in True.csv: expected {len(header_real)} fields, got {len(row)}")
            problematic_rows_real.append(row_number)
            continue

        real_data.append(row)

# Convert the processed data to a DataFrame
real_df_processed = pd.DataFrame(real_data[1:], columns=real_data[0])

print(f"Successfully read {len(real_df_processed)} rows from True.csv")
if problematic_rows_real:
    print(f"Skipped or had issues with rows in True.csv: {problematic_rows_real}")

display(real_df_processed.head())

In [None]:
# Tag each source frame with its class: 0 = fake, 1 = real
fake_df_processed["label"] = 0
real_df_processed["label"] = 1

# Stack the two frames, shuffle with a fixed seed, and renumber the index
df = (
    pd.concat([fake_df_processed, real_df_processed], axis=0)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Keep whichever of the text-bearing columns are present, plus the label
text_columns = [name for name in ("text", "title") if name in df.columns]
if text_columns:
    df = df[text_columns + ["label"]]
else:
    print("Warning: Neither 'text' nor 'title' columns found. Keeping all columns and label.")
    df = df.copy()

print(df.head())
print(df['label'].value_counts())

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=42
)

# TF-IDF vectorizer
# The vocabulary is fitted on the training split only and then reused,
# unchanged, to transform the held-out split.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train XGBoost
# `use_label_encoder` is deliberately not passed: it is a deprecated argument
# that current XGBoost releases no longer use, and the labels here are
# already the integers 0/1.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train_vec, y_train)

# Evaluate
preds = model.predict(X_test_vec)
print("Local Model Accuracy:", accuracy_score(y_test, preds))

In [None]:
from google.colab import userdata
import openai

# Fetch the key from Colab's user secrets once and reuse it for both the
# module-level setting and the explicit client below.
# NOTE(review): "your API key" is looked up as the *name* of the secret —
# confirm it matches the name configured in the Colab secrets panel.
_api_key = userdata.get("your API key")
openai.api_key = _api_key

# Client object used for the chat-completion requests
client = openai.OpenAI(api_key=_api_key)

def _normalize_verdict(raw_answer):
    """Map a free-form model answer onto the canonical "Fake"/"Real" labels.

    Surrounding whitespace, quotes, markdown emphasis and trailing punctuation
    are ignored and the comparison is case-insensitive. Anything that is not
    one of the two labels is returned stripped but otherwise unchanged, so
    callers comparing against "Fake"/"Real" still see it as a disagreement.
    """
    answer = (raw_answer or "").strip()  # content may be None
    bare = answer.strip(" \t\r\n\"'`*.!:").casefold()
    for verdict in ("Fake", "Real"):
        if bare == verdict.casefold():
            return verdict
    return answer


def classify_with_openai(news_text):
    """Ask the OpenAI chat model whether a piece of news is real or fake.

    Args:
        news_text: The headline or article text to classify.

    Returns:
        "Fake" or "Real" when the model's reply is one of those words
        (ignoring case, quotes and punctuation); otherwise the model's
        stripped reply as-is.
    """
    prompt = f"""You are a fake news detection expert.
Decide if this news is Real or Fake.

News: "{news_text}"

Answer only with "Fake" or "Real"."""

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    # Callers compare this value to "Fake"/"Real" with ==, so replies such as
    # "Fake." or "real" must be canonicalised before being returned.
    return _normalize_verdict(response.choices[0].message.content)

In [None]:
def hybrid_predict(news_text):
    """Classify one news text with the local model and with OpenAI.

    Prints both predictions, plus a warning line when they differ.

    Args:
        news_text: The headline or article text to classify.

    Returns:
        A ``(local_label, gpt_label)`` tuple. ``local_label`` is "Real" or
        "Fake"; ``gpt_label`` is whatever ``classify_with_openai`` returned.
    """
    features = vectorizer.transform([news_text])
    predicted_class = model.predict(features)[0]
    # Class 1 is reported as "Real"; any other class as "Fake"
    local_label = "Real" if predicted_class == 1 else "Fake"
    print("Local Model Prediction:", local_label)

    # Second opinion from the OpenAI-backed classifier
    gpt_label = classify_with_openai(news_text)
    print("OpenAI Prediction:", gpt_label)

    labels_agree = local_label == gpt_label
    if not labels_agree:
        print("⚠️ Mismatch detected. Needs human review.")

    return local_label, gpt_label


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out TF-IDF matrix
preds = model.predict(X_test_vec)

# Print each metric under its own heading: per-class report first, then the
# confusion matrix.
for heading, metric_fn in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric_fn(y_test, preds))

In [None]:
# Try the hybrid check on a single hand-written claim; the returned
# (local_label, gpt_label) tuple is shown as the cell output.
news = "Imran Khan is the President of America"
hybrid_predict(news_text=news)


In [None]:
!pip install gradio


In [None]:
import gradio as gr

def hybrid_predict_interface(news_text):
    """Gradio callback: classify a news text and format the result as Markdown.

    Args:
        news_text: Text entered by the user. May be empty or None.

    Returns:
        A Markdown string. For blank input it is a short prompt asking for
        text (no model or API call is made). Otherwise it contains the
        submitted text, the local model's label, the OpenAI label and, when
        the two labels differ, a mismatch warning.
    """
    # Blank input carries nothing to classify; return early so no OpenAI
    # request is spent on it.
    if not news_text or not news_text.strip():
        return "⚠️ Please enter a news headline or article to check."

    # Local model prediction
    vec = vectorizer.transform([news_text])
    local_pred = model.predict(vec)[0]
    local_label = "Real" if local_pred == 1 else "Fake"

    # OpenAI prediction
    gpt_label = classify_with_openai(news_text)

    sections = [
        f"📄 **News:** {news_text}",
        f"🧠 **Local Model Prediction:** {local_label}",
        f"🤖 **OpenAI Prediction:** {gpt_label}",
    ]

    # Compare results
    if local_label != gpt_label:
        sections.append("⚠️ Mismatch detected. Needs human review.")

    # Markdown folds a single newline into a space, so each section is
    # separated by a blank line to render as its own paragraph.
    return "\n\n".join(sections)


In [None]:
# Multi-line text box where the user pastes the headline or article
news_input = gr.Textbox(
    lines=6,
    placeholder="Paste your news article or headline here...",
)

# Wire the callback to the input box; its return value is shown as Markdown
interface = gr.Interface(
    fn=hybrid_predict_interface,
    inputs=news_input,
    outputs="markdown",
    title="📰 Fake News Detector (Hybrid: XGBoost + OpenAI)",
    description="Enter a news headline or article. The app will check it using a local ML model and OpenAI's GPT. If the two disagree, you'll get a warning for manual review.",
)

# Start the app
interface.launch()
