In [10]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import requests
import openai  # Make sure to set your OpenAI API key
import os
from huggingface_hub import login


# Set your Hugging Face token if needed
login("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")

# Load sentence-transformers model (requires HF token if private or gated)
model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Set your OpenAI key
openai.api_key = "your_openai_api_key_here"

# Define the Google Search API credentials
api_key = "466859CB97F84C219C324124C7BE8E01"
api_url = "https://api.valueserp.com/search"

# --- Functions ---

def get_search_results(query, api_key, api_url):
    params = {
        "q": query,
        "num": 10,
        "api_key": api_key
    }
    response = requests.get(api_url, params=params)
    response_data = response.json()

    # Add this line to check the response content
    print(f"API Response: {response_data}")

    return response_data


def get_relevant_results(news_title, search_titles, threshold=0.85):
    news_vec = model.encode([news_title])
    search_vecs = model.encode(search_titles)

    similarities = cosine_similarity(news_vec, search_vecs)[0]
    relevant_indices = [i for i, score in enumerate(similarities) if score >= threshold]

    return relevant_indices, similarities

def annotate_credibility(search_results, credible_sources):
    for result in search_results:
        domain = result.get('domain', '')
        result['credible'] = domain in credible_sources
    return search_results

def construct_prompt(article_title, retrieved_snippets, credibility_flags):
    prompt = "You are a fake news detector AI.\n\n"
    prompt += f"News Title: {article_title}\n\n"
    prompt += "Relevant search results:\n"

    for i, (snippet, cred) in enumerate(zip(retrieved_snippets, credibility_flags)):
        label = "Credible" if cred else "Unverified"
        prompt += f"[{label}] Snippet {i+1}: {snippet}\n"

    prompt += "\nDetermine if the original news article is real or fake. Justify your reasoning. Answer:\n"
    return prompt

def classify_news(prompt):
    response = openai.ChatCompletion.create(
        model="gpt-4",  # Replace with "mixtral-8x7b" if you're using Mixtral
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=500
    )
    return response['choices'][0]['message']['content']

# --- Load data ---

df = pd.read_csv("news.csv")
df["False/Truth"] = df["False/Truth"].map({"real": 1, "fake": 0})
y_true = df["False/Truth"].tolist()
articles = df['text'].tolist()

# Credible sources list (expand this as needed)
credible_sources = ["cnn.com", "bbc.com", "nytimes.com"]

# --- Main loop ---

y_pred = []

for idx, article in enumerate(articles):
    print(f"Processing article {idx+1}/{len(articles)}...")

    try:
        # Step 1: Retrieve search results
        search_results = get_search_results(article, api_key, api_url)
        raw_results = search_results.get("results", [])

        if not raw_results:
            print("⚠️ No search results found. Skipping...")
            y_pred.append(0)
            continue

        # Step 2: Extract titles/snippets safely
        search_titles = [r.get("title", "") for r in raw_results if r.get("title")]
        search_snippets = [r.get("snippet", "") for r in raw_results if r.get("snippet")]

        if not search_titles:
            print("⚠️ No valid titles found in search results. Skipping...")
            y_pred.append(0)
            continue

        # Step 3: Get relevant search results
        relevant_indices, similarities = get_relevant_results(article, search_titles)

        if not relevant_indices:
            print("⚠️ No relevant search results (cosine similarity too low). Skipping...")
            y_pred.append(0)
            continue

        retrieved_snippets = [search_snippets[i] for i in relevant_indices]

        # Step 4: Annotate credibility
        annotated_results = annotate_credibility(raw_results, credible_sources)
        credibility_flags = [annotated_results[i]['credible'] for i in relevant_indices]

        # Step 5: Build prompt and classify
        prompt = construct_prompt(article, retrieved_snippets, credibility_flags)
        result = classify_news(prompt)

        label = 1 if "real" in result.lower() else 0
        y_pred.append(label)

    except Exception as e:
        print(f"❌ Error processing article {idx+1}: {e}")
        y_pred.append(0)

# --- Evaluation ---

print("\n--- Evaluation ---")
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred))
print("Recall:", recall_score(y_true, y_pred))
print("F1 Score:", f1_score(y_true, y_pred))


Processing article 1/44898...
API Response: {'request_info': {'success': True, 'topup_credits_remaining': 66, 'credits_used_this_request': 1}, 'search_parameters': {'q': 'Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down ab

KeyboardInterrupt: 

In [6]:
print(search_results)


{'request_info': {'success': True, 'topup_credits_remaining': 99, 'credits_used_this_request': 1}, 'search_parameters': {'q': 'Donald Trump just couldn t wish all Americans a Happy New Year and leave it at that. Instead, he had to give a shout out to his enemies, haters and  the very dishonest fake news media.  The former reality show star had just one job to do and he couldn t do it. As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year,  President Angry Pants tweeted.  2018 will be a great year for America! As our Country rapidly grows stronger and smarter, I want to wish all of my friends, supporters, enemies, haters, and even the very dishonest Fake News Media, a Happy and Healthy New Year. 2018 will be a great year for America!  Donald J. Trump (@realDonaldTrump) December 31, 2017Trump s tweet went down about as welll as you d expect.What kind of pr