In [1]:
import requests


def get_search_results(query, api_key, api_url):
    params = {
        "q": query,
        "num": 10,
        "api_key": api_key
    }
    response = requests.get(api_url, params=params)
    return response.json()


In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

def get_relevant_results(news_title, search_titles, threshold=0.85):
    model = SentenceTransformer('distilbert-base-nli-mean-tokens')
    news_vec = model.encode([news_title])
    search_vecs = model.encode(search_titles)

    similarities = cosine_similarity(news_vec, search_vecs)[0]
    relevant_indices = [i for i, score in enumerate(similarities) if score >= threshold]

    return relevant_indices, similarities


In [3]:
def annotate_credibility(search_results, credible_sources):
    for result in search_results:
        domain = result['domain']
        result['credible'] = domain in credible_sources
    return search_results


In [4]:
def construct_prompt(article_title, retrieved_snippets, credibility_flags):
    prompt = "You are a fake news detector AI.\n\n"
    prompt += f"News Title: {article_title}\n\n"
    prompt += "Relevant search results:\n"

    for i, (snippet, cred) in enumerate(zip(retrieved_snippets, credibility_flags)):
        label = "Credible" if cred else "Unverified"
        prompt += f"[{label}] Snippet {i+1}: {snippet}\n"

    prompt += "\nDetermine if the original news article is real or fake. Justify your reasoning. Answer:\n"
    return prompt


In [None]:
import openai  # if using OpenAI, or use Hugging Face or Bedrock SDK

def classify_news(prompt):
    response = openai.ChatCompletion.create(
        model="mixtral-8x7b",  # or substitute
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=500
    )
    return response['choices'][0]['message']['content']


In [5]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
df = pd.read_csv("news.csv")

# Prepare the labels
df["False/Truth"] = df["False/Truth"].map({"real": 1, "fake": 0})
y_true = df["False/Truth"].tolist()

# Extract article text
articles = df['text'].tolist()

# Initialize a list for predictions
y_pred = []

# Loop over articles and classify them
for article in articles:
    # Step 1: Retrieve search results based on the article title (or keywords from article)
    search_results = get_search_results(article, api_key, api_url)

    # Step 2: Extract titles/snippets from search results
    search_titles = [result['title'] for result in search_results['results']]
    search_snippets = [result['snippet'] for result in search_results['results']]

    # Step 3: Get relevant search results based on cosine similarity
    relevant_indices, _ = get_relevant_results(article, search_titles)
    retrieved_snippets = [search_snippets[i] for i in relevant_indices]

    # Step 4: Annotate credibility (if a source is in the credible list)
    annotated_results = annotate_credibility(search_results['results'], credible_sources)
    credibility_flags = [result['credible'] for result in annotated_results if result['title'] in retrieved_snippets]

    # Step 5: Build the prompt and classify the news article
    prompt = construct_prompt(article, retrieved_snippets, credibility_flags)
    result = classify_news(prompt)

    # Step 6: Convert result to 1 (real) or 0 (fake)
    label = 1 if result.strip().lower() == "real" else 0
    y_pred.append(label)

# Evaluate the model
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Precision:", precision_score(y_true, y_pred))
print("Recall:", recall_score(y_true, y_pred))
print("F1 Score:", f1_score(y_true, y_pred))


NameError: name 'api_key' is not defined