In [1]:
from modules.search_engines import GoogleSearchEngine 
from modules.embedder import HuggingFaceEmbedder
import os
from dotenv import load_dotenv
import pandas as pd
from typing import List, Dict
from sklearn.metrics.pairwise import cosine_similarity
from urllib.parse import urlparse
from modules.help import load_credible_domains
from modules.prefilter import prefilter_results
from modules.llm_base import LLM, build_classification_prompt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment so the
# os.getenv() lookups for the API keys in later cells can find them.
load_dotenv()

True

## Import Dataset

In [3]:
# Read the two source CSVs (paths are relative to the notebook's working dir).
fake_df = pd.read_csv('./data/Fake.csv')
true_df = pd.read_csv('./data/True.csv')

# Label every row by the file it came from before merging, so the label
# survives the concatenation.
fake_df['class'] = 'fake'
true_df['class'] = 'true'

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# both halves keep their own 0-based labels, so every label below
# min(len(fake_df), len(true_df)) appears twice and df.loc[label] is ambiguous.
df = pd.concat([fake_df, true_df], axis=0, ignore_index=True)

# Last expression: show the combined and per-source shapes as a sanity check.
df.shape, fake_df.shape, true_df.shape

((44898, 5), (23481, 5), (21417, 5))

## Main Code

In [4]:
# Take the title of the first row (by position, not by index label) as the
# sample headline used by the search, prefilter and prompt cells below.
title_1 = df.iloc[0]['title']

### Search

In [5]:
# Read the Google API key from the environment instead of hardcoding it.
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    # os.getenv returns None for an unset variable; stop here with a clear
    # message rather than handing a missing/empty key to the search client.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; add it to your .env file or environment."
    )
search_engine = GoogleSearchEngine(API_KEY)

# The headline itself is used as the search query; request 10 results.
query = title_1
results = search_engine.search(query, num_results=10)

### Check Similarity

In [6]:
# Embedder configured with the DistilBERT checkpoint path under ./models.
embedder = HuggingFaceEmbedder(
    model_name="./models/distilbert-base-uncased",
)

# Filter the raw search results against the headline using the embedder
# (presumably by embedding similarity -- confirm in modules.prefilter).
results_filtered = prefilter_results(
    results,
    title_1,
    embedder,
)

### Get LLM Results

In [7]:
# Build the classification prompt for the LLM from the headline and the
# prefiltered search results.
prompt = build_classification_prompt(title_1, results_filtered)

In [8]:
# LLM is handed the *name* of the variable (api_key_env), presumably to read
# the key itself; the value is read here only to fail fast with a clear
# message when the variable is missing or empty.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or environment."
    )

groq_llm = LLM(
    model="llama-3.3-70b-versatile",            # replace with the correct Groq model
    api_key_env="GROQ_API_KEY",
    endpoint="https://api.groq.com/openai/v1/chat/completions"  # Groq endpoint
)

# temperature=0.0 to keep the returned label as repeatable as possible; the
# call is the last expression so its result is shown as the cell output.
groq_llm.generate(prompt=prompt, temperature=0.0)



'fake'