In [8]:
import requests
from bs4 import BeautifulSoup
import csv

In [9]:
# Landing page of the g1 "Fato ou Fake" section.
url = "https://g1.globo.com/fato-ou-fake/"

# Issue the HTTP GET request for the landing page. An explicit timeout is
# passed so a stalled connection cannot block this cell indefinitely.
# NOTE(review): none of the visible cells read `response`; confirm whether
# this request is still needed.
response = requests.get(url, timeout=30)

In [10]:
def fetch_content(url, timeout=30):
    """Download the body of ``url`` with an HTTP GET request.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up. Forwarded to
            ``requests.get`` so a stalled connection cannot hang the scrape.

    Returns:
        ``response.content`` when the server answers with status 200.
        ``None`` when the status is anything else or when the request itself
        fails (connection error, timeout, invalid URL, ...). An error message
        is printed in both failure cases.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failures are reported the same way as bad statuses so
        # one unreachable article does not abort the whole run.
        print(f"Erro ao acessar {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Erro ao acessar {url}. Código de status: {response.status_code}")
        return None
    return response.content

In [11]:
def extract_links_from_page(page_url, timeout=30):
    """Collect the article links on a listing page whose text contains "É #FAKE".

    The page is fetched with ``requests.get`` and parsed with BeautifulSoup.
    Only anchors inside the first ``<div class="_evt">`` are considered, and
    only those whose string contains ``"É #FAKE"``.

    Args:
        page_url: Address of the listing page.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        A list with the ``href`` of every matching anchor. Anchors without an
        ``href`` are skipped. The list is empty (never ``None``) when the
        request fails, the status is not 200, or the ``_evt`` container is
        missing, so callers can always iterate over the result.
    """
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Erro ao acessar {page_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Erro ao acessar {page_url}. Código de status: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    articles_full_section = soup.find('div', class_="_evt")
    if not articles_full_section:
        # Container not present on this page: report "no links" explicitly
        # instead of falling through and returning None.
        return []

    links = articles_full_section.find_all('a', string=lambda text: text and "É #FAKE" in text)
    hrefs = (link.get('href') for link in links)
    return [href for href in hrefs if href]

In [12]:
def save_to_csv(data, filename='fato_ou_fake.csv'):
    """Write the scraped rows to a UTF-8 CSV file, overwriting any existing file.

    The first line written is the header ``Título, Textos em Itálico``; every
    item of ``data`` follows as one CSV row.

    Args:
        data: Iterable of rows, each handed unchanged to the CSV writer.
        filename: Destination path of the CSV file.
    """
    header = ['Título', 'Textos em Itálico']
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        out.writerows(data)

In [13]:
def _parse_article(content):
    """Return ``[title, italics]`` extracted from one article's HTML."""
    link_soup = BeautifulSoup(content, 'html.parser')
    title = link_soup.find('h1', class_='content-head__title')
    title_text = title.text.strip() if title else "Título não encontrado"

    # All <em> texts are joined into a single newline-separated cell.
    italics = link_soup.find_all('em')
    italics_text = "\n".join([italic.text.strip() for italic in italics]) if italics else "Nenhum texto em itálico encontrado"

    return [title_text, italics_text]


def scrape_fato_ou_fake(max_pages=9):
    """Scrape the paginated feed and save title/italic text of each matching article.

    For every listing page from 1 to ``max_pages`` the matching article links
    are collected with ``extract_links_from_page``; each article is downloaded
    with ``fetch_content`` and parsed; the resulting rows are written with
    ``save_to_csv`` using its default file name.

    Args:
        max_pages: Number of listing pages to visit, starting at page 1. The
            default of 9 matches the previously hard-coded ``range(1, 10)``
            (the old inline comment mentioned 100 pages, but only 9 were
            ever visited).
    """
    base_url = "https://g1.globo.com/fato-ou-fake/index/feed/pagina-{}.ghtml"
    all_data = []

    for page_num in range(1, max_pages + 1):
        page_url = base_url.format(page_num)
        print(f"Acessando página: {page_url}")
        # Tolerate a None result from the link extractor: treat it as "no links"
        # rather than failing on iteration.
        links = extract_links_from_page(page_url) or []

        for link_url in links:
            if not link_url:
                # Anchor without an href: nothing to download.
                continue
            print(f"\nAcessando o link: {link_url}")
            content = fetch_content(link_url)
            if not content:
                continue
            all_data.append(_parse_article(content))

    save_to_csv(all_data)
    print("Dados salvos em 'fato_ou_fake.csv'")

In [14]:
# Entry point: run the full scrape only when this code is executed as the
# top-level program (``__name__`` is "__main__"), not when it is imported.
if __name__ == "__main__":
    scrape_fato_ou_fake()

Acessando página: https://g1.globo.com/fato-ou-fake/index/feed/pagina-1.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/noticia/2025/02/14/e-fake-foto-que-mostra-alexandre-de-moraes-usando-bone-da-usaid.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/video/e-fake-que-governo-entregou-administracao-de-14-do-territorio-brasileiro-para-empresa-13338513.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/noticia/2025/02/13/e-fake-que-vinagre-de-alcool-e-recomendado-para-combater-mosquito-da-dengue.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/noticia/2025/02/12/e-fake-post-que-usa-video-de-paolla-oliveira-para-anunciar-desconto-de-50percent-em-camarote-no-carnaval-2025-trata-se-de-golpe.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/video/e-fake-que-piloto-do-helicoptero-que-colidiu-com-aviao-nos-eua-era-uma-mulher-trans-13332403.ghtml

Acessando o link: https://g1.globo.com/fato-ou-fake/noticia/2025/02/11/e-fake-que-procon-obteve-decis