In [7]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timezone
from dotenv import load_dotenv
import os
import ipywidgets as widgets
from IPython.display import display, Image, clear_output

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

import pyperclip

%run "./news_source/news.ipynb" import News


# Let python-dotenv populate the process environment first, then read the
# base URL of the backend API from it (None when the variable is not set).
load_dotenv()
API_URL = os.environ.get("API_URL")

In [10]:
class NewsSource:
    """Widget-driven helper that lists a Google News section and scrapes one story.

    On construction the section configuration is fetched from the backend API,
    the section page is downloaded and parsed into a list of headlines, and a
    dropdown plus a confirm button are displayed. Confirming a headline opens
    its story page in Chrome, collects the related articles and builds one
    ``News`` object per article.

    Attributes:
        sectionId: Identifier used to pick the section document from the API.
        newsSource, sectionName, sectionArr, tags: Fields copied from the
            matching API document (all ``None`` when nothing matched).
        html_soup: Parsed HTML of the section page.
        main_news: List of ``{'title', 'url'}`` dicts, one per headline.
        selected_news: Title currently chosen in the dropdown.
        all_news: ``News`` objects that have both a title and a text.
        all_images: ``News`` objects that have an image URL or an image text.
    """

    # Seconds to wait for a plain HTTP response before giving up.
    REQUEST_TIMEOUT = 30
    # Seconds to wait for the story page to render its <article> elements.
    PAGE_LOAD_TIMEOUT = 15
    # Seconds to wait for each button of the share dialog to become visible.
    DIALOG_TIMEOUT = 10
    # Maximum number of related articles collected from a story page.
    MAX_ARTICLES = 10

    def __init__(self, sectionId):
        """Load the section, parse its headlines and display the selection widgets.

        Args:
            sectionId: Value compared against the ``sectionId`` field of the
                documents returned by the news-source API.
        """
        self.sectionId = sectionId
        self.newsSource, self.sectionName, self.sectionArr, self.tags = self.get_source_news()

        # Data about the main (headline) news of the section
        self.html_soup = self.get_html(self.newsSource)
        self.main_news = self.get_main_news_list()
        self.selected_news = ''

        # Data about the news derived from the selected headline
        self.all_news = []
        self.all_images = []

        self.output_area = widgets.Output()
        self.image_display = widgets.Output()  # Initialises the image display area
        self.create_checkboxes()

    def get_source_news(self):
        """Fetch the section configuration that matches ``self.sectionId``.

        Returns:
            A ``(newsSource, sectionName, sectionArr, tags)`` tuple taken from
            the first 'google news - html' document whose ``sectionId`` equals
            ``self.sectionId``, or ``(None, None, None, None)`` if none matches.
        """
        url = f'{API_URL}/api/newssource'
        # A timeout keeps the notebook from hanging forever on a dead backend.
        documents = requests.get(url, timeout=self.REQUEST_TIMEOUT).json()
        for doc in documents:
            # .get() tolerates documents that lack the discriminator fields.
            if doc.get('__t') == 'google news - html' and doc.get('sectionId') == self.sectionId:
                return doc['newsSource'], doc['sectionName'], doc['sectionArr'], doc['tags']
        return None, None, None, None

    def get_html(self, url):
        """Download ``url`` with a browser-like User-Agent and parse it.

        Args:
            url: Address of the page to download.

        Returns:
            A ``BeautifulSoup`` object built with the 'html.parser' backend.
        """
        # NOTE(review): the User-Agent below has stray spaces around '/' and no
        # space before 'Safari'. It is kept verbatim because the CSS classes
        # scraped elsewhere were presumably validated against the markup served
        # for exactly this header - confirm before normalising it.
        browsers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome / 86.0.4240.198Safari / 537.36"}
        html_content = requests.get(url, headers=browsers, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(html_content.text, 'html.parser')
        return soup

    def get_main_news_list(self):
        """Extract the headline title and story URL of every news block.

        Blocks that lack the expected title anchor, link container, anchor or
        ``href`` are skipped; any other error is allowed to propagate.

        Returns:
            A list of ``{'title': str, 'url': str}`` dicts in page order.
        """
        news_blocks = self.html_soup.find_all(class_='PO9Zff Ccj79 kUVvS')
        result = []
        for nb in news_blocks:
            try:
                title = nb.find('a', class_='gPFEn').get_text()
                href = nb.find('div', class_='Ylktk').find('a').get('href')
                # Drop the first character of the href before prefixing the host.
                story_url = 'https://news.google.com' + href[1:]
            except (AttributeError, TypeError):
                # AttributeError: a find() returned None (element missing).
                # TypeError: the anchor has no href, so get() returned None.
                continue
            result.append({'title': title, 'url': story_url})
        return result

    def create_checkboxes(self):
        """Build and display the headline dropdown, the confirm button and the output area."""
        options = [news['title'] for news in self.main_news]
        self.news_checkboxes = widgets.Dropdown(
            options=options,
            description=f'{self.sectionName}:',
            disabled=False,
            layout=widgets.Layout(width='70%')
        )
        self.confirm_button = widgets.Button(
            description='Gerar notícias',
            button_style='success',
            tooltip='Clique para confirmar as notícias selecionadas',
            icon='check',
            layout=widgets.Layout(width='20%')
        )
        self.confirm_button.on_click(self.on_confirm_button_click)
        display(self.news_checkboxes, self.confirm_button, self.output_area)

    def on_confirm_button_click(self, b):
        """Button callback: store the dropdown value and scrape the related news.

        Args:
            b: The clicked button instance passed by ipywidgets (unused).
        """
        # Everything printed while scraping is captured by the output widget.
        with self.output_area:
            clear_output()
            self.selected_news = self.news_checkboxes.value
            print(f"Notícia selecionada: {self.selected_news}")
            self.get_reference_news()

    def get_reference_news(self):
        """Collect the articles related to the selected headline.

        Opens the story page of ``self.selected_news`` in Chrome, and for up to
        ``MAX_ARTICLES`` articles reads the ``h4`` title and obtains the link
        through the share dialog's copy-link button (read back from the system
        clipboard). A ``News`` object is then built for every collected URL.

        Side effects:
            Sets ``self.all_news`` and ``self.all_images``; both are reset to
            empty lists when no headline matches the current selection.
        """
        all_news_mainUrl = next(
            (news['url'] for news in self.main_news if news['title'] == self.selected_news),
            None,
        )
        if all_news_mainUrl is None:
            # E.g. the dropdown is empty, so its value is None.
            print(f'Nenhuma notícia principal encontrada para: {self.selected_news}')
            self.all_news = []
            self.all_images = []
            return
        print(f'Buscando notícias em {all_news_mainUrl}\n\n')

        reference_news = []
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
        try:
            driver.get(all_news_mainUrl)
            # Actually block until the articles are present in the DOM; a bare
            # WebDriverWait(...) without .until() does not wait for anything.
            articles = WebDriverWait(driver, self.PAGE_LOAD_TIMEOUT).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "article"))
            )
            for counter, a in enumerate(articles[:self.MAX_ARTICLES], start=1):
                title = a.find_element(By.CSS_SELECTOR, "h4").text

                share_btn = a.find_element(By.CSS_SELECTOR, '[data-tooltip="Compartilhar"]')
                share_btn.click()
                copy_link_btn = WebDriverWait(driver, self.DIALOG_TIMEOUT).until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, '[aria-label="Copiar link"]'))
                )
                copy_link_btn.click()
                close_dialog_btn = WebDriverWait(driver, self.DIALOG_TIMEOUT).until(
                    EC.visibility_of_element_located((By.CSS_SELECTOR, '[aria-label="Fechar caixa de diálogo"]'))
                )
                close_dialog_btn.click()

                # NOTE(review): the URL travels through the system clipboard, so
                # a copy that silently fails would yield whatever was there
                # before - verify if duplicated URLs ever show up.
                url = pyperclip.paste()
                print(f'#{counter} >> {url} >> {title}')
                reference_news.append({'title': title, 'url': url})
        except Exception as e:
            # Best effort: keep whatever was collected before the failure.
            print("Problema na coleta dos dados da página:", e)
        finally:
            # Always close the browser, even on KeyboardInterrupt.
            driver.quit()

        all_news = [News(r['url']) for r in reference_news]

        self.all_news = [news for news in all_news if news.title is not None and news.text is not None]
        self.all_images = [news for news in all_news if news.imageUrl is not None or news.imageText is not None]
    #     self.create_image_selector()
        
        
        
    # def create_image_selector(self):
    #     options = [image.imageUrl for image in self.all_images]
    #     self.dropdown = widgets.Dropdown(
    #         options=options,
    #         description='Selecione a imagem:',
    #         disabled=False,
    #     )

    #     # Função que será chamada ao mudar a seleção no dropdown
    #     def on_image_selected(change):
    #         with self.image_display:
    #             self.image_display.clear_output()
    #             selected_url = change['new']
    #             display(Image(url=selected_url))

    #     # Conectar a função ao dropdown
    #     self.dropdown.observe(on_image_selected, names='value')

    #     # Exibe o dropdown e a área de exibição da imagem
    #     display(self.dropdown, self.image_display)

    #     # Inicializa a exibição da primeira imagem, se houver
    #     if options:
    #         on_image_selected({'new': options[0]})


In [9]:
# url = "https://news.google.com/stories/CAAqNggKIjBDQklTSGpvSmMzUnZjbmt0TXpZd1NoRUtEd2lYblBXWURCRnVNaUZNdVk5NzB5Z0FQAQ?hl=pt-BR&gl=BR&ceid=BR%3Apt-419"

# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# try:
#     driver.get(url)
#     WebDriverWait(driver, 15)#.until(EC.url_changes(url))
#     articles = driver.find_elements(By.CSS_SELECTOR, "article.MQsxIb")
#     for a in articles[:10]:
#         # encontrar texto do h4
#         title = a.find_element(By.CSS_SELECTOR, "h4").text
#         share_btn = a.find_element(By.CSS_SELECTOR, '[data-tooltip="Compartilhar"]')
#         share_btn.click()
#         copy_link_btn = driver.find_element(By.CSS_SELECTOR, '[aria-label="Copiar link"]')
#         copy_link_btn.click()
#         close_dialog_btn = driver.find_element(By.CSS_SELECTOR, '[aria-label="Fechar caixa de diálogo"]')
#         close_dialog_btn.click()
        
#         clipboard_data = pyperclip.paste()
#         print(f'Título: {title}')
#         print(f'Url: {clipboard_data}\n\n')
# except Exception as e:
#     print("Problema na coleta dos dados da página:", e)

# driver.quit()






Título: Chefe da ONU fala em “catástrofe mundial” pela rápida elevação do Oceano Pacífico
Url: https://www.correiodopovo.com.br/not%C3%ADcias/mundo/chefe-da-onu-fala-em-cat%C3%A1strofe-mundial-pela-r%C3%A1pida-eleva%C3%A7%C3%A3o-do-oceano-pac%C3%ADfico-1.1527002


Título: 'Previsão do futuro': por que o Pacífico mais quente é um alerta para o mundo sobre o risco do avanço dos oceanos
Url: https://g1.globo.com/meio-ambiente/noticia/2024/08/27/previsao-do-futuro-por-que-o-pacifico-mais-quente-e-um-alerta-para-o-mundo-sobre-o-risco-do-avanco-dos-oceanos.ghtml


Título: Mar pode subir até 21 cm em duas cidades do RJ até 2050, aponta estimativa da Nasa citada pela ONU
Url: https://g1.globo.com/meio-ambiente/noticia/2024/08/27/mar-pode-21-cm-cidades-do-rj-ate-2050.ghtml


Título: ONU alerta para catástrofe mundial por elevação do nível do mar
Url: https://www.poder360.com.br/meio-ambiente/onu-alerta-para-catastrofe-mundial-por-elevacao-do-nivel-do-mar/


Título: ONU emite SOS climático para 