# In [None]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import os
from dotenv import load_dotenv
import json
import sqlite3
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

# Load variables from a .env file (python-dotenv) before reading the settings
# below from the environment.
load_dotenv()

# SMTP settings used to e-mail the news summaries. Each value is None when the
# corresponding environment variable is not set.
EMAIL_HOST = os.environ.get('EMAIL_HOST')
EMAIL_PORT = os.environ.get('EMAIL_PORT')
EMAIL_USER = os.environ.get('EMAIL_USER')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD')
RECIPIENT_EMAIL = os.environ.get('RECIPIENT_EMAIL')

# SQLite helpers that record which article links were already processed.
def setup_database():
    """Create the ``processed_links`` table in ``news.db`` if it is missing.

    The database file is opened relative to the current working directory.
    The connection is closed even when creating the table fails, so a failed
    call does not leak an open handle to the database file.
    """
    create_table_sql = '''
        CREATE TABLE IF NOT EXISTS processed_links (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT NOT NULL,
            date_processed TEXT NOT NULL
        )
    '''
    conn = sqlite3.connect('news.db')
    try:
        conn.execute(create_table_sql)
        conn.commit()
    finally:
        conn.close()

def link_already_processed(url):
    """Return True when *url* is already stored in ``processed_links``.

    Args:
        url: Article URL to look up (compared with exact equality).

    Returns:
        True if at least one row with this URL exists, otherwise False.

    Raises:
        sqlite3.Error: If the query fails, e.g. the table does not exist yet.
    """
    conn = sqlite3.connect('news.db')
    try:
        # Only existence matters, so ask for a constant and stop at the first
        # match instead of fetching a full row.
        row = conn.execute(
            "SELECT 1 FROM processed_links WHERE url = ? LIMIT 1", (url,)
        ).fetchone()
    finally:
        # Close the connection even if the query raised.
        conn.close()
    return row is not None

def store_processed_link(url):
    """Record *url* in ``processed_links`` stamped with today's date.

    Args:
        url: Article URL to mark as processed.

    Raises:
        sqlite3.Error: If the insert fails, e.g. the table does not exist yet.
    """
    processed_on = datetime.now().strftime('%Y-%m-%d')
    conn = sqlite3.connect('news.db')
    try:
        # ``with conn`` commits on success and rolls back if the insert raises.
        with conn:
            conn.execute(
                "INSERT INTO processed_links (url, date_processed) VALUES (?, ?)",
                (url, processed_on),
            )
    finally:
        # The connection context manager does not close the connection itself.
        conn.close()

# E-mail delivery of the news summary.
def send_news_summary(news_data, date_str):
    """E-mail the collected articles as a single plain-text message.

    The message is sent from ``EMAIL_USER`` to ``RECIPIENT_EMAIL`` through the
    SMTP server at ``EMAIL_HOST``/``EMAIL_PORT``, using STARTTLS and a login
    with ``EMAIL_USER``/``EMAIL_PASSWORD``.

    Args:
        news_data: List of article strings; they are joined with blank lines
            to form the message body.
        date_str: Date label placed in the subject line.

    Errors raised while connecting, authenticating or sending are caught and
    printed rather than propagated, so a mail failure does not abort the run.
    """
    subject = f"Resumo de Notícias - {date_str}"
    body = "\n\n".join(news_data)

    # Build the message envelope.
    msg = MIMEMultipart()
    msg['From'] = EMAIL_USER
    msg['To'] = RECIPIENT_EMAIL
    msg['Subject'] = subject

    msg.attach(MIMEText(body, 'plain'))

    # Send the message.
    try:
        # Without a timeout an unreachable or stalled SMTP server would block
        # the whole task indefinitely.
        with smtplib.SMTP(EMAIL_HOST, EMAIL_PORT, timeout=30) as server:
            server.starttls()
            server.login(EMAIL_USER, EMAIL_PASSWORD)
            server.sendmail(EMAIL_USER, RECIPIENT_EMAIL, msg.as_string())
        print(f"Resumo de notícias enviado para {RECIPIENT_EMAIL}")
    except Exception as e:
        print(f"Erro ao enviar email: {e}")

def get_links(url):
    """Return the article links listed on an archive page.

    Args:
        url: URL of the archive page to download.

    Returns:
        The ``href`` of the first ``<a>`` inside each ``<li>`` of the
        ``<ul class="archive-list__list">`` element. Items without an anchor
        or without an ``href`` are skipped. An empty list is returned when the
        page cannot be fetched or the archive list is absent; the problem is
        printed instead of raised.
    """
    try:
        # A timeout prevents a stalled server from hanging the whole task.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        archive_list = soup.find('ul', class_='archive-list__list')
        if archive_list is None:
            print(f"Erro ao obter links: lista de arquivo não encontrada em {url}")
            return []

        links = []
        for li in archive_list.find_all('li'):
            anchor = li.find('a')
            # Skip anchors without href rather than losing every link of the
            # page to a single KeyError.
            href = anchor.get('href') if anchor else None
            if href:
                links.append(href)
        return links
    except Exception as e:
        print(f"Erro ao obter links: {e}")
        return []

def _text_or_default(element, default):
    """Return the stripped text of *element*, or *default* when it is None."""
    return element.text.strip() if element is not None else default


def _extract_body_text(article_body):
    """Join the text of the direct ``<p>`` children and ``<ul>`` items of the body."""
    if article_body is None:
        return "Texto do artigo não encontrado"

    # Drop the first embedded form so its text does not leak into the article.
    form = article_body.find('form')
    if form:
        form.decompose()

    text_elements = []
    for child in article_body.children:
        if child.name == 'p':
            text_elements.append(child.get_text(strip=True))
        elif child.name == 'ul':
            text_elements.extend(
                li.get_text(strip=True) for li in child.find_all('li')
            )
    return '\n'.join(text_elements)


def extract_article_info(url):
    """Download an article and return its fields as one formatted string.

    Args:
        url: Article URL. URLs already recorded as processed are skipped.

    Returns:
        A string with title, subtitle, date, author and body text, or None
        when the link was already processed or the extraction failed (the
        error is printed). Missing subtitle, date, author or body are replaced
        by placeholder text; a missing title is treated as a failure. The URL
        is recorded as processed only after a successful extraction.
    """
    if link_already_processed(url):
        print(f"Link já processado: {url}")
        return None

    try:
        # A timeout prevents a stalled server from hanging the whole task.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # The title heading uses a different class on "poder-flash" pages.
        if url.startswith('https://www.poder360.com.br/poder-flash'):
            title_class = 'box-poder-flash__title mt-4'
        else:
            title_class = 'inner-page-section__title title-1'
        title = soup.find('h1', class_=title_class).text.strip()

        # Optional fields fall back to a placeholder, so an article without a
        # subtitle is no longer discarded entirely.
        subtitle = _text_or_default(
            soup.find('h2', class_='inner-page-section__line'),
            "Subtítulo não encontrado",
        )
        date = _text_or_default(soup.find('time'), "Data não encontrada")
        author = _text_or_default(
            soup.find('a', class_='author__name'), "Autor não encontrado"
        )

        text = _extract_body_text(
            soup.find('div', class_='inner-page-section__text')
        )

        article_info = f"titulo: {title}\nsubtitulo: {subtitle}\ndata: {date}\nautor: {author}\n\ntexto: {text}\n\n"
        store_processed_link(url)
        return article_info

    except Exception as e:
        print(f"Erro ao extrair informações do artigo: {e}")
        return None

def my_scheduled_task(n_dias):
    """Collect, save and e-mail the news of each of the last *n_dias* days.

    For every day from yesterday back to *n_dias* days ago, the day's archive
    page is read, author links are filtered out, each remaining article is
    extracted, and the new articles of that day are written to
    ``news_<YYYY-MM-DD>.json`` and e-mailed.

    Args:
        n_dias: Number of past days to process, starting with yesterday.

    Days with no new articles are skipped, so re-running the task does not
    overwrite an existing JSON file with an empty list or send an empty mail.
    """
    setup_database()
    # Take one reference time so a run that crosses midnight stays consistent.
    reference = datetime.now()

    # range(1, n_dias + 1) covers exactly n_dias days (1..n_dias days ago).
    for days_back in range(1, n_dias + 1):
        target_day = reference - timedelta(days_back)
        day_str = target_day.strftime('%Y-%m-%d')
        url = f'https://www.poder360.com.br/{target_day.strftime("%Y/%m/%d")}'
        print(f"Data e link: {url}")

        links = get_links(url)
        filtered_links = [link for link in links if not link.startswith("https://www.poder360.com.br/author/")]
        print(f"Links filtrados: {len(filtered_links)}")

        # Start a fresh list per day so a day's file and e-mail contain only
        # that day's articles, not everything gathered in earlier iterations.
        news_data = []
        for link in filtered_links:
            article_info = extract_article_info(link)
            if article_info:
                news_data.append(article_info)

        if not news_data:
            print("Nenhuma notícia nova para o dia", day_str)
            continue

        # Save the day's articles to a JSON file.
        filename = f'news_{day_str}.json'
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(news_data, f, ensure_ascii=False, indent=4)

        # Send the day's summary by e-mail.
        send_news_summary(news_data, day_str)
        print("Tarefa finalizada para o dia", day_str)

# Script entry point: run the scraping task with n_dias=15 when this file is
# executed directly.
if __name__ == "__main__":
    my_scheduled_task(15)
