# In [None]:  (Jupyter cell marker left over from the notebook export)
import requests
from msal import ConfidentialClientApplication
import os
import base64
import logging
from datetime import datetime, timedelta
from typing import List, Optional, Dict
import time

# Logging setup: root logger at INFO, writing every record both to the
# 'mail_connector.log' file and to the console stream.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_log_handlers = [
    logging.FileHandler('mail_connector.log'),
    logging.StreamHandler(),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)

class M365ReportDownloader:
    """
    Connector that downloads daily report attachments from an M365 mailbox
    through the Microsoft Graph REST API.

    Tuned for one specific use case: a single xlsx report, polled every 15 min.
    Unread messages whose subject contains 'Raport dzienny' are searched, the
    matching xlsx attachment is saved to ``download_path`` and the message is
    then marked as read.
    """

    # Root of the Microsoft Graph v1.0 REST API used by every request below.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"

    def __init__(self, client_id: str, client_secret: str, tenant_id: str,
                 mailbox_email: str, download_path: str = "./reports"):
        """
        Store the credentials and make sure the download directory exists.

        Args:
            client_id: Application (client) ID passed to MSAL.
            client_secret: Client secret passed to MSAL.
            tenant_id: Tenant ID used to build the login authority URL.
            mailbox_email: Mailbox whose messages are queried.
            download_path: Directory where reports are written (created if missing).
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.tenant_id = tenant_id
        self.mailbox_email = mailbox_email
        self.download_path = download_path
        self.access_token = None
        self.token_expires_at = None

        # Create the directory for downloaded reports
        os.makedirs(self.download_path, exist_ok=True)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _messages_url(self, suffix: str = "") -> str:
        """Return the Graph URL of the mailbox's message collection plus ``suffix``."""
        return f"{self.GRAPH_BASE_URL}/users/{self.mailbox_email}/messages{suffix}"

    def _auth_headers(self, json_body: bool = False) -> Dict[str, str]:
        """Return request headers carrying the current bearer token."""
        headers = {'Authorization': f'Bearer {self.access_token}'}
        if json_body:
            headers['Content-Type'] = 'application/json'
        return headers

    @staticmethod
    def _utc_cutoff(hours_back: int) -> str:
        """Return the UTC instant ``hours_back`` hours ago as ``YYYY-MM-DDTHH:MM:SSZ``.

        The value is computed from UTC (``time.gmtime``) because the trailing
        'Z' tells Graph the timestamp is UTC; a local-time value would shift
        the search window by the host's UTC offset.
        """
        cutoff_epoch = time.time() - hours_back * 3600
        return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(cutoff_epoch))

    @staticmethod
    def _build_filter_query(sender_email: Optional[str], received_since: str) -> str:
        """Build the OData ``$filter`` expression for unread report messages.

        ``receivedDateTime`` is deliberately the first clause: the query is
        sorted by that property, and Graph requires properties used in
        ``$orderby`` to appear first in ``$filter``.
        """
        filter_parts = [
            f"receivedDateTime ge {received_since}",
            "hasAttachments eq true",
            "isRead eq false",
            "contains(subject, 'Raport dzienny')",  # filter by subject
        ]
        if sender_email:
            # A single quote inside an OData string literal is escaped by doubling it.
            escaped_sender = sender_email.replace("'", "''")
            filter_parts.append(f"from/emailAddress/address eq '{escaped_sender}'")
        return " and ".join(filter_parts)

    @staticmethod
    def _find_report_attachment(attachments: List[Dict]) -> Optional[Dict]:
        """Return the first file attachment that looks like the xlsx report, or None.

        A match is a ``fileAttachment`` whose name (case-insensitively) ends
        with '.xlsx' and contains 'raport dzienny'.
        """
        for attachment in attachments:
            if attachment.get('@odata.type') != '#microsoft.graph.fileAttachment':
                continue
            lowered_name = (attachment.get('name') or '').lower()
            if lowered_name.endswith('.xlsx') and 'raport dzienny' in lowered_name:
                return attachment
        return None

    @staticmethod
    def _safe_attachment_name(name: str) -> str:
        """Strip any directory components from an attachment name.

        The name comes from the e-mail, so it must not be able to steer the
        output path outside ``download_path`` (e.g. via '../').
        """
        base_name = os.path.basename(name.replace('\\', '/')).strip()
        if base_name in ('', '.', '..'):
            return 'report.xlsx'
        return base_name

    def _get_access_token(self) -> bool:
        """Make sure ``self.access_token`` holds a usable token (with caching).

        The cached token is reused while it is more than 5 minutes away from
        its recorded expiry; otherwise a new one is requested through the MSAL
        client-credentials flow.

        Returns:
            True when a token is available, False when authentication failed
            (the failure is logged, not raised).
        """
        # Reuse the cached token while it is still valid (5 min safety buffer)
        if (self.access_token and self.token_expires_at and
                datetime.now() < self.token_expires_at - timedelta(minutes=5)):
            return True

        try:
            app = ConfidentialClientApplication(
                self.client_id,
                authority=f"https://login.microsoftonline.com/{self.tenant_id}",
                client_credential=self.client_secret
            )

            result = app.acquire_token_for_client(
                scopes=["https://graph.microsoft.com/.default"]
            )

            if "access_token" in result:
                self.access_token = result["access_token"]
                # Fall back to 3600 seconds when the response carries no expires_in
                self.token_expires_at = datetime.now() + timedelta(seconds=result.get("expires_in", 3600))
                logger.info("Successfully authenticated with Microsoft Graph")
                return True

            logger.error(f"Authentication failed: {result.get('error_description', 'Unknown error')}")
            return False

        except Exception as e:
            # Boundary handler: any MSAL/network failure is reported as "no token".
            logger.error(f"Authentication error: {str(e)}")
            return False

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def get_latest_report_messages(self, sender_email: Optional[str] = None,
                                 hours_back: int = 1) -> List[Dict]:
        """
        Fetch the newest unread messages that carry a daily report.

        Args:
            sender_email: Sender address to filter on (optional).
            hours_back: How many hours back to search (default 1h).

        Returns:
            Up to 10 message dicts (id, subject, from, receivedDateTime,
            hasAttachments), newest first; an empty list on authentication
            or request failure.
        """
        if not self._get_access_token():
            return []

        params = {
            '$filter': self._build_filter_query(sender_email, self._utc_cutoff(hours_back)),
            '$select': 'id,subject,from,receivedDateTime,hasAttachments',
            '$orderby': 'receivedDateTime desc',
            '$top': 10  # at most the 10 newest messages
        }

        try:
            response = requests.get(self._messages_url(), headers=self._auth_headers(),
                                    params=params, timeout=30)
            response.raise_for_status()
            messages = response.json().get('value', [])
        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching messages: {str(e)}")
            return []

        logger.info(f"Found {len(messages)} messages with reports")
        return messages

    def download_report_attachment(self, message_id: str, message_subject: str = "") -> Optional[str]:
        """
        Download the report attachment of one specific message.

        Args:
            message_id: Graph ID of the message to inspect.
            message_subject: Accepted for caller compatibility; not used here.

        Returns:
            str: Path of the saved file, or None when no matching attachment
            exists or an error occurred (errors are logged, not raised).
        """
        if not self._get_access_token():
            return None

        try:
            # Fetch the list of attachments
            response = requests.get(self._messages_url(f"/{message_id}/attachments"),
                                    headers=self._auth_headers(), timeout=30)
            response.raise_for_status()

            # Find the xlsx file with the report
            target_attachment = self._find_report_attachment(response.json().get('value', []))
            if not target_attachment:
                logger.warning(f"No xlsx report attachment found in message {message_id}")
                return None

            content = target_attachment.get('contentBytes')
            if not content:
                logger.warning(f"Report attachment in message {message_id} has no content")
                return None

            # Decode base64 payload
            file_data = base64.b64decode(content)

            # Prefix a timestamp so repeated downloads do not share a file name
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            safe_filename = f"{timestamp}_{self._safe_attachment_name(target_attachment['name'])}"
            file_path = os.path.join(self.download_path, safe_filename)

            # Write the file
            with open(file_path, 'wb') as f:
                f.write(file_data)

            file_size_kb = len(file_data) / 1024
            logger.info(f"Downloaded report: {safe_filename} ({file_size_kb:.1f} KB)")

            return file_path

        except requests.exceptions.RequestException as e:
            logger.error(f"Error downloading attachment from message {message_id}: {str(e)}")
            return None
        except Exception as e:
            # Boundary handler: decoding or file-system problems must not abort the batch.
            logger.error(f"Unexpected error downloading attachment: {str(e)}")
            return None

    def mark_message_as_read(self, message_id: str) -> bool:
        """Mark a message as read.

        Returns:
            True when the PATCH request succeeded, False otherwise (logged).
        """
        if not self._get_access_token():
            return False

        try:
            response = requests.patch(self._messages_url(f"/{message_id}"),
                                      headers=self._auth_headers(json_body=True),
                                      json={"isRead": True}, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            logger.error(f"Error marking message as read: {str(e)}")
            return False

        logger.info(f"Marked message {message_id} as read")
        return True

    def process_new_reports(self, sender_email: Optional[str] = None,
                            hours_back: int = 1) -> List[str]:
        """
        Main entry point - download every new report.

        Args:
            sender_email: Sender address to filter on (optional).
            hours_back: How many hours back to search (default 1h).

        Returns:
            List[str]: Paths of the downloaded files.
        """
        logger.info("Starting report processing...")

        # Fetch new messages carrying reports
        messages = self.get_latest_report_messages(sender_email, hours_back)

        if not messages:
            logger.info("No new report messages found")
            return []

        downloaded_files = []

        for message in messages:
            subject = message.get('subject') or ''
            logger.info(f"Processing message: {subject}")

            # Download the attachment
            file_path = self.download_report_attachment(message['id'], subject)

            if not file_path:
                logger.warning(f"Failed to download attachment from: {subject}")
                continue

            downloaded_files.append(file_path)

            # Mark as read so the message is not picked up again by the unread filter
            if not self.mark_message_as_read(message['id']):
                logger.warning(f"Message {message['id']} is still unread and may be downloaded again")

        logger.info(f"Processing complete. Downloaded {len(downloaded_files)} reports")
        return downloaded_files

# Example usage
def main(env: Optional[Dict[str, str]] = None) -> int:
    """Download new reports using configuration taken from environment variables.

    Credentials are read only from the environment; no fallback values are
    kept in the source, so secrets never end up in version control.

    Args:
        env: Mapping to read the configuration from; defaults to ``os.environ``.
            Required keys: AZURE_CLIENT_ID, AZURE_CLIENT_SECRET,
            AZURE_TENANT_ID, MAILBOX_EMAIL. Optional key: SENDER_EMAIL
            (filter by sender).

    Returns:
        0 when the run completed, 1 when required configuration is missing.
    """
    env = os.environ if env is None else env

    required = ("AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET", "AZURE_TENANT_ID", "MAILBOX_EMAIL")
    missing = [name for name in required if not env.get(name)]
    if missing:
        print(f"Missing required environment variables: {', '.join(missing)}")
        return 1

    # Create the downloader
    downloader = M365ReportDownloader(
        client_id=env["AZURE_CLIENT_ID"],
        client_secret=env["AZURE_CLIENT_SECRET"],
        tenant_id=env["AZURE_TENANT_ID"],
        mailbox_email=env["MAILBOX_EMAIL"],
        download_path="./daily_reports"
    )

    # Download new reports (optionally filtered by sender)
    downloaded_files = downloader.process_new_reports(sender_email=env.get("SENDER_EMAIL"))

    # Show the results
    if downloaded_files:
        print(f"\nDownloaded {len(downloaded_files)} report(s):")
        for file_path in downloaded_files:
            print(f"  - {file_path}")
    else:
        print("No new reports found")
    return 0


if __name__ == "__main__":
    exit_code = main()
    # Only raise on failure so an interactive/notebook run ends quietly on success.
    if exit_code:
        raise SystemExit(exit_code)