In [None]:
import yaml
import logging

def load_credentials(filepath):
    """Read the IMAP login details from a YAML file.

    Args:
        filepath: Path of a YAML file that provides the keys
            ``EMAIL_ACCOUNT``, ``APP_PASSWORD`` and ``IMAP_SERVER``.

    Returns:
        A ``(email, app_password, server)`` tuple built from those keys.

    Raises:
        Exception: Whatever went wrong while opening, parsing or indexing
            the file is logged with ``logging.error`` and re-raised as is.
    """
    try:
        with open(filepath, 'r') as handle:
            config = yaml.safe_load(handle)
        return (
            config['EMAIL_ACCOUNT'],
            config['APP_PASSWORD'],
            config['IMAP_SERVER'],
        )
    except Exception as err:
        logging.error("Failed to load credentials: {}".format(err))
        raise
    
# Login settings shared by the cells below. "creds.yaml" is a relative path,
# so it is resolved against the notebook's working directory.
EMAIL_ACCOUNT, APP_PASSWORD, IMAP_SERVER = load_credentials("creds.yaml")

### Using IMAP

In [None]:
import imaplib
import email
from email.header import decode_header
import pandas as pd

def connect_to_mailbox(mailbox_name):
    """Log in to the IMAP server and select the desired mailbox.

    Args:
        mailbox_name: Name of the mailbox to select, e.g. ``'INBOX'``.

    Returns:
        An authenticated ``imaplib.IMAP4_SSL`` connection with
        ``mailbox_name`` selected. The caller is responsible for calling
        ``logout()`` on it.

    Raises:
        imaplib.IMAP4.error: If the login is rejected or the mailbox cannot
            be selected.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        mail.login(EMAIL_ACCOUNT, APP_PASSWORD)
        status, detail = mail.select(mailbox_name)
        if status != 'OK':
            # Fail here instead of handing back a connection on which every
            # later SEARCH/FETCH would be rejected.
            raise imaplib.IMAP4.error(
                "Could not select mailbox {!r}: {}".format(mailbox_name, detail)
            )
    except Exception:
        # Do not leak the open socket when authentication or selection fails.
        try:
            mail.logout()
        except Exception:
            # Best-effort cleanup; the original error below is what matters.
            pass
        raise
    return mail

def decode_header_value(value):
    """Decode an RFC 2047 encoded email header into a plain string.

    Args:
        value: Raw header value as returned by ``msg.get(...)``; may be
            ``None`` or empty when the header is absent.

    Returns:
        The decoded header text, or ``""`` when ``value`` is falsy.
        Bytes that cannot be decoded are dropped (``errors='ignore'``).
    """
    if not value:
        return ""
    fragments = []
    for part, encoding in decode_header(value):
        if isinstance(part, bytes):
            try:
                fragments.append(part.decode(encoding or 'utf-8', errors='ignore'))
            except LookupError:
                # The header named a charset Python has no codec for
                # (e.g. "unknown-8bit"); fall back to UTF-8 instead of crashing.
                fragments.append(part.decode('utf-8', errors='ignore'))
        else:
            fragments.append(part)
    return ''.join(fragments)

def _decode_part_text(part):
    """Decode one message part's payload to str using its declared charset."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="ignore")
    except LookupError:
        # Declared charset is unknown to Python; fall back to UTF-8.
        return payload.decode("utf-8", errors="ignore")


def _extract_plain_text_body(msg):
    """Return the first non-attachment text/plain part, or the sole payload."""
    if not msg.is_multipart():
        return _decode_part_text(msg)
    for part in msg.walk():
        # Ignore attachments
        if "attachment" in str(part.get("Content-Disposition")):
            continue
        if part.get_content_type() == "text/plain":
            return _decode_part_text(part)
    return ""


def fetch_last_n_emails(mail, n=50):
    """Fetch headers and plain-text body of the last N messages.

    Args:
        mail: An ``imaplib`` connection with a mailbox already selected.
        n: Maximum number of messages to fetch, taken from the end of the
            search result. Values ``<= 0`` fetch nothing.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched message, last
        message first, and the columns ``from``, ``to``, ``subject``,
        ``date``, ``cc``, ``bcc``, ``reply_to``, ``message_id`` and ``body``.
        The frame is empty (but keeps those columns) when the search fails
        or nothing could be fetched.
    """
    columns = ['from', 'to', 'subject', 'date', 'cc', 'bcc',
               'reply_to', 'message_id', 'body']

    # ids[-0:] would be the whole list, so n <= 0 must be handled explicitly.
    if n <= 0:
        return pd.DataFrame(columns=columns)

    status, messages = mail.search(None, 'ALL')
    if status != 'OK':
        print("Failed to fetch emails.")
        return pd.DataFrame(columns=columns)

    email_ids = messages[0].split() if messages and messages[0] else []

    records = []
    for msg_num in reversed(email_ids[-n:]):
        status, data = mail.fetch(msg_num, '(RFC822)')  # Fetch full message
        # A successful reply can still carry no message tuple (e.g. [None]).
        if status != 'OK' or not data or not isinstance(data[0], tuple):
            continue

        msg = email.message_from_bytes(data[0][1])

        records.append({
            'from': decode_header_value(msg.get("From")),
            'to': decode_header_value(msg.get("To")),
            'subject': decode_header_value(msg.get("Subject")),
            'date': msg.get("Date"),
            'cc': decode_header_value(msg.get("Cc")),
            'bcc': decode_header_value(msg.get("Bcc")),
            'reply_to': decode_header_value(msg.get("Reply-To")),
            'message_id': msg.get("Message-ID"),
            'body': _extract_plain_text_body(msg).strip()
        })

    return pd.DataFrame(records, columns=columns)

def display_emails(df, section_name):
    """Print a banner followed by sender, subject and date of the first rows.

    Args:
        df: DataFrame holding at least the columns ``from``, ``subject``
            and ``date``.
        section_name: Label shown, upper-cased, in the banner line.
    """
    banner = f"\n=== {section_name.upper()} SECTION ==="
    preview_columns = ['from', 'subject', 'date']
    print(banner)
    print(df[preview_columns].head())

def main():
    """Download the last 50 messages of INBOX.

    Returns:
        The DataFrame produced by ``fetch_last_n_emails`` for ``'INBOX'``.
    """
    # Fetch from Primary
    primary_mail = connect_to_mailbox('INBOX')
    try:
        return fetch_last_n_emails(primary_mail, n=50)
    finally:
        # Always close the session, even when fetching raises.
        primary_mail.logout()

# Run the download and keep the resulting DataFrame for inspection.
emails = main()
# Bare expression: rendered as this cell's output.
emails

### Alternative approach: using imap-tools

In [None]:
from imap_tools import SortCriteria

# Bare expression: shows the DATE_DESC attribute of imap_tools' SortCriteria
# as this cell's output.
SortCriteria.DATE_DESC

In [None]:
# Imported here so this cell also runs on a fresh kernel (Restart & Run All);
# the notebook's only other MailBox import sits in a later cell.
from imap_tools import MailBox

# Print what folder.list('INBOX') reports. The context manager logs out even
# when listing fails, so the session is not left open.
with MailBox('imap.gmail.com').login(EMAIL_ACCOUNT, APP_PASSWORD, initial_folder='INBOX') as mailbox:
    for f in mailbox.folder.list('INBOX'):
        print(f)

In [None]:
from imap_tools import MailBox, AND
from datetime import datetime as dt

# Calendar days whose messages must be left out of the result.
EXCLUDE_DATES = {dt(2025, 5, 17).date(), dt(2025, 5, 18).date()}

# Fetch emails matching keywords and filter by date afterward
# NOTE(review): AND(text=[...]) presumably requires every keyword to match;
# confirm whether OR was intended.
with MailBox('imap.gmail.com').login(EMAIL_ACCOUNT, APP_PASSWORD, initial_folder='INBOX') as mailbox:
    messages = mailbox.fetch(
        AND(text=['discount', 'offer', 'price drops', 'honey']),
        # limit=100,
        # reverse=True
    )

    # Filter out unwanted dates: keep a message only when its day is NOT in
    # EXCLUDE_DATES (the previous `in` test kept exactly the excluded days).
    filtered = [
        msg for msg in messages
        if msg.date.date() not in EXCLUDE_DATES
    ]

    print(f"Total matching emails (excluding dates): {len(filtered)}")

    # Print first few
    for msg in filtered[:5]:
        print(f"Date: {msg.date.date()} | From: {msg.from_} | Subject: {msg.subject}")
        print("-" * 40)

In [None]:
def list_mailboxes(mail):
    """Print every mailbox entry the server reports for this connection.

    Args:
        mail: Object whose ``list()`` returns a ``(status, entries)`` pair,
            with ``entries`` being an iterable of ``bytes``.

    Nothing is printed when the reported status is not ``'OK'``.
    """
    status, raw_entries = mail.list()
    if status != 'OK':
        return
    print("\nAvailable Mailboxes:")
    for raw_entry in raw_entries:
        print(raw_entry.decode())
            
# Open a short-lived session just to list the mailboxes, and always log out
# so the connection is not left open on the server.
primary_mail = imaplib.IMAP4_SSL(IMAP_SERVER)
try:
    primary_mail.login(EMAIL_ACCOUNT, APP_PASSWORD)
    list_mailboxes(primary_mail)
finally:
    primary_mail.logout()

In [None]:
import imaplib
import email
from email.header import decode_header
import webbrowser
import os

# Account credentials: reuse the values loaded from creds.yaml rather than
# hardcoding them in the notebook.
# SECURITY: an address and password used to be written here in plain text;
# treat that password as compromised and rotate it.
username = EMAIL_ACCOUNT
password = APP_PASSWORD
# IMAP host of your email provider (for other providers see
# https://www.systoolsgroup.com/imap/). For Gmail this is "imap.gmail.com";
# the bare domain "gmail.com" is not the IMAP host.
imap_server = IMAP_SERVER

def clean(text):
    """Return ``text`` with every non-alphanumeric character replaced by "_".

    Intended for turning arbitrary text into a name usable for a folder.
    """
    sanitized = []
    for ch in text:
        sanitized.append(ch if ch.isalnum() else "_")
    return "".join(sanitized)
# create an IMAP4 class with SSL
imap = imaplib.IMAP4_SSL(imap_server)
# authenticate
imap.login(username, password)

status, messages = imap.select("INBOX")
if status != "OK":
    # On failure the payload is the server's error text, not a message count,
    # so int() below would fail with a misleading ValueError.
    imap.logout()
    raise imaplib.IMAP4.error("Could not select INBOX: {!r}".format(messages))
# number of top emails to fetch
N = 3
# total number of emails
messages = int(messages[0])

In [None]:
# App password used by get_mail_client(); taken from creds.yaml instead of
# being hardcoded. SECURITY: the app password that used to be written here in
# plain text should be revoked.
api_code = APP_PASSWORD

import imaplib


def get_mail_client(email_address):
    """Open an authenticated IMAP-over-SSL session with Gmail.

    Args:
        email_address: Account to log in as; the module-level ``api_code``
            is used as the password.

    Returns:
        A logged-in ``imaplib.IMAP4_SSL`` connection (no mailbox selected).

    Raises:
        imaplib.IMAP4.error: If the server rejects the login.
    """
    # Gmail's IMAP host is "imap.gmail.com" (not the bare "gmail.com");
    # 993 is the IMAP-over-SSL port.
    IMAP_HOST = "imap.gmail.com"
    IMAP_PORT = 993

    mail = imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT)
    try:
        mail.login(email_address, password=api_code)
    except imaplib.IMAP4.error:
        # Close the session instead of leaking it when the login is rejected.
        mail.logout()
        raise
    return mail

# Log in with the account loaded from creds.yaml rather than a hardcoded address.
mail = get_mail_client(email_address=EMAIL_ACCOUNT)
mail


In [None]:
# select() hands back a pair; the code below reads the message count from the
# first element of its second item.
inbox = mail.select('INBOX')
# Number of messages in INBOX, shown as this cell's output.
int(inbox[1][0])

In [None]:
def get_top_10_emails(category):
    """Return up to ten message UIDs from INBOX for one Gmail category.

    Uses the module-level ``mail`` connection.

    Args:
        category: Gmail category search term, e.g. ``'promotions'``,
            ``'updates'`` or ``'forums'``.

    Returns:
        A list of at most ten UID strings, in reverse of the order the
        server returned them (highest UID first, assuming the server lists
        them in ascending order).

    Raises:
        imaplib.IMAP4.error: If INBOX cannot be selected or the search is
            rejected by the server.
    """
    select_status, select_response = mail.select('INBOX')
    if select_status != 'OK':
        raise imaplib.IMAP4.error(
            "Could not select INBOX: {!r}".format(select_response)
        )

    # The search has to be issued on the connection itself: select()'s
    # payload is only the message count (bytes) and has no uid() method.
    status, response = mail.uid('search', 'X-GM-RAW "category:' + category + '"')
    if status != 'OK':
        raise imaplib.IMAP4.error(
            "Search for category {!r} failed: {!r}".format(category, response)
        )

    # get email ids list; an empty result may arrive as b'' or None
    uids = (response[0] or b'').decode('utf-8').split()
    uids.reverse()
    return uids[:10]

# Gmail's search term for the Promotions tab is "category:promotions".
get_top_10_emails('promotions')