In [None]:
from notebook_helper import notebook_add_parent_dir_to_path, start_execution_time, execution_time

# Presumably makes modules in the parent directory importable (inferred from the
# helper's name) - TODO confirm against notebook_helper.
notebook_add_parent_dir_to_path()

In [None]:
from tqdm.notebook import tqdm

In [None]:
import re
import sys
import imaplib
import os.path
import argparse
from configobj import ConfigObj
from getpass import getpass

import email
from collections import defaultdict
import pandas as pd

In [None]:
# Record the start marker; it is handed to execution_time() in the final cell.
start_time = start_execution_time()

See also this article: https://towardsdatascience.com/email-automation-analytics-and-visualization-53b022e0f9a0

In [None]:
def connect(config, mailbox="INBOX"):
    """Log in to the IMAP server over SSL and select a mailbox.

    Args:
        config: Mapping that provides the keys ``imap_server``, ``imap_port``,
            ``username`` and ``password``.
        mailbox: Name of the mailbox to select once logged in.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` connection with ``mailbox`` selected.
    """
    host, port = config["imap_server"], config["imap_port"]
    connection = imaplib.IMAP4_SSL(host, port)

    user = config["username"]
    connection.login(user, config["password"])
    connection.select(mailbox)

    print(f"Successfully connected to {user}@icloud.com - {mailbox}")
    return connection


def search_emails(email_connection, sender):
    """Search the selected mailbox for messages sent by ``sender``.

    Args:
        email_connection: IMAP connection with a mailbox selected.
        sender: Value placed in the IMAP ``FROM`` search criterion.

    Returns:
        The list of matching message identifiers (as returned by splitting the
        server's reply on whitespace), or ``None`` when the reply carries no
        identifier data at all.
    """
    criterion = f'(FROM "{sender}")'
    _status, payload = email_connection.search(None, criterion)

    # All identifiers arrive in the first payload element, separated by spaces.
    id_blob = payload[0]
    return None if id_blob is None else id_blob.split()


def set_deleted(email_connection, email_uid):
    """Add the ``\\Deleted`` flag to the message identified by ``email_uid``.

    Args:
        email_connection: IMAP connection with a mailbox selected.
        email_uid: UID of the message; it is converted to text, stripped of
            surrounding whitespace and sent as ASCII bytes.
    """
    uid_bytes = str(email_uid).strip().encode("ascii")
    email_connection.uid("STORE", uid_bytes, "+FLAGS", "(\\Deleted)")


def fetch_uid(email_connection, email_id):
    """Look up the UID of the message with identifier ``email_id``.

    Args:
        email_connection: IMAP connection with a mailbox selected.
        email_id: Message identifier as returned by a mailbox search.

    Returns:
        The UID text taken from the ``(UID ...)`` group of the server reply with
        the ``UID`` keyword removed (the whitespace that followed the keyword is
        kept, e.g. ``" 123"``), or ``None`` when the reply is empty, cannot be
        decoded as UTF-8, or contains no ``(UID ...)`` group.

    Raises:
        ConnectionError: If the FETCH command fails; the underlying error is
            attached as ``__cause__``.
    """
    try:
        _, uid_string = email_connection.fetch(email_id, "UID")
    except Exception as exc:
        # Only genuine errors are translated; KeyboardInterrupt and SystemExit
        # are not Exception subclasses and therefore pass through untouched.
        raise ConnectionError(f"Unable to fetch the UID of message {email_id!r}") from exc

    try:
        uid_str = [str(x, encoding="utf-8") for x in uid_string]
    except (TypeError, UnicodeDecodeError) as exc:
        # Best effort: a reply that is not decodable bytes is reported and skipped.
        print(exc)
        return None

    if not uid_str:
        # An empty reply carries no UID to extract.
        return None

    uid_res = re.search(r"\((UID.*?)\)", uid_str[0])
    return uid_res[1].replace("UID", "") if uid_res is not None else None


def parse_args():
    """Parse the command-line options of the cleanup tool.

    Returns:
        The ``argparse`` namespace with the attributes ``email`` and ``file``;
        each is ``None`` when the corresponding option is not given.
    """
    cli = argparse.ArgumentParser(
        description="Delete all incoming emails from a sender email address"
    )
    # Option flag -> help text, registered in declaration order.
    option_help = {
        "--email": "The sender whose messages should be deleted",
        "--file": "A file which contains the target emails. Each email should be on a separate line.",
    }
    for flag, help_text in option_help.items():
        cli.add_argument(flag, help=help_text)
    return cli.parse_args()


def validate_input_email(email):
    """Check that ``email`` has the shape ``local@domain.tld``.

    Args:
        email: Candidate address text.

    Returns:
        A match object (truthy) when the whole string consists of exactly one
        ``@`` with non-empty text before it and a dotted part after it,
        otherwise ``None``.
    """
    # fullmatch anchors both ends, so text with a second "@" after an otherwise
    # valid-looking prefix (e.g. "a@b.co@evil") is rejected.
    return re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email)


def verify_cli_args(args):
    """Abort when neither a target email nor a file was supplied.

    Args:
        args: Parsed arguments exposing the attributes ``email`` and ``file``.

    Raises:
        SystemExit: With exit status 1 when both ``args.email`` and
            ``args.file`` are ``None``.
    """
    if args.email is None and args.file is None:
        print("You need to specify a target email or a file. Use --help for details")
        # A usage error must not report success to the calling shell.
        sys.exit(1)


def import_emails_from_file(filename):
    """Read target email addresses from a text file, one per line.

    Args:
        filename: Path of the file to read.

    Returns:
        The non-blank lines of the file with surrounding whitespace removed, in
        file order. An empty list is returned (after printing a notice) when
        ``filename`` is not an existing file.
    """
    if not os.path.isfile(filename):
        print(f"The file {filename} doesn't exist")
        # An empty list keeps callers that iterate over the result working.
        return []

    # The context manager closes the file even if reading fails.
    with open(filename) as handle:
        stripped_lines = (line.strip() for line in handle)
        # Skip blank lines (including the one after a trailing newline).
        return [address for address in stripped_lines if address]

In [None]:
# args = parse_args()
# verify_cli_args(args)
# Command-line parsing is disabled in the notebook, so no parsed arguments exist.
args = None

In [None]:
# Load the settings file; this notebook reads the keys "imap_server",
# "imap_port", "username" and "password" from it.
config = ConfigObj("config.ini")

In [None]:
# Ask for the password interactively when config.ini leaves it empty or does
# not define the key at all; the prompted value only lives in the in-memory config.
if not config.get("password"):
    config["password"] = getpass("Enter your email password: ")
else:
    print("Using password from config.ini")

In [None]:
# Open a notebook-level connection on the default mailbox ("INBOX"). The
# functions defined in later cells call connect() themselves instead of reusing it.
email_connection = connect(config)

In [None]:
# Send an IMAP CHECK and display the first element of the reply (its status).
email_connection.check()[0]

In [None]:
def count_by_email_address():
    """Count the messages of the default mailbox per sender address.

    A new connection is opened from the module-level ``config`` and logged out
    again before returning.

    Returns:
        A list of ``[sender_address, message_count]`` pairs in order of first
        appearance. The list is empty when the search fails or finds nothing.
    """
    # Imported explicitly so the function does not depend on ``email.utils``
    # having been loaded as a side effect of another ``email`` call.
    from email.utils import parseaddr

    print("Connecting to " + config["username"] + "@icloud.com...")
    email_connection = connect(config)
    try:
        # search() answers with a (status, data) pair; the message identifiers
        # are the whitespace-separated tokens of data[0].
        status, data = email_connection.search(None, "ALL")
        if status != "OK" or not data or not data[0]:
            return []

        counts = {}
        for message_id in data[0].split():
            # BODY.PEEK reads the header without setting the \Seen flag.
            status, message_data = email_connection.fetch(
                message_id, "(BODY.PEEK[HEADER.FIELDS (FROM)])"
            )
            if status != "OK" or not message_data or not isinstance(message_data[0], tuple):
                # No header payload for this message; leave it uncounted.
                continue
            header = email.message_from_bytes(message_data[0][1])
            sender = parseaddr(str(header["From"] or ""))[1]
            counts[sender] = counts.get(sender, 0) + 1

        return [[sender, n_email] for sender, n_email in counts.items()]
    finally:
        # Log out without CLOSE so nothing in the mailbox is expunged.
        email_connection.logout()

In [None]:
# all_emails = count_by_email_address()

In [None]:
def count_emails():
    """Count the messages in ``INBOX``.

    A new connection is opened from the module-level ``config`` and logged out
    again before returning.

    Returns:
        The number of messages (also printed), or ``None`` when the server does
        not answer the search with ``OK``.
    """
    # Raise imaplib's limit on the length of a single response line so that a
    # search reply listing many identifiers is accepted.
    imaplib._MAXLINE = 10000000
    print("Connecting to " + config["username"] + "@icloud.com...")
    email_connection = connect(config)
    try:
        email_connection.select("INBOX")
        result, data = email_connection.uid("search", None, "ALL")
        if result != "OK":
            return None
        # A reply without identifier data means the mailbox is empty.
        email_ids = data[0].split() if data and data[0] else []
        print(f"{len(email_ids)}")
        return len(email_ids)
    finally:
        # Always release the connection; logout (not CLOSE) leaves the mailbox untouched.
        email_connection.logout()

In [None]:
count_emails()

In [None]:
def count_emails_by_address():
    """Count the messages in ``INBOX`` per sender address.

    A new connection is opened from the module-level ``config`` and logged out
    again before returning. Only the ``From`` header of each message is fetched.

    Returns:
        A tuple ``(email_counts, error_count)`` where ``email_counts`` maps each
        sender address to its number of messages and ``error_count`` is the
        number of messages whose header could not be parsed; ``None`` when the
        server does not answer the search with ``OK``.
    """
    # Imported explicitly so the function does not depend on ``email.utils``
    # having been loaded as a side effect of another ``email`` call.
    from email.utils import parseaddr

    # Raise imaplib's limit on the length of a single response line so that a
    # search reply listing many identifiers is accepted.
    imaplib._MAXLINE = 10000000
    print("Connecting to " + config["username"] + "@icloud.com...")
    email_connection = connect(config)
    try:
        # Sender address -> number of messages seen so far.
        email_counts = defaultdict(int)

        # If you want SPAM, use "INBOX.SPAM"
        email_connection.select("INBOX")

        # Ask for the UIDs of every message in the mailbox.
        result, data = email_connection.uid("search", None, "ALL")
        if result != "OK":
            return None

        error_count = 0
        # A reply without identifier data means the mailbox is empty.
        email_ids = data[0].split() if data and data[0] else []
        for e_id in tqdm(email_ids, desc="Retrieving all emails", total=len(email_ids)):
            # BODY.PEEK fetches the From header without setting the \Seen flag,
            # so counting does not mark the whole mailbox as read.
            result, email_data = email_connection.uid(
                "fetch", e_id, "(BODY.PEEK[HEADER.FIELDS (FROM)])"
            )
            try:
                raw_email = email_data[0][1]
                email_message = email.message_from_bytes(raw_email)
                from_addr = parseaddr(email_message["From"])[1]
                # Increment the count for this email address
                email_counts[from_addr] += 1
            except Exception as exc:
                # Best effort: report the unparsable message and keep counting.
                print(f"{exc}")
                error_count += 1
        return email_counts, error_count
    finally:
        # Always release the connection; logout (not CLOSE) leaves the mailbox untouched.
        email_connection.logout()

In [None]:
# NOTE - this is NOT working - seems to "crash" although no exception after ~2.5K emails out of 100K emails
# email_counts = count_emails_by_address()

In [None]:
# email_counts_df = pd.DataFrame(email_counts)

In [None]:
# email_counts_df.to_csv("email_counts.csv")

In [None]:
def connect_and_clean(config, target_emails, mailbox="INBOX"):
    """Delete every message sent by the given addresses from a mailbox.

    For each valid address the mailbox is searched, every hit is flagged as
    deleted and the mailbox is expunged. The search is repeated until it finds
    nothing more, because one search may return only part of the matches.

    Args:
        config: Connection settings passed on to ``connect``; ``username`` is
            also used for the progress message.
        target_emails: Sized collection of sender addresses to clean up.
        mailbox: Name of the mailbox to operate on.

    Returns:
        The number of messages that were flagged as deleted across all targets.

    Raises:
        ConnectionError: Propagated from ``fetch_uid`` when a FETCH fails; the
            connection is still shut down before the error leaves this function.
    """
    # Raise imaplib's limit on the length of a single response line so that a
    # search reply listing many identifiers is accepted.
    imaplib._MAXLINE = 10000000
    print("Connecting to " + config["username"] + "@icloud.com...")
    email_connection = connect(config, mailbox)

    total_emails_count = 0
    try:
        for iemail, target_email in enumerate(
            tqdm(target_emails, desc="Total emails", total=len(target_emails))
        ):
            if not validate_input_email(target_email):
                print(f"The email '{target_email}' is not valid")
                continue

            if email_connection.check()[0] != "OK":
                print(f"Skipping {target_email}: the server did not answer CHECK with OK")
                continue

            found_for_target = 0  # messages returned by the searches so far
            deleted_for_target = 0  # messages actually flagged as deleted
            emails = search_emails(email_connection, target_email) or []

            while emails:
                batch_deleted = 0
                for idx, message_id in tqdm(
                    enumerate(emails), total=len(emails), desc=target_email
                ):
                    # The fetching of the email UID is required
                    # since the email ID may change between operations
                    # as specified by the IMAP standard
                    uid = fetch_uid(email_connection, message_id)
                    if uid is None:
                        print(
                            f"Email {found_for_target + idx + 1}/{found_for_target + len(emails)} was not valid"
                        )
                        continue
                    try:
                        set_deleted(email_connection, uid)
                    except Exception as exc:
                        # Best effort: report the failure and go on with the next message.
                        print(exc)
                        continue
                    batch_deleted += 1

                # Confirm the deletion of the messages
                email_connection.expunge()
                found_for_target += len(emails)
                deleted_for_target += batch_deleted

                print(f"Deleted {batch_deleted} email(s) for {target_email}")
                if batch_deleted == 0:
                    # Nothing could be flagged in this pass, so searching again
                    # would return the same messages forever.
                    print(f"Stopping: the remaining email(s) for {target_email} could not be deleted")
                    break
                print("Checking for remaining emails...")

                # Verify if there are any emails left on the server for
                # the target address. This is required to circumvent the
                # chunking of the search results by iCloud
                emails = search_emails(email_connection, target_email) or []

            print(
                f"{iemail}# Cleanup for {target_email} was successful. Deleted {deleted_for_target} email(s)"
            )
            total_emails_count += deleted_for_target

        print(f"The cleanup was successful. Deleted {total_emails_count} email(s)")
    finally:
        # Close the mailbox and logout, even when the cleanup was interrupted.
        # Shutdown errors are reported instead of hiding the original failure.
        for shutdown_step in (email_connection.close, email_connection.logout):
            try:
                shutdown_step()
            except (imaplib.IMAP4.error, OSError) as exc:
                print(f"Ignoring error while closing the connection: {exc}")
    return total_emails_count

In [None]:
# if args.email is None:
#     target_emails = import_emails_from_file(args.file)
# else:
#     target_emails = [args.email]

In [None]:
TARGET_EMAILS_FILE = "target_email_address.txt"
# Read the target senders and drop duplicates while keeping the file order.
# "or []" turns a missing-file result into "no targets" rather than a TypeError.
target_emails = list(dict.fromkeys(import_emails_from_file(TARGET_EMAILS_FILE) or []))
# WARNING: this call deletes the matching messages from the mailbox.
total_emails_count = connect_and_clean(config, target_emails)

In [None]:
# Display the total returned by connect_and_clean().
total_emails_count

In [None]:
#! pdm list | grep tqdm
# tqdm                     │ 4.66.1

In [None]:
# Show the lines of `pdm list` that mention "jupyter" or "notebook".
!pdm list | grep -E 'jupyter|notebook'

In [None]:
# Report when the run started. print() is used because it needs no helper import.
print(f"INFO: Notebook execution started at: {start_time}\n")
# Presumably computes/reports the elapsed time since start_time - TODO confirm
# against notebook_helper.
exec_time = execution_time(start_time)