
#### Import packages

In [None]:
# Import necessary packages
import imaplib
import email
import os
import pandas as pd
from email.header import decode_header
import logging
import json
import yaml

In [None]:
# Configure logging via basicConfig: records at INFO level and above are emitted,
# each one prefixed with a timestamp and its severity name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


### Loading credentials for the Gmail account

The following function loads the credentials for my Gmail account from a YAML file.

In [None]:
# Function to load credentials from a YAML file
def load_credentials(filepath):
    """
    Load user credentials from a YAML file for email login.

    The file is expected to hold a top-level mapping with the keys
    'user' and 'password', both non-empty.

    Parameters:
        filepath (str): The path to the YAML file containing the credentials.

    Returns:
        tuple: Returns a tuple containing the username and password if successful.

    Raises:
        FileNotFoundError: If the YAML file cannot be found.
        ValueError: If the file is empty, does not contain a mapping, or is
            missing the 'user' or 'password' entry.
        yaml.YAMLError: If there is an error parsing the YAML.
    """
    # Only opening and parsing are guarded here; validation happens afterwards
    # so that a ValueError is never confused with an I/O or parse failure.
    try:
        with open(filepath, 'r') as file:
            # NOTE(review): FullLoader is kept to preserve existing behaviour.
            # For a plain user/password file yaml.safe_load would be the
            # stricter choice - consider switching.
            credentials = yaml.load(file.read(), Loader=yaml.FullLoader)
    except FileNotFoundError:
        logging.error("The specified YAML file was not found: {}".format(filepath))
        raise
    except yaml.YAMLError as e:
        logging.error("Error parsing YAML file: {}".format(e))
        raise

    # An empty file parses to None and a scalar/list document has no .get();
    # report both as the documented ValueError instead of an AttributeError.
    if not isinstance(credentials, dict):
        logging.error("YAML file does not contain a mapping of credentials: {}".format(filepath))
        raise ValueError("Credentials not found or incomplete in yaml file.")

    user = credentials.get('user')
    password = credentials.get('password')

    if not user or not password:
        logging.error("User or password missing in the provided YAML file.")
        raise ValueError("Credentials not found or incomplete in yaml file.")
    return user, password

The following function connects to Gmail's IMAP server using the credentials that have just been loaded.

In [None]:

# Helper to release a connection whose setup did not complete
def _close_failed_connection(connection):
    """
    Best-effort logout of an IMAP connection that could not be fully set up.

    Parameters:
        connection (IMAP4_SSL or None): The connection to release; None means
            the connection was never created and nothing is done.
    """
    if connection is None:
        return
    try:
        connection.logout()
    except (imaplib.IMAP4.error, OSError) as e:
        # The original failure is what the caller needs to see; a failing
        # logout is only noted at debug level.
        logging.debug("Ignoring error while closing failed IMAP connection: {}".format(e))


# Function to connect to Gmail's IMAP server
def connect_to_gmail_imap(user, password):
    """
    Connect to the Gmail IMAP server and log in using the provided credentials.

    If login or Inbox selection fails, the partially opened connection is
    logged out before the error is re-raised, so no socket is left open.

    Parameters:
        user (str): The username (email address) for Gmail.
        password (str): The password for the Gmail account.

    Returns:
        IMAP4_SSL: An imaplib IMAP4_SSL object with the 'Inbox' selected.

    Raises:
        imaplib.IMAP4.error: If there are issues during the login or selecting the inbox.
        Exception: For handling other unexpected errors.
    """
    imap_url = 'imap.gmail.com'
    my_mail = None
    try:
        my_mail = imaplib.IMAP4_SSL(imap_url)
        my_mail.login(user, password)

        # select() reports a refused mailbox through its status instead of
        # raising, so the status has to be checked explicitly.
        status, response = my_mail.select('Inbox')
        if status != 'OK':
            raise imaplib.IMAP4.error("Could not select Inbox: {} {}".format(status, response))

        logging.info("Connected to Gmail and selected Inbox successfully.")
        return my_mail
    except imaplib.IMAP4.error as e:
        logging.error("Error during IMAP login or Inbox selection: {}".format(e))
        _close_failed_connection(my_mail)
        raise
    except Exception as e:
        logging.error("Unexpected error: {}".format(e))
        _close_failed_connection(my_mail)
        raise


The email addresses whose emails should be deleted are listed in a separate JSON file. The following function loads that list.


In [None]:

# Function to load a list of email addresses from a JSON file
def load_email_list(filepath):
    """
    Read a JSON file and return the value stored under its 'emails' key.

    Parameters:
        filepath (str): The path to the JSON file containing email addresses.

    Returns:
        list: The email addresses found under the 'emails' key.
    """
    with open(filepath, 'r') as json_file:
        parsed = json.load(json_file)
    return parsed['emails']


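Number of emails per email address: the following function searches the mailbox for emails from each listed address, flags them for deletion, and returns the count per address.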

In [None]:

# Function to identify and flag emails for deletion based on sender address
def get_emails_to_delete(mail, filepath):
    """
    Search and mark emails for deletion from specified senders.

    For every address in the JSON file, the currently selected mailbox is
    searched for messages FROM that address and each hit is flagged
    \\Deleted. Nothing is expunged here; that is left to the caller.

    Parameters:
        mail (IMAP4_SSL): The mail connection object.
        filepath (str): Path to the JSON file containing email addresses to be processed.

    Returns:
        DataFrame: A DataFrame containing email addresses and the count of emails marked for deletion.
            Addresses whose search failed are logged and left out; if flagging
            stops part-way, the count reflects the messages actually flagged.
    """
    list_of_emails = load_email_list(filepath)
    columns = ['Email address', 'Number of email messages to be deleted']
    rows = []

    for email_address in list_of_emails:
        try:
            status, data = mail.search(None, f'FROM "{email_address}"')
            # A 'NO' reply is returned, not raised; its data is error text and
            # must not be split into message ids.
            if status != 'OK':
                raise imaplib.IMAP4.error(f"SEARCH returned {status}: {data}")
        except imaplib.IMAP4.error as e:
            logging.error(f"Error processing emails from {email_address}: {e}")
            continue

        message_ids = data[0].split() if data and data[0] else []

        flagged = 0
        try:
            for msg_id in message_ids:
                status, response = mail.store(msg_id, "+FLAGS", "\\Deleted")
                if status != 'OK':
                    raise imaplib.IMAP4.error(
                        f"STORE returned {status} for message {msg_id!r}: {response}"
                    )
                flagged += 1
            logging.info(f"Marked {flagged} emails from {email_address} for deletion.")
        except imaplib.IMAP4.error as e:
            logging.error(f"Error processing emails from {email_address}: {e}")

        # Recorded after flagging so the summary never overstates what was marked.
        rows.append([email_address, flagged])

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)

Finally, run all the code and print the summary.

In [None]:

def main(credentials_path='Password_folders/gmail_access.yaml',
         email_list_filepath='Password_folders/email_list.json'):
    """
    Main function to execute the program.

    This function handles the sequence of operations starting from loading credentials,
    connecting to the Gmail IMAP server, loading a list of email addresses from a file,
    marking emails for deletion based on those addresses, and finally cleaning up the IMAP session.

    Failures while loading credentials or connecting are printed and end the
    function early. Once a connection exists, cleanup always runs, and logout
    is attempted even if expunge or close fails.

    Parameters:
        credentials_path (str): Path to the YAML file containing credentials for the Gmail account.
        email_list_filepath (str): Path to the JSON file containing a list of email addresses to process.
    """
    try:
        # Attempt to load credentials from the specified YAML file.
        user, password = load_credentials(credentials_path)
    except Exception as e:
        # Handle any exceptions that occur during credential loading and exit the program.
        print("Failed to load credentials: {}".format(e))
        return  # Exit the function if credentials can't be loaded.

    # Assuming credentials are loaded successfully, establish an IMAP connection.
    try:
        mail = connect_to_gmail_imap(user, password)
    except Exception as e:
        # Handle possible exceptions during the connection to the IMAP server.
        print("Failed to connect to Gmail IMAP: {}".format(e))
        return  # Exit the function if the connection can't be established.

    try:
        # Load the list and mark emails from specified senders for deletion.
        summary = get_emails_to_delete(mail, email_list_filepath)
        print(summary)  # Print the summary of operations.
    except Exception as e:
        # Handle possible exceptions from email processing.
        print("Failed to process emails: {}".format(e))
    finally:
        # Clean up the IMAP session: remove flagged emails, close the mailbox, and logout.
        try:
            mail.expunge()  # Permanently remove emails marked for deletion.
            mail.close()    # Close the currently selected mailbox.
        except Exception as e:
            print("Failed during cleanup of IMAP session: {}".format(e))
        finally:
            # Logout runs in its own step so a failed expunge/close cannot
            # leave the session open.
            try:
                mail.logout()   # Logout from the server.
            except Exception as e:
                print("Failed to log out of IMAP session: {}".format(e))
# Run main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
