In [1]:
import win32com.client
import os
import pandas as pd

In [2]:
# Create a DataFrame to store the email log
email_log = pd.DataFrame(columns=['EmailSerial', 'Sender', 'Receiver', 'Time', 'Subject'])
attachments_log = pd.DataFrame(columns=['EmailSerial', 'AttachmentSerial', 'FileType', 'RenamedAttachment'])

'win32com.client' module in Python allows you to programmatically control Microsoft Office applications.

'Dispatch()', for creating an instance of an Office application.

In [3]:
# Access Outlook and retrieve the inbox folder
outlook = win32com.client.Dispatch("Outlook.Application")
mapi = outlook.GetNamespace("MAPI")
inbox = mapi.GetDefaultFolder(6)  # Inbox folder
messages = inbox.Items

com_error: (-2147352567, 'Exception occurred.', (4096, 'Microsoft Outlook', 'The operation failed.', None, 0, -2147467259), None)

In [33]:
email_serial_counter = 1  # Counter for email serial numbers
attachment_serial_counter = 1  # Counter for attachment serial numbers

#### Creating/Accessing Folder for all attachments (path mentioned here)

In [34]:
# Create a folder to save the attachments
attachments_folder = r"D:\GIKI\CAA_intern\attachments"
os.makedirs(attachments_folder, exist_ok=True)

In [35]:
# Iterate over unread emails, download attachments, mark as read, and update the log
for message in messages:
    if message.Unread:
        sender = message.SenderEmailAddress
        receiver = message.ReceivedByName
        time = message.ReceivedTime
        subject = message.Subject
        
        # Add email information to the email log
        email_log = email_log.append({
            'EmailSerial': email_serial_counter,
            'Sender': sender,
            'Receiver': receiver,
            'Time': str(time),
            'Subject': subject
        }, ignore_index=True)

        attachments_info = []  # List to store attachment information

        for attachment in message.Attachments:
            filename = attachment.FileName
            file_extension = os.path.splitext(filename)[1]
            new_filename = f"{email_serial_counter}_{attachment_serial_counter}_{filename}"
            filepath = os.path.abspath(os.path.join(attachments_folder, new_filename))
            attachment.SaveAsFile(filepath)

            attachments_info.append({
                'EmailSerial': email_serial_counter,
                'AttachmentSerial': attachment_serial_counter,
                'FileType': file_extension,
                'RenamedAttachment': new_filename
            })

            attachment_serial_counter += 1

        # Mark the email as read
        message.UnRead = False

        # Add attachment information to the attachment log
        for attachment_info in attachments_info:
            attachments_log = attachments_log.append(attachment_info, ignore_index=True)

        email_serial_counter += 1

  email_log = email_log.append({
  attachments_log = attachments_log.append(attachment_info, ignore_index=True)


In [36]:
email_log

Unnamed: 0,EmailSerial,Sender,Receiver,Time,Subject
0,1,/O=EXCHANGELABS/OU=EXCHANGE ADMINISTRATIVE GRO...,u2020337,2023-06-27 13:31:57.139000+00:00,HEC - Coursera: Cohort I


In [37]:
attachments_log

Unnamed: 0,EmailSerial,AttachmentSerial,FileType,RenamedAttachment
0,1,1,.jpg,1_1_HEC-DLSE Cohort I.jpg


#### Saving Log File (path mentioned here)

In [38]:
# Save the email log and attachments log to separate sheets in an Excel file
log_filepath = os.path.abspath(r"D:\GIKI\CAA_intern\email_log_with_attachments.xlsx")
with pd.ExcelWriter(log_filepath) as writer:
    email_log.to_excel(writer, sheet_name='EmailLog', index=False)
    attachments_log.to_excel(writer, sheet_name='AttachmentsLog', index=False)

### Sorting into seperate folders..

In [39]:
import os
import shutil

The separate folders for each extension will be created within the attachments_folder itself. 

The function will create a folder for each unique extension encountered and move the files with that extension into their respective folders within the attachments_folder.

In [40]:
def sort_attachments_by_extension(attachments_folder):
    # Get all files in the attachments folder
    files = os.listdir(attachments_folder)

    # Create a dictionary to store folders for each extension
    extension_folders = {}

    # Iterate over the files and identify their extensions
    for file in files:
        file_path = os.path.join(attachments_folder, file)
        
        # Exclude folders and files within extension folders from sorting 
        # ' .isfile() ' functions checks to make sure that it is a file ! 
        if not os.path.isfile(file_path) or os.path.dirname(file_path) != attachments_folder:  
            continue # Sorting restricted to main "attachments_folder" only.

        file_extension = os.path.splitext(file)[1]

        # Create a folder for the extension if it doesn't exist
        if file_extension not in extension_folders:
            extension_folder = os.path.join(attachments_folder, f"{file_extension}_folder") # making of sub-folders
            os.makedirs(extension_folder, exist_ok=True)
            extension_folders[file_extension] = extension_folder

        # Move the file to the respective extension folder
        destination_folder = extension_folders[file_extension]
        destination_path = os.path.join(destination_folder, file)
        shutil.move(file_path, destination_path)

In [41]:
sort_attachments_by_extension(attachments_folder)

Notes:

- get file sorting working ---- Done
- will have to access the webpage programmatically