In [102]:
import win32com.client
import os
import pandas as pd

In [103]:
# Create a DataFrame to store the email log
email_log = pd.DataFrame(columns=['Serial', 'Sender', 'Receiver', 'Time', 'Subject', 'FileType', 'Attachment'])

'win32com.client' module in Python allows you to programmatically control Microsoft Office applications.

'Dispatch()', for creating an instance of an Office application.

In [104]:
# Access Outlook and retrieve the inbox folder
outlook = win32com.client.Dispatch("Outlook.Application")
mapi = outlook.GetNamespace("MAPI")
inbox = mapi.GetDefaultFolder(6)  # Inbox folder
messages = inbox.Items

In [105]:
serial_counter = 1  # Counter for serial numbers

#### Creating/Accessing Folder for all attachments (path mentioned here)

In [106]:
# Create a folder to save the attachments
attachments_folder = r"D:\GIKI\CAA_intern\attachments"
os.makedirs(attachments_folder, exist_ok=True)

In [107]:
# Iterate over unread emails, download attachments, mark as read, and update the log
for message in messages:
    if message.Unread:
        sender = message.SenderEmailAddress
        receiver = message.ReceivedByName
        time = message.ReceivedTime
        subject = message.Subject

        attachments_info = []  # List to store attachment information
        
        for attachment in message.Attachments:
            filename = attachment.FileName
            file_extension = os.path.splitext(filename)[1]
            new_filename = f"{serial_counter}_{filename}"
            filepath = os.path.abspath(os.path.join(attachments_folder, new_filename))
            attachment.SaveAsFile(filepath)
            
            attachments_info.append({
                'FileName': new_filename,
                'FileType': file_extension
            })

        # Mark the email as read
        message.UnRead = False
        
        # Add email and attachment information to the log
        for attachment_info in attachments_info:
            email_log = email_log.append({                   # Append method 
                'Serial': serial_counter,
                'Sender': sender,
                'Receiver': receiver,
                'Time': str(time),
                'Subject': subject,
                'FileType': attachment_info['FileType'],
                'Attachment': attachment_info['FileName']
            }, ignore_index=True)                           
            
            serial_counter += 1

  email_log = email_log.append({                   # Append method
  email_log = email_log.append({                   # Append method
  email_log = email_log.append({                   # Append method
  email_log = email_log.append({                   # Append method
  email_log = email_log.append({                   # Append method


FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  email_log = email_log.append({

Therefore this is the alternative in case that ever happens.. But I find the above append method more easy..

In [108]:
"""
# Add email and attachment information to the log
for attachment_info in attachments_info:
    email_entry = {
        'Serial': serial_counter,
        'Sender': sender,
        'Receiver': receiver,
        'Time': str(time),
        'Subject': subject,
        'FileType': attachment_info['FileType'],
        'Attachment': attachment_info['FileName']
    }
    email_log = pd.concat([email_log, pd.DataFrame([email_entry])], ignore_index=True)
    serial_counter += 1

"""

"\n# Add email and attachment information to the log\nfor attachment_info in attachments_info:\n    email_entry = {\n        'Serial': serial_counter,\n        'Sender': sender,\n        'Receiver': receiver,\n        'Time': str(time),\n        'Subject': subject,\n        'FileType': attachment_info['FileType'],\n        'Attachment': attachment_info['FileName']\n    }\n    email_log = pd.concat([email_log, pd.DataFrame([email_entry])], ignore_index=True)\n    serial_counter += 1\n\n"

In [109]:
email_log

Unnamed: 0,Serial,Sender,Receiver,Time,Subject,FileType,Attachment
0,1,/O=EXCHANGELABS/OU=EXCHANGE ADMINISTRATIVE GRO...,u2020337,2023-06-21 01:19:46.173000+00:00,BOP Summer Internship Program 2023,.pdf,1_INTERN Ad14553.pdf
1,2,owais.sajid002@gmail.com,u2020337,2023-06-22 10:59:30.422000+00:00,attachment test 1,.pdf,2_2020337_intern_CV.pdf
2,3,owais.sajid002@gmail.com,u2020337,2023-06-22 11:00:33.318000+00:00,attachment test 2,.docx,3_CV_assignment3.docx
3,4,owais.sajid002@gmail.com,u2020337,2023-06-23 11:25:04.207000+00:00,Attachment test 3,.docx,4_CV_assignment3.docx
4,5,owais.sajid002@gmail.com,u2020337,2023-06-23 11:25:04.207000+00:00,Attachment test 3,.png,4_fiverr-logo.png


#### Saving Log File (path mentioned here)

In [110]:
log_filepath = os.path.abspath(r"D:\GIKI\CAA_intern\email_log.xlsx")
email_log.to_excel(log_filepath, index=False)

### Sorting into seperate folders..

In [113]:
import os
import shutil

The separate folders for each extension will be created within the attachments_folder itself. 

The function will create a folder for each unique extension encountered and move the files with that extension into their respective folders within the attachments_folder.

In [114]:
def sort_attachments_by_extension(attachments_folder):
    # Get all files in the attachments folder
    files = os.listdir(attachments_folder)

    # Create a dictionary to store folders for each extension
    extension_folders = {}

    # Iterate over the files and identify their extensions
    for file in files:
        file_path = os.path.join(attachments_folder, file)
        
        # Exclude folders and files within extension folders from sorting 
        # ' .isfile() ' functions checks to make sure that it is a file ! 
        if not os.path.isfile(file_path) or os.path.dirname(file_path) != attachments_folder:  
            continue # Sorting restricted to main "attachments_folder" only.

        file_extension = os.path.splitext(file)[1]

        # Create a folder for the extension if it doesn't exist
        if file_extension not in extension_folders:
            extension_folder = os.path.join(attachments_folder, f"{file_extension}_folder")
            os.makedirs(extension_folder, exist_ok=True)
            extension_folders[file_extension] = extension_folder

        # Move the file to the respective extension folder
        destination_folder = extension_folders[file_extension]
        destination_path = os.path.join(destination_folder, file)
        shutil.move(file_path, destination_path)

In [116]:
sort_attachments_by_extension(attachments_folder)

______________________________________

creating sub-logs for each file extension folder that keeps the orignal appointed serial number.

In [118]:
# Create a list of unique file extensions from the main log data frame
unique_extensions = email_log['FileType'].unique()

# Create a sub-log for each file extension and store them as Excel files in respective folders
for extension in unique_extensions:
    # Filter the main log data frame based on the file extension
    sub_log = email_log[email_log['FileType'] == extension]
    
    # Create a folder for the extension if it doesn't exist
    folder_name = f"{extension}_folder"
    folder_path = os.path.join(attachments_folder, folder_name)
    os.makedirs(folder_path, exist_ok=True)
    
    # Set the file path for the sub-log Excel file
    file_name = f"sub_log_{extension}.xlsx"
    file_path = os.path.join(folder_path, file_name)
    
    # Save the sub-log as an Excel file
    sub_log.to_excel(file_path, index=False)


Notes:

- get file sorting working ---- Done
- will have to access the webpage programmatically