# Loading and Modeling Mail Data

In [16]:
import mailbox
import pandas as pd

# Location of the mbox export to analyse (a relative path, so it is
# resolved against the current working directory).
mbox_file_path = "All mail Including Spam and Trash.mbox"

# Function to parse the mbox file and return a list of dictionaries
def _decode_header_value(value):
    """Return a header value as readable text, or None if the header is absent."""
    # Local import keeps this block self-contained (the file only imports
    # ``mailbox`` and ``pandas`` at the top).
    from email.errors import HeaderParseError
    from email.header import decode_header, make_header

    if value is None:
        return None
    if not isinstance(value, str):
        # Headers containing raw 8-bit bytes are returned as Header objects.
        return str(value)
    try:
        # Turn RFC 2047 encoded words (``=?utf-8?q?...?=``) into plain text.
        return str(make_header(decode_header(value)))
    except (HeaderParseError, LookupError, UnicodeError):
        # Malformed or unknown-charset header: keep the raw text rather than fail.
        return value


def _decode_part(part):
    """Decode the payload of a single (non-multipart) message part to text."""
    raw = part.get_payload(decode=True)  # undoes base64 / quoted-printable
    if raw is None:
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # Charset name not known to Python: fall back to UTF-8.
        return raw.decode('utf-8', errors='replace')


def _extract_body_text(message):
    """Return the message's text body: first text/plain part, else first text/html."""
    if not message.is_multipart():
        return _decode_part(message)

    html_fallback = None
    for part in message.walk():
        if part.is_multipart() or part.get_content_disposition() == 'attachment':
            continue
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            return _decode_part(part)
        if html_fallback is None and content_type == 'text/html':
            html_fallback = part
    return _decode_part(html_fallback) if html_fallback is not None else ''


def load_mbox_to_list(mbox_file_path):
    """Parse an mbox file into a list of per-message dictionaries.

    Args:
        mbox_file_path: Path to an existing mbox file.

    Returns:
        A list with one dict per message, in mailbox order, with the keys
        ``Subject``, ``From``, ``To``, ``Date``, ``Message_ID`` and ``Body``.
        Header values are decoded text, or ``None`` when the header is
        missing. ``Body`` is always a string: the decoded text/plain part
        (or text/html if there is no plain part, or ``''`` if neither).

    Raises:
        mailbox.NoSuchMailboxError: If ``mbox_file_path`` does not exist.
    """
    # create=False: a mistyped path must fail loudly instead of silently
    # creating an empty mailbox file and returning no messages.
    mbox = mailbox.mbox(mbox_file_path, create=False)
    try:
        return [
            {
                'Subject': _decode_header_value(message['subject']),
                'From': _decode_header_value(message['from']),
                'To': _decode_header_value(message['to']),
                'Date': _decode_header_value(message['date']),
                'Message_ID': _decode_header_value(message['message-id']),
                'Body': _extract_body_text(message),
            }
            for message in mbox
        ]
    finally:
        # Release the underlying file handle even if parsing fails.
        mbox.close()

# Load every message of the mbox file as a list of dictionaries.
email_data = load_mbox_to_list(mbox_file_path)

# Build the DataFrame with an explicit column list so the columns exist even
# when the mailbox holds no messages (otherwise df['Subject'] raises KeyError).
df = pd.DataFrame(
    email_data,
    columns=['Subject', 'From', 'To', 'Date', 'Message_ID', 'Body'],
)

# Flag likely spam: subjects containing any of the keywords, case-insensitive.
# The keywords are joined into a regex alternation; na=False treats missing
# subjects as "no match".
spam_keywords = ['spam', 'viagra', 'lottery']
spam_filter = df['Subject'].str.contains('|'.join(spam_keywords), case=False, na=False)
spam_emails = df[spam_filter]
print("---------------- Spam Emails:")
print(spam_emails)

# Count the number of emails from each distinct "From" header value.
sender_counts = df['From'].value_counts()
print("\n ---------------- Sender Counts:")
print(sender_counts)

# Select emails whose subject mentions Coursera or LinkedIn.
# NOTE: the ``meeting_*`` names are kept only so that later cells referring to
# them keep working; the filter itself has nothing to do with meetings.
meeting_keywords = ['coursera', 'linkedin']
meeting_filter = df['Subject'].str.contains('|'.join(meeting_keywords), case=False, na=False)
meeting_emails = df[meeting_filter]
print("\n---------------- Coursera and LinkedIn Emails:")
print(meeting_emails)




---------------- Spam Emails:
Empty DataFrame
Columns: [Subject, From, To, Date, Message_ID, Body]
Index: []

 ---------------- Sender Counts:
From
LinkedIn Job Alerts <jobalerts-noreply@linkedin.com>                                123
Ei Ei <eiei.cumyeik@gmail.com>                                                       40
"thanbula.21" <thanbula.21@gmail.com>                                                31
"Ee.lay" <eiei.cumyeik@gmail.com>                                                    28
"yokelzinwai@gmail.com" <yokelzinwai@gmail.com>                                      23
                                                                                   ... 
"chatdoelay@gmail.com" <chatdoelay@gmail.com>                                         1
"Scrum Master Certification: Scrum Methodologies" <no-reply@t.mail.coursera.org>      1
"Introduction to Scrum Master Training" <no-reply@t.mail.coursera.org>                1
"myatkay2010@gmail.com" <myatkay2010@gmail.com>             