In [1]:
# Pulling in second set of emails

from dotenv import load_dotenv
import os
import pandas as pd

load_dotenv()

True

In [2]:
# Set the email variables needed

# Credentials are read from the process environment; each is None when the
# corresponding variable is not set.
email = os.getenv('SECOND_EMAIL')
password = os.getenv('SECOND_PASSWORD')

In [3]:
def imap_login(address=None, password=None):
    '''Log in to a Gmail or Yahoo mailbox over IMAP (SSL) and select the inbox.

    ARGS:
    address - str (default: None, prompt input). Email address
    being connected to. The IMAP server is chosen from this address.

    password - str (default: None, prompt input). Password for email address.
    When prompted for, it is read without echoing it to the screen.

    Returns:
    Mail object (imaplib.IMAP4_SSL) logged in to the corresponding server
    for the email address, with 'inbox' selected.

    Raises:
    NameError - the address is neither a gmail nor a yahoo address.
    Any exception raised while connecting or logging in is propagated
    after the connection has been closed.'''

    import imaplib
    from getpass import getpass

    if not address:
        address = input('Enter you email address: ')

    if not password:
        # getpass keeps the password out of the notebook output
        password = getpass('Enter your password')

    # The server is picked from the *address* only; the password must never
    # influence this choice.
    domain_hint = address.lower()
    if 'gmail' in domain_hint:
        imap_host = 'imap.gmail.com'
    elif 'yahoo' in domain_hint:
        imap_host = 'imap.mail.yahoo.com'
    else:
        raise NameError('Please enter a gmail or yahoo email address')

    imap_port = 993  # standard IMAP-over-SSL port

    mail = imaplib.IMAP4_SSL(imap_host, imap_port)
    try:
        mail.login(address, password)
        mail.select('inbox')
    except Exception:
        # Do not leave a half-open connection behind on a failed login
        mail.shutdown()
        raise

    return mail

In [10]:
def search_mailbox(mail, inbox='inbox'):
    """Select a mailbox and list the ids of every message in it

    ARGS:
    mail - logged in mail object

    inbox - str (default: 'inbox'). Mailbox to select. Must be a valid
    imap mailbox.

    Returns:
    tup (mail object, list of mail ids as strings)
    If the ids are not needed, discard them with an underscore:
    mail, _ = search_mailbox(mail)"""

    import imaplib

    mail.select(inbox)

    # The search reply carries all ids in its first element as one
    # space-separated bytes string.
    _status, reply = mail.search(None, 'ALL')
    id_list = reply[0].decode().split()

    return mail, id_list

In [11]:
def save_mail(mail, i_d, filename='email_data.csv', verbose=False):
    """Fetch messages and write one csv row per message

    ARGS:
    mail - logged in mail object with a mailbox selected

    i_d - iterable of message ids
    ids of messages to get (each is passed to mail.fetch)

    filename - string ending in .csv (default: 'email_data.csv')
    name of file to write to. The file is overwritten.

    verbose - bool (default: False)
    when True, the parsed fields of every message are printed as well

    Returns: None, saves data to csv with the columns
    id, uid, from_, subject, msg, content_type

    A message that cannot be fetched or parsed is skipped: the error is
    printed and processing continues with the next id."""

    import email
    import csv

    # newline='' is what the csv module requires so that it controls the line
    # endings itself; the with-block guarantees the file is flushed and closed
    # even if the loop is interrupted.
    with open(filename, 'w', encoding='UTF-8', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['id', 'uid', 'from_', 'subject', 'msg', 'content_type'])

        for i in i_d:
            try:
                typ, data = mail.fetch(str(i).encode(), '(UID RFC822)')

                # data[0] is a (response line, raw message) pair. The UID is
                # taken as the third token from the end of the response line;
                # assumes the server answers 'N (UID <uid> RFC822 {size}'
                # -- TODO confirm for servers other than the one used here.
                uid = data[0][0].decode(errors='replace').split()[-3]

                meta = email.message_from_bytes(data[0][1])
                from_ = meta['From']
                subject = meta['Subject']
                # A missing Content-Type header yields None; record it as an
                # empty string instead of dropping the whole message.
                content_type = (meta['Content-Type'] or '').split(';')[0]

                # For multipart messages descend into the first part until a
                # plain string payload is reached.
                msg = meta.get_payload()
                while not isinstance(msg, str):
                    msg = msg[0].get_payload()

                print(i)
                if verbose:
                    print('UID: ', uid)
                    print('From: ', from_)
                    print('Subject: ', subject)
                    print('Content-Type: ', content_type)
                    print('Message: ', msg)
                csv_writer.writerow([i, uid, from_, subject, msg, content_type])
                print('Message saved')
            except Exception as e:
                # Best effort: report the problem and carry on with the next id
                print(e)

In [13]:
# Open the IMAP session with the credentials loaded earlier
mail = imap_login(email, password)

# search_mailbox returns a (mail object, list of ids) tuple
inbox = search_mailbox(mail, 'inbox')

In [16]:
# inbox is the (mail object, list of ids) tuple returned by search_mailbox;
# index 1 is the id list, so this is the number of messages found.
len(inbox[1])

43657

In [18]:
# Fetch 1000 messages, ids 42657 through 43656 (the range end is exclusive),
# and write them to Email_data/second_email.csv.
# NOTE(review): the message count reported for this mailbox is 43657, so the
# message with id 43657 is not included here -- confirm that is intended.
# NOTE(review): presumably the Email_data/ directory must already exist, since
# save_mail opens the file for writing directly -- verify before re-running.
save_mail(mail, range(42657, 43657), filename='Email_data/second_email.csv')

42657
Message saved
42658
Message saved
42659
Message saved
42660
Message saved
42661
Message saved
42662
Message saved
42663
Message saved
42664
Message saved
42665
Message saved
42666
Message saved
42667
Message saved
42668
Message saved
42669
Message saved
42670
Message saved
42671
Message saved
42672
Message saved
42673
Message saved
42674
Message saved
42675
Message saved
42676
Message saved
42677
Message saved
42678
Message saved
42679
Message saved
42680
Message saved
42681
Message saved
42682
Message saved
42683
Message saved
42684
Message saved
42685
Message saved
42686
Message saved
42687
Message saved
42688
Message saved
42689
Message saved
42690
Message saved
42691
Message saved
42692
Message saved
42693
Message saved
42694
Message saved
42695
Message saved
42696
Message saved
42697
Message saved
42698
Message saved
42699
Message saved
42700
Message saved
42701
Message saved
42702
Message saved
42703
Message saved
42704
Message saved
42705
Message saved
42706
Message saved
