In [1]:
def imap_login(address=None, password=None):
    '''Wrapper for logging into to email through IMAP
    
    ARGS: 
    address - str (defaul: None, prompt input). Email address 
    being connected to.
    
    password - str (default: None, prompt input). Password for email address.
    
    Returns:
    Mail object connected to corresponding server for email address'''
    
    import imaplib
    
    if not address:
        address = input('Enter you email address: ')
        
    if not password:
        password = input('Enter your password')
        
    if 'gmail' in address:
        SMTP_SERVER = 'imap.gmail.com'
    elif 'yahoo' in password:
        SMTP_SERVER = 'imap.mail.yahoo.com'
    else:
        raise NameError('Please enter a gmail or yahoo email address')
        
    SMTP_PORT = 993
    try:
        mail = imaplib.IMAP4_SSL(SMTP_SERVER)
        mail.login(address, password)
        mail.select('inbox')
    except Exception as e:
        raise
        
    return mail

In [2]:
def search_mailbox(mail, inbox='inbox'):
    """Connects to mailbox and collects a list of ids from mailbox
    
    ARGS:
    mail - logged in mail object
    
    inbox - str (defauls: 'inbox'). Mailbox to connect to. Must be valid
    imap mailbox.
    
    Returns:
    tup (mail object, list of mail_ids)
    If you don't need the ids, you can use an underscore like so:
    mail, _ = search_mailbox(mail)"""
    
    import imaplib
    
    mail.select(inbox)
    
    typ, data = mail.search(None, 'ALL')
    
    mail_ids = data[0].decode()
    mail_ids = mail_ids.split()
    
    return mail, mail_ids

In [3]:
def print_mail(mail, i_d=None, mail_part='(RFC822)'):
    """Prints out mail messages to screen
    
    ARGS:
    mail - logged in mail object
    
    i_d - single id or list of ids
    id(s) of email(s) to print
    
    Returns: None; prints message to screen"""
    
    import imaplib
    import email
    
    if type(i_d) == list:
        for i in i_d:
            typ, data = mail.fetch(str(i).encode(), mail_part)
            meta = email.message_from_bytes(data[0][1])
            print(meta)
    
    else:
        typ, data = mail.fetch(str(i_d).encode(), mail_part)
        meta = email.message_from_bytes(data[0][1])
        print(meta)

In [4]:
def save_mail(mail, i_d, filename='email_data.csv', verbose=False):
    """Writes email data to csv
    
    ARGS: 
    mail - logged in mail object
    
    i_d - list of i_ds
    ids of messages to get
    
    filename - string ending in .csv (default: 'email_data.csv')
    name of file to write to 
    
    Returns: None, saves data to csv"""
    
    import imaplib
    import email
    import csv
    
    csv_file = open(filename, 'w', encoding='UTF-8')
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['id', 'uid', 'from_', 'subject', 'msg', 'content_type'])
    
    for i in i_d:
        try:
            typ, data = mail.fetch(str(i).encode(), '(UID RFC822)')

            uid = email.message_from_bytes(data[0][0])
            uid = uid.get_payload()
            uid = uid.split()[-3]

            meta = email.message_from_bytes(data[0][1])
            from_ = meta['From']
            subject = meta['Subject']
            content_type = meta['Content-Type'].split(';')[0]
            
            msg = meta.get_payload()
            while type(msg) != str:
                msg = msg[0].get_payload()
#             if type(msg) == list:
#                 if len(msg) == 3:
#                     msg = msg[0].get_payload()[0].get_payload()
#                 else:
#                     msg = msg[0].get_payload()
            
            print(i)
            if verbose:
                print('UID: ', uid)
                print('From: ', from_)
                print('Subject: ', subject)
                print('Content-Type: ', content_type)
                print('Message: ', msg)
            csv_writer.writerow([i, uid, from_, subject, msg, content_type])
            print('Message saved')
        except Exception as e:
            print(e)

In [5]:
def clean_emails(df, drop_html=True):
    """Cleans our emails from csv (assumes columns of save_mail func)
    
    ARGS: pandas dataframe
    dataframe from csv with columns of save_mail()
    
    Returns:
    Dataframe with emails cleaned up"""
    
    df['content_type'] = df['content_type'].map(lambda x: 'multipart/alternative' if x == 'Multipart/Alternative' else x)
    
    df = df.dropna(subset=['msg'])
    
    if drop_html:
        mask = df['msg'].str.startswith('<')
        df = df[~mask]
        
    df['msg'] = df['msg'].apply(lambda x: ''.join(x.split('=\r\n')))
    df['msg'] = df['msg'].apply(lambda x: ''.join(x.split('=E2=80=99')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('=0A')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('=0D')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('\r\n')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('\n')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('\r')))
    df['msg'] = df['msg'].apply(lambda x: ' '.join(x.split('\t')))
    return df

#### For personal email

In [6]:
mailbox = imap_login('Samuelithian@gmail.com', 'fcsqfaevpyotaaxa')

In [7]:
mail, mail_ids = search_mailbox(mailbox)

In [8]:
save_mail(mail, mail_ids)

1
Message saved
2
Message saved
3
Message saved
4
Message saved
5
Message saved
6
Message saved
7
Message saved
8
Message saved
9
Message saved
10
Message saved
11
Message saved
12
Message saved
13
Message saved
14
Message saved
15
Message saved
16
Message saved
17
Message saved
18
Message saved
19
Message saved
20
Message saved
21
Message saved
22
Message saved
23
Message saved
24
Message saved
25
Message saved
26
Message saved
27
Message saved
28
Message saved
29
Message saved
30
Message saved
31
Message saved
32
Message saved
33
Message saved
34
Message saved
35
Message saved
36
Message saved
37
Message saved
38
Message saved
39
Message saved
40
Message saved
41
Message saved
42
Message saved
43
Message saved
44
Message saved
45
Message saved
46
Message saved
47
Message saved
48
Message saved
49
Message saved
50
Message saved
51
Message saved
52
Message saved
53
Message saved
54
Message saved
55
Message saved
56
Message saved
57
Message saved
58
Message saved
59
Message saved
60
Mes

In [9]:
import pandas as pd

emails = pd.read_csv('email_data.csv')
df = clean_emails(emails)

df[df['id'] == 225]

Unnamed: 0,id,uid,from_,subject,msg,content_type
224,225,13504,norman hepner <normstormin@gmail.com>,Re: Send me your address please.,"ok, but what building and room number are you...",multipart/alternative


In [10]:
sam_tags = pd.read_csv('personal-csv.csv')

sam_tags[sam_tags['keys'] == 225]

Unnamed: 0,keys,from/to,subject,body,tags,New
515,225,norman hepner <normstormin@gmail.com>,Re: Send me your address please.,--001a1148300220a45e053eb7031d Content-Type: t...,['Personal'],Personal


In [11]:
sam_tags = sam_tags.drop(columns=['from/to', 'subject', 'body', 'tags'])
sam_tags.head()

Unnamed: 0,keys,New
0,748,Personal
1,747,other
2,745,Personal
3,742,Entertainment
4,740,Entertainment


In [12]:
new_df = df.merge(sam_tags, how='left', left_on='id', right_on='keys')
new_df.head()

Unnamed: 0,id,uid,from_,subject,msg,content_type,keys,New
0,1,2729,norman hepner <normstormin@gmail.com>,mac and cheese,http://shine.yahoo.com/channel/food/tips-for-m...,text/plain,,
1,2,3803,norman hepner <normstormin@gmail.com>,Fwd: View Your USAA Credit Card Bill Online,---------- Forwarded message ---------- From:...,multipart/alternative,2.0,"Personal, Finance"
2,3,3804,norman hepner <normstormin@gmail.com>,usaa account info,samuelithian Rockstar11! PIN: 7253,multipart/alternative,3.0,"Personal, Finance"
3,4,3903,"""Mattson, Kelly"" <KellyMattson@selah.k12.wa.us>","RE: From Jared, Mackenzie, Jordan and Sam.","My Thespian Friends, Your per...",multipart/mixed,4.0,Personal
4,5,4034,Thomas Hepner <thep3333@gmail.com>,Samuel College,Here are my recommendations for college stuff....,multipart/mixed,5.0,"Personal, Productivity"


In [13]:
new_df.at[0, 'keys'] = 1
new_df.at[0, 'New'] = 'Personal, other'

new_df.iloc[0]

id                                                              1
uid                                                          2729
from_                       norman hepner <normstormin@gmail.com>
subject                                            mac and cheese
msg             http://shine.yahoo.com/channel/food/tips-for-m...
content_type                                           text/plain
keys                                                            1
New                                               Personal, other
Name: 0, dtype: object

#### For professional email

In [14]:
def mail_save(mail, i_d, filename='email_data(2).csv', verbose=False):
    """Writes email data to csv
    
    ARGS: 
    mail - logged in mail object
    
    i_d - list of i_ds
    ids of messages to get
    
    filename - string ending in .csv (default: 'email_data.csv')
    name of file to write to 
    
    Returns: None, saves data to csv"""
    
    import imaplib
    import email
    import csv
    
    csv_file = open(filename, 'w', encoding='UTF-8')
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['id', 'uid', 'from_', 'subject', 'msg', 'content_type'])
    
    for i in i_d:
        try:
            typ, data = mail.fetch(str(i).encode(), '(UID RFC822)')

            uid = email.message_from_bytes(data[0][0])
            uid = uid.get_payload()
            uid = uid.split()[-3]

            meta = email.message_from_bytes(data[0][1])
            from_ = meta['From']
            subject = meta['Subject']
            content_type = meta['Content-Type'].split(';')[0]
            
            msg = meta.get_payload()
            while type(msg) != str:
                msg = msg[0].get_payload()
#             if type(msg) == list:
#                 if len(msg) == 3:
#                     msg = msg[0].get_payload()[0].get_payload()
#                 else:
#                     msg = msg[0].get_payload()
            
            print(i)
            if verbose:
                print('UID: ', uid)
                print('From: ', from_)
                print('Subject: ', subject)
                print('Content-Type: ', content_type)
                print('Message: ', msg)
            csv_writer.writerow([i, uid, from_, subject, msg, content_type])
            print('Message saved')
        except Exception as e:
            print(e)

In [15]:
mailbox = imap_login('Samuelhepner03@gmail.com', 'lhalptrbmgugsrhf')

In [16]:
mail, mail_ids = search_mailbox(mailbox)

In [17]:
mail_save(mail, mail_ids)

1
Message saved
2
Message saved
3
Message saved
4
Message saved
5
Message saved
6
Message saved
7
Message saved
8
Message saved
9
Message saved
10
Message saved
11
Message saved
12
Message saved
13
Message saved
14
Message saved
15
Message saved
16
Message saved
17
Message saved
18
Message saved
19
Message saved
20
Message saved
21
Message saved
22
Message saved
23
Message saved
24
Message saved
25
Message saved
26
Message saved
27
Message saved
28
Message saved
29
Message saved
30
Message saved
31
Message saved
32
Message saved
33
Message saved
34
Message saved
35
Message saved
36
Message saved
37
Message saved
38
Message saved
39
Message saved
40
Message saved
41
Message saved
42
Message saved
43
Message saved
44
Message saved
45
Message saved
46
Message saved
47
Message saved
48
Message saved
49
Message saved
50
Message saved
51
Message saved
52
Message saved
53
Message saved
54
Message saved
55
Message saved
56
Message saved
57
Message saved
58
Message saved
59
Message saved
60
Mes

In [18]:
import pandas as pd

emails = pd.read_csv('email_data(2).csv')
df2 = clean_emails(emails)

df2.head()

Unnamed: 0,id,uid,from_,subject,msg,content_type
0,1,1211,"""Brown, Joshua J MSgt USAF (US)"" <joshua.j.bro...",RE: [Non-DoD Source] Re: Air Guard,"Samuel, =20 I might have a position for ...",multipart/signed
2,3,1484,"""Brown, Joshua J MSgt USAF (US)"" <joshua.j.bro...",RE: [Non-DoD Source] Re: Air Guard,Here you go. =20 You can also google tha...,multipart/signed
3,4,1492,"""Brown, Joshua J MSgt USAF (US)"" <joshua.j.bro...",RE: [Non-DoD Source] Info about open position,"Samuel, I just sent a PDF with no link...",multipart/signed
4,5,1494,"""Brown, Joshua J MSgt USAF (US)"" <joshua.j.bro...",RE: [Non-DoD Source] Info about open position,Copy its here in Montana with our communicatio...,multipart/signed
5,6,1708,noreply@afrisstf.csd.disa.mil,Air Force Reserve Information,Thank you for inquiring about the Air Force Re...,multipart/signed


In [19]:
sam_other = pd.read_csv('other-sam.csv')
sam_other.head()

Unnamed: 0,keys,from/to,subject,body,tags,New
0,59,Google <no-reply@accounts.google.com>,App password created,--00000000000002379205972971fd Content-Type: t...,['Security'],Productivity
1,57,Google <no-reply@accounts.google.com>,2-Step Verification turned on,--000000000000d50ed3059729649c Content-Type: t...,Security'],Productivity
2,56,"""Medium Daily Digest"" <noreply@medium.com>",How To Wake Up at 5 A.M. Every Day | Bryan Ye ...,--337b1df667177cb513899ad44e31fb8d4f8637575996...,"['Promotions', 'Productivity']",Entertainment
3,55,"""no-reply-aws@amazon.com"" <no-reply-aws@amazon...",RE:[CASE 6570793521] Limit Increase: SageMaker,------=_Part_26869745_214056034.1573352866590 ...,['Productivity'],Productivity
4,54,Amazon Web Services <aws-marketing-email-repli...,Thank you for attending AWS Machine Learning W...,------=_Part_-1563787701_1894373630.1573225596...,"['Events', 'Productivity']","Events, Productivity"


In [20]:
other_tags = sam_other.drop(columns=['from/to', 'subject', 'body', 'tags'])
other_tags.head()

Unnamed: 0,keys,New
0,59,Productivity
1,57,Productivity
2,56,Entertainment
3,55,Productivity
4,54,"Events, Productivity"


In [21]:
new_df2 = df2.merge(other_tags, how='left', left_on='id', right_on='keys')
# new_df2.tail(50)

In [22]:
tables = [new_df, new_df2]
result = pd.concat(tables)

In [23]:
result['keys'] = result['keys'].fillna(result['id'])
result.tail()

Unnamed: 0,id,uid,from_,subject,msg,content_type,keys,New
85,91,3820,Jasmine Logan <invitations@linkedin.com>,"Hi Samuel, please add me to your professional ...","Hi Samuel, I'd like to join your LinkedIn n...",multipart/alternative,91.0,
86,92,3821,Amazon Web Services <aws-marketing-email-repli...,=?UTF-8?Q?=E2=9A=A1_Recent_Announcements_from_...,A Weekly Review from AWS Featured Announcem...,multipart/alternative,92.0,
87,93,3822,Amazon Web Services <aws-marketing-email-repli...,Watch re:Invent keynotes live,"AWS re:Invent December 2 =E2=80=93 6, 2019=20...",multipart/alternative,93.0,
88,94,3823,Trello <do-not-reply@trello.com>,5 new notifications on Tagger Smarter Email si...,Here's what you missed on Trello. avrahamja...,multipart/alternative,94.0,
89,95,3824,"""Research from data.world"" <research@data.world>",Webinar tomorrow! Save your seat now.,Learn how Yonder successfully built their cata...,multipart/alternative,95.0,


In [24]:
result['New'] = result['New'].fillna('-')
result.tail(100)

Unnamed: 0,id,uid,from_,subject,msg,content_type,keys,New
741,754,49133,quip <refills@getquip.com>,Refill processing! Please review address and w...,*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,754.0,-
742,755,49148,CD Baby <no-reply@cdbaby.com>,SAMUEL - Your CD Baby Order! SO06582499,SAMUEL- Thanks for your order with CD Baby!...,multipart/alternative,755.0,-
743,756,49149,"""Steam"" <noreply@steampowered.com>","Halo: The Master Chief Collection, Upcoming La...","Hello samuelithian, You are receiving this ...",multipart/alternative,756.0,-
744,757,49167,quip <refills@getquip.com>,"Refill Shipped! Learn how to refill, refresh a...",*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,757.0,-
745,758,49189,Twitter <info@twitter.com>,"squeakadeeks shared ""Buy Squeakadeeks a Coffee...","@samuel_nest, see what's happening in the worl...",multipart/alternative,758.0,-
746,759,49191,Twitter <info@twitter.com>,"Lambda School shared ""Announcing New Mental He...","@samuel_nest, see what's happening in the worl...",multipart/alternative,759.0,-
747,760,49193,Twitter <notify@twitter.com>,"@samuel_nest, check out the notifications you ...",Twitter > https://twitter.com You've got ...,multipart/alternative,760.0,-
748,761,49205,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Camp Dudley Summer 2019,Hey everyone Winter camp is Jan 3-5. I would l...,multipart/alternative,761.0,-
749,762,49213,Twitter <info@twitter.com>,=?UTF-8?Q?SHiFT_&amp;_VIP_Codes_=F0=9F=97=9D_s...,"@samuel_nest, see what's happening in the worl...",multipart/alternative,762.0,-
750,763,49223,"""Dark Horse Direct"" <direct@darkhorse.com>",New Product Announcement!,"You could win the largest, most powerful vesse...",multipart/alternative,763.0,-


In [25]:
result = result.drop(columns=['keys', 'id'])

In [26]:
import csv

In [27]:
result = result.reset_index(drop=True)
result.tail(100)

Unnamed: 0,uid,from_,subject,msg,content_type,New
741,49133,quip <refills@getquip.com>,Refill processing! Please review address and w...,*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,-
742,49148,CD Baby <no-reply@cdbaby.com>,SAMUEL - Your CD Baby Order! SO06582499,SAMUEL- Thanks for your order with CD Baby!...,multipart/alternative,-
743,49149,"""Steam"" <noreply@steampowered.com>","Halo: The Master Chief Collection, Upcoming La...","Hello samuelithian, You are receiving this ...",multipart/alternative,-
744,49167,quip <refills@getquip.com>,"Refill Shipped! Learn how to refill, refresh a...",*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,-
745,49189,Twitter <info@twitter.com>,"squeakadeeks shared ""Buy Squeakadeeks a Coffee...","@samuel_nest, see what's happening in the worl...",multipart/alternative,-
746,49191,Twitter <info@twitter.com>,"Lambda School shared ""Announcing New Mental He...","@samuel_nest, see what's happening in the worl...",multipart/alternative,-
747,49193,Twitter <notify@twitter.com>,"@samuel_nest, check out the notifications you ...",Twitter > https://twitter.com You've got ...,multipart/alternative,-
748,49205,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Camp Dudley Summer 2019,Hey everyone Winter camp is Jan 3-5. I would l...,multipart/alternative,-
749,49213,Twitter <info@twitter.com>,=?UTF-8?Q?SHiFT_&amp;_VIP_Codes_=F0=9F=97=9D_s...,"@samuel_nest, see what's happening in the worl...",multipart/alternative,-
750,49223,"""Dark Horse Direct"" <direct@darkhorse.com>",New Product Announcement!,"You could win the largest, most powerful vesse...",multipart/alternative,-


In [28]:
import numpy as np

result['New'].replace('-', np.nan, inplace=True)
result.tail(100)

Unnamed: 0,uid,from_,subject,msg,content_type,New
741,49133,quip <refills@getquip.com>,Refill processing! Please review address and w...,*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,
742,49148,CD Baby <no-reply@cdbaby.com>,SAMUEL - Your CD Baby Order! SO06582499,SAMUEL- Thanks for your order with CD Baby!...,multipart/alternative,
743,49149,"""Steam"" <noreply@steampowered.com>","Halo: The Master Chief Collection, Upcoming La...","Hello samuelithian, You are receiving this ...",multipart/alternative,
744,49167,quip <refills@getquip.com>,"Refill Shipped! Learn how to refill, refresh a...",*|SUBJECT|* <https://www.getquip.com/...,multipart/alternative,
745,49189,Twitter <info@twitter.com>,"squeakadeeks shared ""Buy Squeakadeeks a Coffee...","@samuel_nest, see what's happening in the worl...",multipart/alternative,
746,49191,Twitter <info@twitter.com>,"Lambda School shared ""Announcing New Mental He...","@samuel_nest, see what's happening in the worl...",multipart/alternative,
747,49193,Twitter <notify@twitter.com>,"@samuel_nest, check out the notifications you ...",Twitter > https://twitter.com You've got ...,multipart/alternative,
748,49205,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Camp Dudley Summer 2019,Hey everyone Winter camp is Jan 3-5. I would l...,multipart/alternative,
749,49213,Twitter <info@twitter.com>,=?UTF-8?Q?SHiFT_&amp;_VIP_Codes_=F0=9F=97=9D_s...,"@samuel_nest, see what's happening in the worl...",multipart/alternative,
750,49223,"""Dark Horse Direct"" <direct@darkhorse.com>",New Product Announcement!,"You could win the largest, most powerful vesse...",multipart/alternative,


In [30]:
result.dropna(subset=['New'], inplace=True)
result.tail(100)

Unnamed: 0,uid,from_,subject,msg,content_type,New
688,31898,Mathnasium of Bothell <bothell@mathnasium.com>,"Instructor Schedule - 6/03 to 6/14, PLEASE VER...","All, Please verify your assigned schedule and...",multipart/alternative,"Productivity, Events"
689,31905,Mathnasium of Bothell <bothell@mathnasium.com>,"Re: Instructor Schedule - 6/03 to 6/14, PLEASE...",*PLEASE SEE UPDATED SCHEDULE.* - *Rem...,multipart/alternative,"Productivity, Events"
690,31953,Kyle McPherson <kmcpherson@yakimaymca.org>,Re: Introduction video,It actually says video unavailable. _________...,multipart/alternative,"Personal, Productivity"
691,31956,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Introduction video,SSBkaWQgZ2V0IHRoZSBlbWFpbCBmcm9tIHlvdSB0dWJlIG...,multipart/related,"Personal, Productivity"
692,31957,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Introduction video,SG1tIEkgYXBwcmVjaWF0ZSB0aGUgZWZmb3J0IGJ1dCBpdC...,multipart/related,"Personal, Productivity"
693,31958,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Introduction video,SGFoYSB0aGlzIGlzIGdldHRpbmcgZnVubnkuIEkgYW0gc2...,multipart/related,"Personal, Productivity"
694,31959,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Introduction video,T29wcyBuZXZlciBtaW5kIEkgZ290IGl0Lg0KDQpGcm9tOi...,multipart/related,"Personal, Productivity"
695,31960,Kyle McPherson <kmcpherson@yakimaymca.org>,RE: Introduction video,SXTigJlzIGF3ZXNvbWUgdGhhbmsgeW91Lg0KDQpGcm9tOi...,multipart/related,"Personal, Productivity"
696,32027,Kathy Mitchell <kmm1101@uw.edu>,action required: Peer Facilitator hiring proc...,Hi all. Please complete the following requi...,multipart/related,"Personal, Productivity"
697,32052,Kathy Mitchell <kmm1101@uw.edu>,Re: Student employee form,"Hi Sam. sorry! yes, I did receive your for...",multipart/alternative,"Personal, Productivity"


In [31]:
csv_file = open('all-emails.csv', 'w', encoding='UTF-8')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['uid', 'from_', 'subject', 'msg', 'content_type', 'New'])
for i in result.index:
    csv_writer.writerow([result['uid'][i], result['from_'][i], result['subject'][i], result['msg'][i], result['content_type'][i], result['New'][i]])
    print('Message saved.')

Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message saved.
Message sa

In [33]:
emails = pd.read_csv('all-emails.csv')
emails.head(100)

Unnamed: 0,uid,from_,subject,msg,content_type,New
0,2729,norman hepner <normstormin@gmail.com>,mac and cheese,http://shine.yahoo.com/channel/food/tips-for-m...,text/plain,"Personal, other"
1,3803,norman hepner <normstormin@gmail.com>,Fwd: View Your USAA Credit Card Bill Online,---------- Forwarded message ---------- From:...,multipart/alternative,"Personal, Finance"
2,3804,norman hepner <normstormin@gmail.com>,usaa account info,samuelithian Rockstar11! PIN: 7253,multipart/alternative,"Personal, Finance"
3,3903,"""Mattson, Kelly"" <KellyMattson@selah.k12.wa.us>","RE: From Jared, Mackenzie, Jordan and Sam.","My Thespian Friends, Your per...",multipart/mixed,Personal
4,4034,Thomas Hepner <thep3333@gmail.com>,Samuel College,Here are my recommendations for college stuff....,multipart/mixed,"Personal, Productivity"
5,4242,Thomas Hepner <hepner.thomas@gmail.com>,Why do Running Start?,Files attached,multipart/mixed,"Personal, Productivity"
6,4288,Thomas Hepner <hepner.thomas@gmail.com>,Fwd: Your Amazon.com Promotional Code,wallpaper chico ---------- Forwarded messag...,multipart/alternative,"Personal, Shopping"
7,6000,norman hepner <normstormin@gmail.com>,Fwd: Bellair Airporter Shuttle Confirmation #1...,---------- Forwarded message ---------- From:...,multipart/alternative,"Personal, Travel"
8,6194,norman hepner <normstormin@gmail.com>,Fwd: Order Placed - Ticket Order #177630751,HAPPY BIRTHDAY ... LOVE DAD ----------...,multipart/alternative,"Personal, Shopping"
9,6195,Thomas Hepner <hepner.thomas@gmail.com>,Re: My latest C++ work,Cool! I'll take a look at it tomorrow after I ...,multipart/alternative,Personal
