# Fetch Mails from Gmail using Gmail-API

This project searches a Gmail account, fetches the content of the matching messages, and converts it into clean, readable text.

In [58]:
#Importing required modules
from __future__ import print_function
import httplib2
import os
from googleapiclient.discovery import build
#from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import base64
from bs4 import BeautifulSoup
import re
import time
import dateutil.parser as parser
from datetime import datetime
import datetime
import email
from cleantext import clean
import csv
import pandas as pd


# Build the `flags` object that get_credentials() passes to tools.run_flow().
# parse_args([]) parses an explicit empty argument list rather than the
# process's own command-line arguments.
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args([])
except ImportError:
    # Without flags, get_credentials() takes its tools.run() branch instead.
    flags = None

In [59]:
#Defining authorization scopes
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
CLIENT_SECRET_FILE = 'credentials.json' #Name of gmail credential file
storage = 'gmail-storage.json' #Name of the file where get_credentials() stores the obtained credentials
APPLICATION_NAME = 'Gmail API Python' #Name of Application

#Function to activate credential
def get_credentials():
    """Return OAuth2 credentials for the Gmail API, running the flow if needed.

    Stored credentials are looked up in the ``storage`` file inside
    ``~/Desktop/Doing_DS/secrets`` (the directory is created when missing).
    When nothing is stored, or the stored credentials are flagged invalid,
    the OAuth2 flow is started from ``CLIENT_SECRET_FILE`` in that same
    directory and its result is returned instead.

    Returns:
        Credentials, either the stored ones or the newly obtained ones.
    """
    secrets_dir = os.path.join(os.path.expanduser('~/Desktop/Doing_DS'),
                               'secrets')
    client_secret_path = os.path.join(secrets_dir, CLIENT_SECRET_FILE)
    if not os.path.exists(secrets_dir):
        os.makedirs(secrets_dir)
    token_path = os.path.join(secrets_dir, storage)

    token_store = Storage(token_path)
    cached = token_store.get()
    if cached and not cached.invalid:
        # Stored credentials are usable; no OAuth2 flow required.
        return cached

    oauth_flow = client.flow_from_clientsecrets(client_secret_path, SCOPES)
    oauth_flow.user_agent = APPLICATION_NAME
    if not flags:
        # Needed only for compatibility with Python 2.6
        fresh = tools.run(oauth_flow, token_store)
    else:
        fresh = tools.run_flow(oauth_flow, token_store, flags)
    print('Storing credentials to ' + token_path)
    return fresh

In [60]:
# Obtain the credentials once at module level; get_service() reads this global.
credentials = get_credentials()

In [61]:
#A function to get authorization and create a service
def get_service():
    """Create the Gmail API (v1) service used for all later requests.

    Reads the module-level ``credentials`` and uses them to authorize a new
    ``httplib2.Http`` object, which is then handed to ``build``.

    Returns:
        The object returned by ``build('gmail', 'v1', http=...)``.
    """
    authorized_http = credentials.authorize(httplib2.Http())
    return build('gmail', 'v1', http=authorized_http)

In [62]:
# Create the service once at module level; fetch_mail() reads this global.
service = get_service()

## Searching Gmail to fetch mail id
The search query is written with Gmail's search operators, which are documented at https://support.google.com/mail/answer/7190?hl=en.

A search query string is provided below as an example.

In [63]:

search_query = "yalin.mailer@state.gov, after:01/01/2020"

def filter_mail(service, query):
    '''
    Return the messages of the authenticated user that match a search query.

    service = a gmail service

    query = a search query (Gmail search-operator syntax)

    Returns the list found under 'messages' in the API response, or an
    empty list when the response carries no 'messages' key.

    NOTE: a single list() request is issued, so only the results contained
    in that one response are returned.
    '''
    # Use the caller's query, not the module-level search_query.
    response = service.users().messages().list(userId='me',
                                               q=query).execute()

    # 'messages' may be missing from the response (presumably when nothing
    # matches); report that as "no results" instead of raising KeyError.
    return response.get('messages', [])

In [64]:
# Run the search; the result is passed to fetch_mail() below.
mail_ids = filter_mail(service, query = search_query)

In [65]:
def data_encoder(text):
    """Decode base64url-encoded text and parse it into an email message.

    Args:
        text: URL-safe base64 data whose decoded bytes are UTF-8 text.

    Returns:
        The ``email.message.Message`` produced by
        ``email.message_from_string`` from the decoded text. Empty or
        ``None`` input yields an empty message, so callers can always call
        ``get_payload()`` on the result.

    Raises:
        binascii.Error: if ``text`` is not valid base64.
        UnicodeDecodeError: if the decoded bytes are not valid UTF-8.
    """
    if not text:
        # Nothing to decode: return an empty message rather than falling
        # through with no value to return.
        return email.message_from_string('')
    decoded = base64.urlsafe_b64decode(text)
    return email.message_from_string(str(decoded, 'utf-8'))

In [66]:
def readMessage(content) -> 'email.message.Message | None':
    """Decode the first body found in a Gmail message resource.

    The top-level payload body is checked first. If it holds no data, the
    payload's parts are searched depth-first, in order, so a body nested
    inside a multipart container is still found.

    Args:
        content: message dict with a 'payload' entry; each payload/part may
            carry a 'body' dict (with optional 'data') and a 'parts' list.

    Returns:
        The result of ``data_encoder`` for the first non-empty body data,
        or ``None`` (after printing a notice) when no part carries data.
    """
    pending = [content['payload']]
    while pending:
        part = pending.pop(0)
        data = part.get('body', {}).get('data')
        if data:
            return data_encoder(data)
        # Visit this part's children before its later siblings.
        pending[:0] = part.get('parts', [])

    print("body has no data.")
    return None

In [67]:
def _header_fields(headers):
    """Pick the Subject, Date and Sender entries out of a message header list."""
    fields = {}
    for header in headers:
        # Header names are matched case-insensitively.
        name = header['name'].lower()
        if name == 'subject':
            fields['Subject'] = header['value']
        elif name == 'date':
            # Keep only the calendar date, stored as a string.
            fields['Date'] = str(parser.parse(header['value']).date())
        elif name == 'from':
            fields['Sender'] = header['value']
    return fields


def _body_text(message):
    """Return the text of a decoded message body, or '' when there is none."""
    if message is None:
        return ''
    soup = BeautifulSoup(message.get_payload(), 'lxml')
    # soup.body can be None (e.g. for an empty document); fall back to the
    # whole soup instead of failing on None.get_text().
    root = soup.body if soup.body is not None else soup
    return root.get_text()


def fetch_mail(mail_id):
    """Fetch each listed message and extract its headers and cleaned body.

    Uses the module-level ``service`` to request every message.

    Args:
        mail_id: iterable of dicts that each hold a message 'id'
            (the shape returned by ``filter_mail``).

    Returns:
        A list with one dict per message. Each dict has 'Message body'
        (cleaned text, '' when the message has no readable body) plus
        'Subject', 'Date' and 'Sender' for whichever of those headers the
        message contains.
    """
    mail_list = []

    for entry in mail_id:
        message = service.users().messages().get(userId='me',
                                                 id=entry['id']).execute()
        mail_dict = _header_fields(message['payload']['headers'])

        msg_text = _body_text(readMessage(message))
        if msg_text:
            clean_text = clean(msg_text,
                               fix_unicode=True,
                               to_ascii=True,
                               lower=False,
                               no_line_breaks=True,
                               no_urls=True)
        else:
            clean_text = ''
        mail_dict['Message body'] = clean_text

        mail_list.append(mail_dict)

    return mail_list

In [68]:
# Fetch and parse every message found by the search (one API request per id).
mail_List = fetch_mail(mail_id = mail_ids)

In [69]:
def save_data(mail_list, file_path):
    """Write the fetched mails to ``file_path`` as a comma-separated CSV file.

    Args:
        mail_list: iterable of dicts keyed by 'Sender', 'Subject', 'Date'
            and 'Message body'.
        file_path: destination file; it is opened in write mode as UTF-8.
    """
    columns = ['Sender', 'Subject', 'Date', 'Message body']
    with open(file_path, 'w', encoding='utf-8', newline='') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns, delimiter=',')
        csv_writer.writeheader()
        csv_writer.writerows(mail_list)

In [70]:
# file paths
# Raw-data directory: ../src/data/raw, relative to the current working directory
raw_data_path = os.path.join(os.path.pardir,'src', 'data','raw')
# CSV file that save_data() writes and pd.read_csv() reads back
fetchmail_data_path = os.path.join(raw_data_path, 'fetched_gmail.csv')

In [71]:
# save data: write the fetched mails to the CSV file defined above
save_data(mail_List,fetchmail_data_path)

In [72]:
# Load the saved CSV back into a DataFrame
df = pd.read_csv(fetchmail_data_path)

In [73]:
# Display the loaded DataFrame as the cell output
df

Unnamed: 0,Sender,Subject,Date,Message body
0,YALI Network <yalin.mailer@state.gov>,There’s a NEW YALI4Youth Workbook,2021-01-12,Youth Mentorship Workbook No images? Click her...
1,YALI Network <yalin.mailer@state.gov>,Be one of the first to take the NEW online course,2020-12-22,New course on youth development No images? Cli...
2,YALI Network <yalin.mailer@state.gov>,Africa’s youth is waiting for you!,2020-12-02,"No images? Click here <URL> Dear Okwudili, Afr..."
3,YALI Network <yalin.mailer@state.gov>,YALI4Youth Starts NOW!,2020-11-24,"No images? Click here <URL> Dear Okwudili, Acr..."
4,YALI Network <yalin.mailer@state.gov>,YALI RLC West Africa - Applications Open!,2020-10-23,"No images? Click here <URL> Dear Okwudili, Are..."
5,YALI Network <yalin.mailer@state.gov>,Don't miss out on these financial literacy res...,2020-10-22,These YALIEntrepreneurs resources are here for...
6,YALI Network <yalin.mailer@state.gov>,YALIEntreprenuers Starts NOW!,2020-09-29,Take control of your financial future No image...
7,YALI Network <yalin.mailer@state.gov>,Honor the International Day of Democracy with ...,2020-09-15,"No images? Click here <URL> Dear Okwudili, Hap..."
8,YALI Network <yalin.mailer@state.gov>,There’s a brand NEW YALI Course!,2020-09-08,"No images? Click here <URL> Dear Okwudili, Res..."
9,YALI Network <yalin.mailer@state.gov>,It’s time to create change with YALILearns,2020-08-31,"No images? Click here <URL> Dear Okwudili, Eac..."


In [74]:
# Show the 'Message body' value stored under index label 0
df['Message body'][0]

"Youth Mentorship Workbook No images? Click here <URL> Dear Okwudili, In the past weeks, you've worked hard to learn some of the fundamentals of youth development and mentorship with our YALI4Youth Online Course. Now it's time to take your skills one step further. Today we're releasing the NEW YALI4Youth Youth Development Workbook to provide you with resources and personal exercises on some of the critical points of youth development. By completing this workbook, you'll be preparing yourself to be a successful and impactful youth mentor. So let's get started. Download and complete the new workbook today in either English, French, or Portuguese. Download the NEW Youth Development Workbook [<URL>] There's still more to come in YALI4Youth! Stay tuned to our Facebook [<URL>], Twitter [<URL>], and LinkedIn [<URL>] to see what's next. And don't forget that you can still take the YALI4Youth Pledge [<URL>], YALI4Youth Quiz [<URL>], or Online Course [<URL>] if you haven't already! Best, Sarah Y