# Fetch Mails from Gmail using Gmail-API

This project searches a Gmail account with the Gmail API and fetches the content of the matching messages.

In [2]:
#Importing required modules
from __future__ import print_function
import httplib2
import os
from googleapiclient.discovery import build
#from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import base64
from bs4 import BeautifulSoup
import re
import time
import dateutil.parser as parser
from datetime import datetime
import datetime


# Build the `flags` object that get_credentials passes to tools.run_flow.
# parse_args([]) parses an empty argument list instead of sys.argv
# (presumably so arguments given to the host process are not read as
# OAuth flags -- confirm).
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args([])
except ImportError:
    # Without argparse, flags stays None and get_credentials takes its
    # tools.run fallback branch.
    flags = None

In [3]:
#Defining authorization scopes
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
CLIENT_SECRET_FILE = 'credentials.json' #Name of gmail credential file (client secrets passed to the OAuth2 flow)
storage = 'gmail-storage.json' #Name of the file in which the credential returned by the Gmail API is stored
APPLICATION_NAME = 'Gmail API Python' #Name of Application (used as the flow's user agent)

#Function to activate credential
def get_credentials(base_dir='~/Desktop/folders/Doing_DS'):
    """Gets valid user credentials from storage.

    If nothing has been stored, or if the stored credentials are invalid,
    the OAuth2 flow is completed to obtain the new credentials.

    Both the client-secret file (``CLIENT_SECRET_FILE``) and the stored
    credential file (``storage``) live in ``<base_dir>/secrets``; that
    directory is created when it does not exist yet.

    Args:
        base_dir: Directory that contains the ``secrets`` folder. ``~`` is
            expanded to the user's home directory. Defaults to the path
            that was previously hard-coded.

    Returns:
        Credentials, the obtained credential.
    """
    credential_dir = os.path.join(os.path.expanduser(base_dir), 'secrets')
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(credential_dir, exist_ok=True)

    credential_path = os.path.join(credential_dir, CLIENT_SECRET_FILE)
    storage_path = os.path.join(credential_dir, storage)

    store = Storage(storage_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        flow = client.flow_from_clientsecrets(credential_path, SCOPES)
        flow.user_agent = APPLICATION_NAME
        if flags:
            credentials = tools.run_flow(flow, store, flags)
        else:
            # Legacy fallback used when the argparse-based flags are missing.
            # NOTE(review): tools.run appears to be absent from recent
            # oauth2client releases -- confirm before relying on this branch.
            credentials = tools.run(flow, store)
        print('Storing credentials to ' + storage_path)
    return credentials

In [4]:
#getting credentials (runs the OAuth2 flow when no valid stored credential is found)
credentials = get_credentials()

In [5]:
#A function to get authorization and create a service
def get_service():
    """Create the Gmail API client used for every request.

    The module-level ``credentials`` object authorizes a fresh
    ``httplib2.Http`` instance, and that authorized transport is handed
    to ``build`` for the ``gmail`` API, version ``v1``.

    Returns:
        The service object returned by ``build``.
    """
    authorized_http = credentials.authorize(httplib2.Http())
    return build('gmail', 'v1', http=authorized_http)

In [6]:
#creating the service (module-level Gmail client; fetch_mail reads this global)
service = get_service()

## Searching Gmail to fetch mail id
A search query is built from the Gmail search operators documented at https://support.google.com/mail/answer/7190?hl=en.

An example search query string is provided below.

In [7]:
# Example query: messages from one sender, sent before a given date.
# NOTE(review): Gmail's documented search operators are separated by spaces;
# confirm that the comma after the address is intended.
search_query = "from:nigeria@nigeria.com, before:10/08/2019"
def filter_mail(service, query, max_results=10):
    """Return the message references that match a Gmail search query.

    Args:
        service: Gmail API client exposing ``users().messages().list``.
        query: Gmail search string, sent as the ``q`` request parameter.
        max_results: Maximum number of message references to request.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        The list stored under ``'messages'`` in the API response, or an
        empty list when the response carries no such key.
    """
    response = service.users().messages().list(
        userId='me',
        maxResults=max_results,
        # Use the caller's query; the previous code always sent the
        # module-level search_query and ignored this argument.
        q=query,
    ).execute()
    # The response has no 'messages' key when nothing matches the query.
    return response.get('messages', [])

In [8]:
# Run the example search; mail_ids holds whatever filter_mail returns for it.
mail_ids = filter_mail(service, query = search_query)

In [15]:
def _header_values(headers):
    """Map lower-cased header names to their values; the last duplicate wins."""
    return {hdr['name'].lower(): hdr['value'] for hdr in headers}


def _first_part_body(payload):
    """Decode and parse the body data of the first message part, or return None."""
    parts = payload.get('parts')
    if not parts:
        return None  # no 'parts' list to read from
    data = parts[0].get('body', {}).get('data')
    if not data:
        return None  # the first part carries no inline data
    try:
        # urlsafe_b64decode handles the '-'/'_' alphabet directly; re-pad in
        # case the trailing '=' characters were stripped.
        raw = base64.urlsafe_b64decode(data + '=' * (-len(data) % 4))
    except ValueError:  # binascii.Error is a ValueError subclass
        return None
    body = BeautifulSoup(raw, "lxml").body
    # Calling a Tag is shorthand for find_all(), so this keeps the original
    # return shape: the collection of tags found inside <body>.
    return body() if body is not None else None


def fetch_mail(mail_id):
    """Fetch subject, date, sender, snippet and body for each message.

    Every message is retrieved through the module-level ``service`` client.

    Args:
        mail_id: Iterable of dicts that each carry a message ``'id'``, such
            as the value returned by ``filter_mail``.

    Returns:
        A list with one dict per message. ``'Snippet'`` is always present.
        ``'Subject'``, ``'Date'`` (ISO ``YYYY-MM-DD`` string) and
        ``'Sender'`` are present when the matching header exists; header
        names are matched case-insensitively. ``'Message body'`` is present
        only when the first message part holds decodable data.

    Raises:
        Errors from the API call, the date parser or the HTML parser
        propagate. Only a missing or undecodable body is skipped silently,
        where previously a bare ``except`` hid every error.
    """
    mail_list = []

    for ref in mail_id:
        message = service.users().messages().get(userId='me',
                                                 id=ref['id']).execute()
        payload = message['payload']
        headers = _header_values(payload.get('headers', []))

        # Keys are inserted in a fixed order so the output layout does not
        # depend on the order of the headers in the message.
        mail_dict = {}
        if 'subject' in headers:
            mail_dict['Subject'] = headers['subject']
        if 'date' in headers:
            mail_dict['Date'] = str(parser.parse(headers['date']).date())
        if 'from' in headers:
            mail_dict['Sender'] = headers['from']
        mail_dict['Snippet'] = message['snippet']

        body = _first_part_body(payload)
        if body is not None:
            mail_dict['Message body'] = body

        mail_list.append(mail_dict)

    return mail_list

In [16]:
# Fetch the details of every matched message and print the resulting list.
mail_List = fetch_mail(mail_id = mail_ids)
print(mail_List)

[{'Subject': 'Huge Tron TRX Airdrop', 'Date': '2019-10-03', 'Sender': 'AirdropAlert <Support@airdropalert.com>', 'Snippet': 'Best Airdrops only at Airdropalert.com Highlights Up to $45 from Top Airdrops Foundations of Ethereum Now you can earn ETH as well with AirdropAlert! Know Coding? $400k USD Telegram Competition. Top'}, {'Subject': 'We bring you the Crypto Pro Plan', 'Date': '2019-10-01', 'Sender': 'AirdropAlert <Support@airdropalert.com>', 'Snippet': 'Airdropalert.com Last week we have reached out to announce the Pro Plan. Due to unexpected high traffic, the payment provider&#39;s servers got overloaded! Thankfully the issue has been resolved now'}, {'Subject': "Vitalik's Life Story", 'Date': '2019-09-19', 'Sender': 'AirdropAlert <Support@airdropalert.com>', 'Snippet': 'Best Airdrops only at Airdropalert.com Highlights Up to $25 from Top Airdrops Airdrops in your wallet entirely automatic? The story of Vitalik Buterin AirdropAlert listed in Rotterdam Capital Week 2019'}, {'Subjec