In [1]:
from exchangelib import Credentials, Account, Folder, Message, EWSDateTime
from requests_html import HTML
import re
import datetime
import configparser
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy

In [2]:
# Load the Exchange account's email address and password from a local ini file
# so that no credentials are hardcoded in the notebook.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Without this check a missing file would only
# surface as a confusing KeyError on the 'credentials' section below.
if not config.read('exchange_credentials.ini'):
    raise FileNotFoundError("Could not read 'exchange_credentials.ini'")
email_address = config['credentials']['email_address']
password = config['credentials']['password']

# Build the exchangelib credentials and open the account; the server is
# located through autodiscover rather than being configured explicitly.
credentials = Credentials(email_address, password)
account = Account(email_address, credentials = credentials, autodiscover = True)

In [3]:
# Folder named 'ASDA Order Receipts' reached from the account's inbox; the
# cells below read the receipt emails from here.
receipt_folder = account.inbox / 'ASDA Order Receipts'

In [5]:
# Fetch every email in the receipts folder, oldest first
items = receipt_folder.all().order_by('datetime_received')

# Extract datetime_received, subject and body from each item
item_details = items.values('datetime_received', 'subject', 'body')

# Parse each email in turn. Only the parsing step is implemented in this cell;
# nothing here writes to a database or moves messages between folders.
email_datetime_list = []
# NOTE(review): order_number_list is initialised but never appended to in this
# cell - confirm whether parsed order numbers were meant to be collected here.
order_number_list = []
for item in item_details:

    # grab datetime and append to date_time list
    email_datetime = item['datetime_received']
    email_datetime_list.append(email_datetime)

    # extract subject line, for branching later
    subject = item['subject']

    # Convert body to lines: take the text of the first <tr> in the HTML body
    # (this assumes the body contains at least one <tr>), strip every
    # non-ASCII character and split into individual lines.
    body_raw = item['body']
    body_html = HTML(html = body_raw)
    body = body_html.find('tr')[0].text
    body = re.sub(r'[^\x00-\x7f]',r'', body)
    body_lines = body.splitlines()

    # Case for subject line of 'Your updated ASDA Groceries order'
    if subject == 'Your updated ASDA Groceries order':
        # Get the order number. Only the errors a missing field can raise are
        # caught: ValueError when the label is absent, IndexError when the
        # label is the last line. The value is reset to None so a failed parse
        # cannot silently keep the previous email's order number.
        try:
            # order number is on the line below 'Order Number:'
            order_number = body_lines[body_lines.index('Order Number:') + 1]
        except (ValueError, IndexError):
            order_number = None
            print("Order Number was not found")

        # Get the delivery date; strptime also raises ValueError when the text
        # does not match the expected format.
        try:
            # Delivery date is on the line below 'Delivery Date:'
            delivery_date_str = body_lines[body_lines.index('Delivery Date:') + 1]
            # Keep only the leading 'DD Mon YYYY' part (first 11 characters),
            # dropping the delivery time slot that follows it
            delivery_date_str = delivery_date_str[0:11]
            delivery_date = datetime.datetime.strptime(delivery_date_str, '%d %b %Y').date()
        except (ValueError, IndexError):
            delivery_date = None
            print("Delivery Date not found")

In [6]:
# Inspect the delivery date left in the kernel by the processing loop
delivery_date

datetime.date(2020, 2, 9)

In [8]:
# Take the first email of the query (the oldest, given the ascending
# 'datetime_received' ordering) as a working sample for the cells below
item = item_details[0]

In [14]:
    # Turn the sample email's HTML body into a list of plain-text lines.
    # NOTE(review): this cell is indented as if pasted from inside the loop;
    # IPython tolerates a uniformly indented cell, plain Python would not.
    body_raw = item['body']
    body_html = HTML(html = body_raw)
    # Text content of the first <tr> element found in the body
    body = body_html.find('tr')[0].text
    # Drop every character outside the 7-bit ASCII range
    body = re.sub(r'[^\x00-\x7f]',r'', body)
    lines = body.splitlines()

In [15]:
# Inspect the text lines extracted from the sample email
lines

['Order Number:',
 '20670684235',
 'Delivery Date:',
 '09 Feb 2020 03:00 PM-05:00 PM',
 'Delivery Note',
 'Hi Richard,',
 '',
 'Your order has been picked and is being prepared for delivery. Your order details are listed below. Visit your order detail page for updates on your delivery time.',
 '',
 "If you're unhappy with any of your products, simply hand them back to your driver for a full refund.",
 '',
 '',
 '',
 'Order Details',
 '',
 '',
 'Substitutes',
 'Qty',
 'Price',
 'ASDA Herbes de Provence 15g',
 'Substitute for 1 X ASDA Italian Style Seasoning 12g',
 '1',
 '0.69',
 '',
 '',
 'Ordered',
 'Qty',
 'Price',
 'Chilled',
 'ASDA Unsalted Butter 250g',
 '',
 '1',
 '1.49',
 'Frozen',
 'ASDA Take Away BBQ Chicken & Bacon Stuffed Crust Pizza 458g',
 '',
 '1',
 '1.95',
 'ASDA Take Away Pepperoni Stuffed Crust Pizza 415g',
 '',
 '1',
 '1.73',
 'Other',
 "ASDA Baker's Selection Chocolate Chip Brioche Rolls 8pk",
 '',
 '1',
 '0.83',
 'ASDA White Flour Dusted Sliced Rolls 6pk',
 '',
 '1',

In [20]:
# Column headings that appear inside the order tables, in addition to the
# category names listed in the categories file. The emails use 'Qty';
# 'Quantity' is kept for backward compatibility.
_TABLE_HEADINGS = ('Quantity', 'Qty', 'Price')

# Heading sets keyed by categories-file path, so the file is read once per
# path instead of once per filtered element. Re-running this cell clears the
# cache, which is how an edited categories file gets picked up.
_HEADINGS_BY_PATH = {}


def _headings_for(categories_path):
    """Return the set of heading strings to drop, loading the file on first use."""
    if categories_path not in _HEADINGS_BY_PATH:
        with open(categories_path) as cat:
            categories = cat.read().splitlines()
        _HEADINGS_BY_PATH[categories_path] = frozenset(_TABLE_HEADINGS).union(categories)
    return _HEADINGS_BY_PATH[categories_path]


def remove_blank_and_headings(element, categories_path='categories.txt'):
    """Predicate for ``filter()`` that drops blank lines and heading lines.

    A line counts as a heading when it is one of the table column headings
    ('Quantity', 'Qty', 'Price') or equals a line of the categories file.

    Parameters
    ----------
    element : str or None
        One line of the email body.
    categories_path : str, optional
        File holding one category heading per line; defaults to
        'categories.txt' in the working directory.

    Returns
    -------
    False when ``element`` is blank, ``None`` or a heading; otherwise
    ``element`` itself (truthy, so ``filter()`` keeps it).

    Raises
    ------
    OSError
        If the categories file cannot be opened on first use.
    """
    # Blank lines are '' (or None); both are falsy and must be dropped.
    if not element:
        return False
    if element in _headings_for(categories_path):
        return False
    return element

In [22]:
# Parse the header fields and the optional 'Substitutes' / 'Unavailable'
# sections out of `lines` (the email body split into text lines).
#
# Only the errors a missing or malformed field can raise are caught
# (ValueError from list.index / strptime / float, IndexError from reading past
# the end of `lines`). Each value falls back to None on failure so a failed
# parse cannot leave a stale value from an earlier run in the kernel.

# Order number: on the line below 'Order Number:'
try:
    order_number = lines[lines.index('Order Number:') + 1]
except (ValueError, IndexError):
    order_number = None
    print("Order Number was not found")

# Delivery date: on the line below 'Delivery Date:'
try:
    delivery_date_str = lines[lines.index('Delivery Date:') + 1]
    # Keep only the leading 'DD Mon YYYY' part (first 11 characters), dropping
    # the delivery time slot that follows it
    delivery_date_str = delivery_date_str[0:11]
    delivery_date = datetime.datetime.strptime(delivery_date_str, '%d %b %Y').date()
except (ValueError, IndexError):
    delivery_date = None
    print("Delivery Date not found")

# Total: on the line below 'Total'
try:
    total_str = lines[lines.index('Total') + 1]
    total = float(total_str)
except (ValueError, IndexError):
    total = None
    print("total not found")

# Subtotal: read from 5 lines below 'Subtotal*'
# NOTE(review): the offset of 5 is taken as-is from the original cell - confirm
# it against a real email, the layout is not visible in the sample output.
try:
    subtotal_str = lines[lines.index('Subtotal*') + 5]
    subtotal = float(subtotal_str)
except (ValueError, IndexError):
    subtotal = None
    print("subtotal not found")

# Prefix in front of the replaced item's name, e.g. 'Substitute for 1 X '.
# Matching it with a pattern also handles quantities of more than one digit.
_SUBSTITUTE_PREFIX = re.compile(r'Substitute for \d+ X ')

# Substitutes section (optional). Each substitution is a group of 4 lines:
# i is the substitute item, i+1 the item being substituted, i+2 the quantity
# and i+3 the price. substitutions_present records whether the section exists.
substitutes = []
try:
    start_substitutes = lines.index('Substitutes')
except ValueError:
    # No 'Substitutes' header in this email
    print("No substitutions")
    substitutions_present = False
else:
    # Skip the three header lines ('Substitutes', 'Qty', 'Price')
    i = start_substitutes + 3
    # Continue until an empty line follows a price. The bounds check stops the
    # loop cleanly at the end of `lines` instead of raising IndexError, which
    # previously got reported as "No substitutions" even after rows were found.
    while i + 3 < len(lines) and len(lines[i]) > 0:
        replaced_line = lines[i + 1]
        prefix = _SUBSTITUTE_PREFIX.match(replaced_line)
        # Fall back to the original fixed 19-character cut when the line does
        # not start with the expected prefix
        replaced_item = replaced_line[prefix.end():] if prefix else replaced_line[19:]
        substitutes.append((lines[i], replaced_item, lines[i + 2], lines[i + 3]))
        i += 4
    substitutions_present = True

# Unavailable section (optional): groups of 3 lines packed into tuples
unavailable = []
try:
    start_unavailable = lines.index('Unavailable')
except ValueError:
    # No 'Unavailable' header in this email
    print("No unavailable items")
    unavailable_present = False
else:
    # Skip the header line and the two lines that follow it
    i = start_unavailable + 3
    while i + 2 < len(lines) and len(lines[i]) > 0:
        unavailable.append((lines[i], lines[i + 1], lines[i + 2]))
        i += 3
    unavailable_present = True


No unavailable items


In [30]:
# Find the ordered items

# The section runs from the 'Ordered' header up to the 'Multibuy Savings' line.
# NOTE(review): list.index raises ValueError when a marker is missing - confirm
# that every email contains a 'Multibuy Savings' line.
start_ordered = lines.index('Ordered')
end_ordered = lines.index('Multibuy Savings')

# Skip the three header lines ('Ordered', 'Qty', 'Price') and keep everything
# up to, but not including, the end marker
ordered = lines[start_ordered + 3:end_ordered]

# Remove blank list elements and headings
ordered = list(filter(remove_blank_and_headings, ordered))

# What is left must be repeating (name, quantity, price) triples. Fail with a
# clear message rather than an IndexError halfway through building the tuples.
if len(ordered) % 3 != 0:
    raise ValueError(
        "Ordered section has %d lines after filtering, which is not a "
        "multiple of 3 (name, quantity, price)" % len(ordered)
    )

# Create a list of tuples for the ordered items
ordered_clean = [tuple(ordered[k:k + 3]) for k in range(0, len(ordered), 3)]


In [31]:
# Inspect the (name, quantity, price) tuples built from the Ordered section
ordered_clean

[('ASDA Unsalted Butter 250g', '1', '1.49'),
 ('ASDA Take Away BBQ Chicken & Bacon Stuffed Crust Pizza 458g', '1', '1.95'),
 ('ASDA Take Away Pepperoni Stuffed Crust Pizza 415g', '1', '1.73'),
 ("ASDA Baker's Selection Chocolate Chip Brioche Rolls 8pk", '1', '0.83'),
 ('ASDA White Flour Dusted Sliced Rolls 6pk', '1', '0.56'),
 ('Kingsmill Medium Soft White Bread 800g', '1', '0.95'),
 ('ASDA Plain Bagels 4pk', '1', '0.69'),
 ('ASDA Onion Rings 150g', '1', '0.69'),
 ('Maltesers Biscuits 110G', '1', '1.00'),
 ('Belvita Breakfast Biscuits Soft Bakes Choc Chip 5 Pack 250g', '1', '1.39'),
 ('Belvita Breakfast Biscuits Soft Bakes Golden Grain 5 Pack 250g',
  '1',
  '1.39'),
 ('ASDA Extra Special Mascarpone & Tomato Pasta Sauce 340g', '1', '0.90'),
 ('ASDA Extra Special Garlic and Tomato Pasta Sauce 340g', '1', '0.90'),
 ('Carex Original Hand Wash 250ml', '1', '0.95'),
 ('ASDA All Purpose Cloths 10pk', '1', '0.75'),
 ('ASDA 4 100% Cotton Dusters 4pk', '1', '1.25'),
 ('Mr Sheen Spring Meadow Mu