In [6]:
from tika import parser
from parse import parse
from pprint import pprint
import os
 
def do_simple_parsing(clean_lines, filter_list):
    """Collect named fields by matching ``parse`` templates against lines.

    Every template in ``filter_list`` is tried against every line in
    ``clean_lines``. The named fields of each successful match are merged
    into one dictionary; if a field name is matched more than once, the
    later match (template order first, then line order) overwrites the
    earlier one.

    Args:
        clean_lines: Sequence of text lines to scan. It is iterated once per
            template, so it must be re-iterable (e.g. a list).
        filter_list: Iterable of ``parse`` format strings containing named
            fields such as ``"Order No: {order_no}"``.

    Returns:
        dict: Field name -> matched value. Empty when nothing matched.
    """
    order_details = {}
    for text_filter in filter_list:
        for line in clean_lines:
            result = parse(text_filter, line)
            if result:
                # Use the public ``named`` attribute of the match result
                # instead of reaching into the object's ``__dict__``.
                order_details.update(result.named)
    return order_details
 
def get_elements_in_between(starts_with, ends_with, clean_lines):
    """Return the lines found between a start marker and an end marker.

    The start marker is the first line that is exactly equal to
    ``starts_with``. The end marker is the first line *after* the start
    marker that contains ``ends_with`` as a substring. Neither marker line
    is included in the result.

    Args:
        starts_with: Exact text of the line that opens the section.
        ends_with: Text contained in the line that closes the section.
        clean_lines: List of text lines to search.

    Returns:
        list: The lines strictly between the two markers. An empty list is
        returned when the start marker is missing, or when no end marker
        follows it.
    """
    try:
        start_index = clean_lines.index(starts_with) + 1
    except ValueError:
        # No line equals the start marker, so there is no section.
        return []

    # Scan forward from the start marker only: an end-marker match located
    # before the section must not be used, and tracking the position with
    # enumerate avoids looking lines up by value (wrong for repeated lines).
    for end_index, line in enumerate(clean_lines[start_index:], start_index):
        if ends_with in line:
            return clean_lines[start_index:end_index]
    return []
 
def get_place_details(clean_lines):
    """Extract the restaurant name and address from the bill lines.

    The section between the ``"Ordered from:"`` line and the
    ``"Item Name Quantity Price"`` header is used: its first line becomes
    the place name and the remaining lines are joined with single spaces to
    form the address.

    Args:
        clean_lines: List of non-empty text lines of the bill.

    Returns:
        dict: ``{"place_name": str, "place_address": str}``. Both values are
        empty strings when the section contains no lines.
    """
    address_list = get_elements_in_between(
        "Ordered from:", "Item Name Quantity Price", clean_lines
    )
    # An empty section would make ``address_list[0]`` raise IndexError; fall
    # back to an empty name so this extractor degrades like the others
    # (which yield "" / [] for an empty section).
    place_name = address_list[0] if address_list else ""
    return {
        "place_name": place_name,
        "place_address": " ".join(address_list[1:]),
    }
 
def get_order_item_details(clean_lines):
    """Build one record per ordered item listed on the bill.

    Item lines are the ones between the ``"Item Name Quantity Price"``
    header and the line containing ``"Item Total:"``. For each line, the
    text before the first rupee sign is split at its last space into the
    item name and the quantity; the text after the last rupee sign is the
    price.

    Args:
        clean_lines: List of non-empty text lines of the bill.

    Returns:
        list: Dictionaries with the string keys ``"name"``, ``"price"`` and
        ``"quantity"``, in the order the items appear.
    """
    rows = get_elements_in_between(
        "Item Name Quantity Price", "Item Total:", clean_lines
    )

    def describe(row):
        # Text left of the first rupee sign: "<name words> <quantity>".
        before_price = row.partition("₹")[0].strip()
        # Text right of the last rupee sign is the price.
        price_text = row.rpartition("₹")[2].strip()
        # The last space separates the name from the quantity; without any
        # space the name is empty and the whole text is the quantity.
        name_text, _, quantity_text = before_price.rpartition(" ")
        return {
            "name": name_text,
            "price": price_text,
            "quantity": quantity_text,
        }

    return [describe(row) for row in rows]
 
def get_recepient_address(clean_lines):
    """Return the delivery address as a single space-joined string.

    The address lines are the ones between the ``"Delivery To:"`` line and
    the line containing ``"Disclaimer:"``.

    Args:
        clean_lines: List of non-empty text lines of the bill.

    Returns:
        str: The address lines joined with single spaces.
    """
    return " ".join(
        get_elements_in_between("Delivery To:", "Disclaimer:", clean_lines)
    )
 
def parse_swiggy_bill(filename):
    """Parse a Swiggy bill PDF into a dictionary of structured data.

    The text of the PDF is extracted with ``tika``'s ``parser.from_file``,
    split into non-empty lines, and handed to the individual extractors.

    Args:
        filename: Path of the bill PDF to parse.

    Returns:
        dict: With the keys ``"order_details"``, ``"place_details"``,
        ``"ordered_item_details"`` and ``"recepient_address"``.

    Raises:
        ValueError: If no text content was extracted from the file.
    """
    parsed = parser.from_file(filename)
    content = parsed.get("content")
    # NOTE(review): the extracted content is presumably None for documents
    # without a text layer - confirm against tika. Fail with a clear message
    # instead of an AttributeError on ``.split``.
    if not content:
        raise ValueError(
            "No text content could be extracted from {!r}".format(filename)
        )
    clean_lines = [line for line in content.split("\n") if line]

    # ``parse`` templates for the single-line "label: value" fields.
    filter_list = [
        "Order No: {order_no}",
        "Order placed at: {order_date}, {order_time} {order_am_pm}",
        'Order Status: {order_status}',
        'Item Total: ₹  {order_total}',
        'GST: ₹  {gst}',
        'Order Packing Charges: ₹  {packing_charge}',
        'Delivery Charges: ₹  {delivery_charge}',
    ]

    parsed_data = {
        'order_details': do_simple_parsing(clean_lines, filter_list),
        'place_details': get_place_details(clean_lines),
        'ordered_item_details': get_order_item_details(clean_lines),
        'recepient_address': get_recepient_address(clean_lines),
    }
    return parsed_data
 
if __name__ == "__main__":
    # The original loop iterated over the characters of the string
    # "pdf_files", parsing the same hard-coded bill once per character.
    # NOTE(review): "pdf_files" is presumably a directory of bill PDFs -
    # confirm. When it exists, every PDF inside it is parsed; otherwise the
    # original sample bill is parsed once.
    pdf_dir = "pdf_files"
    if os.path.isdir(pdf_dir):
        pdf_paths = [
            os.path.join(pdf_dir, entry)
            for entry in sorted(os.listdir(pdf_dir))
            if entry.lower().endswith(".pdf")
        ]
    else:
        pdf_paths = ["41418314131_5b81cbc1-01cb-4137-8bfe-f863298f45b4.pdf"]

    for pdf_path in pdf_paths:
        pprint(parse_swiggy_bill(pdf_path))


{'order_details': {'delivery_charge': '0',
                   'gst': '6.45',
                   'order_am_pm': 'PM',
                   'order_date': '20/05/2019',
                   'order_no': '#41418314131',
                   'order_status': 'Delivered',
                   'order_time': '02:35',
                   'order_total': '129',
                   'packing_charge': '10'},
 'ordered_item_details': [{'name': 'Dal Lehsooni & Aloo Wajid Ali Shah Combo',
                           'price': '129',
                           'quantity': '1'}],
 'place_details': {'place_address': 'No 1/4,Ground Floor, Swami Vivekanand '
                                    'Road, Lido Multiplex,Ulsoor ,Bangalore, '
                                    'Karnataka, 560008.',
                   'place_name': 'UBQ by Barbeque Nation'},
 'recepient_address': 'Hasan Jaaga 5/1, Penthouse 01, 6th Floor, Rich Homes '
                      'Apartment, Richmond Road, Shanthala Nagar, Ashok Naga, '
              