In [3]:
from tika import parser
from parse import parse
import pprint

# file_name = "swiggy-order-39803023548.pdf"
file_name = "swiggy-order-41110023402.pdf"

parsed = parser.from_file(file_name)

splitted = parsed["content"].split("\n")

# clean_lines = []
# for line in splitted:
#     if len(line) != 0:
#         clean_lines.append(line)
#        OR
clean_lines = [line for line in splitted if len(line) != 0]
filter_list = ["Order No: {order_no}", 
               "Order placed at: {order_date}, {order_time} {order_am_pm}",
               "Order delivered at: {delivery_date}, {delivery_time} {delivery_am_pm}",
               "Order Status: {order_status}",
               "Item Total: ₹  {order_total}",
               "Grand Total: ₹ {order_grand_total}",
               "GST: ₹  {gst}",
               "Order Packing Charges: ₹  {packing_charge}",
               "Delivery Charges: ₹  {delivery_charge}"]


def do_simple_parsing(clean_lines, filter_list):
    order_details = {}
    for text_filter in filter_list:
        for line in clean_lines:
            result = parse(text_filter, line)
            if result:
                order_details.update(result.__dict__["named"])
    return order_details
    

def get_element_in_between(starts_with, ends_with, clean_lines):
    for line in clean_lines:
        if starts_with in line:
            start_index = clean_lines.index(line) + 1
        elif ends_with in line:
            end_index = clean_lines.index(line)
        else:
            pass

    place_details = {}
    element_list = clean_lines[start_index:end_index]
    
    return element_list

def get_place_details(clean_lines):
    starts_with = "Ordered from:"
    ends_with = "Item Name Quantity Price"
    address_list = get_element_in_between(starts_with, ends_with, clean_lines)
    place_details["place_name"] = address_list[0]
    place_details["place_address"] = " ".join(address_list[1:])
    
    return place_details


def get_items(clean_lines):
    
    starts_with = "Item Name Quantity Price"
    ends_with = "Item Total: ₹"

    result = get_element_in_between(starts_with, ends_with, clean_lines)

    items = []
    for item in result:
        item_details = {}
        item_splitted = item.split("₹")
        item_details["price"] = item_splitted[-1].strip()

        item_details["quantity"] = item_splitted[0].strip().split(" ")[-1]
        item_details["name"] = " ".join(item_splitted[0].strip().split(" ")[:-1])
        items.append(item_details)

    return {"items":items}

def get_person(clean_lines):
    starts_with = "Delivery To:"
    ends_with = "Disclaimer:"
    person_details = {}
    result = get_element_in_between(starts_with, ends_with, clean_lines)
    person_details["name"] = result[0]
    person_details["address"] = " ".join(result[1:])

    return {"person":person_details}


order_data = do_simple_parsing(clean_lines, filter_list)
person_data = get_person(clean_lines)

place_details = {}

place_details = get_place_details(clean_lines)
items = get_items(clean_lines)
order_data.update(place_details)
order_data.update(items)
order_data.update(person_data)

pprint.pprint(order_data)


{'delivery_am_pm': 'AM',
 'delivery_charge': '40',
 'delivery_date': '17/05/2019',
 'delivery_time': '01:20',
 'items': [{'name': 'Chocolate Xoverloaded Waffle',
            'price': '140',
            'quantity': '1'},
           {'name': '-', 'price': '0', 'quantity': 'Banana'}],
 'order_am_pm': 'AM',
 'order_date': '17/05/2019',
 'order_grand_total': '176',
 'order_no': '#41110023402',
 'order_status': 'Delivered',
 'order_time': '12:57',
 'order_total': '140',
 'packing_charge': '10',
 'person': {'address': '3rd Floor, 161, Behind Supreme Agencies Building, 5th '
                       'Cross Rd, Vinayaka Nagar, Wilson Garden, Bengaluru, '
                       'Karnataka 560027, India',
            'name': 'Karan Raj Pradhan'},
 'place_address': '#105, 1st A Cross, Jyoti Niwas College Road, Kormangala 5th '
                  'block',
 'place_name': 'XO Belgian Waffle'}


In [4]:
for i in clean_lines:
    print(i)

Thanks for choosing Swiggy, Karan Raj Pradhan! Here are your order details:
Order No: #41110023402
Order placed at: 17/05/2019, 12:57 AM
Order delivered at: 17/05/2019, 01:20 AM
Order Status: Delivered
Ordered from:
XO Belgian Waffle
#105, 1st A Cross, Jyoti Niwas College Road, Kormangala 5th
block
Item Name Quantity Price
Chocolate Xoverloaded Waffle 1 ₹ 140
- Banana ₹ 0
Item Total: ₹  140
Order Packing Charges: ₹  10
Delivery Charges: ₹  40
Discount Applied: - ₹  14
Grand Total: ₹ 176
Delivery To:
Karan Raj Pradhan
3rd Floor, 161, Behind Supreme Agencies Building,
5th Cross Rd, Vinayaka Nagar, Wilson Garden,
Bengaluru, Karnataka 560027, India
Disclaimer: This is an acknowledgement of Delivery of the Order and not an actual invoice. Details mentioned above including
the menu prices and taxes (as applicable) are as provided by the Restaurant to Swiggy. Responsibility of charging (or not
charging) taxes lies with the Restaurant and Swiggy disclaims any liability that may arise in this r

Thanks for choosing Swiggy, Hasan! Here are your order details:
Order No: #39803023548
Order placed at: 01/05/2019, 09:53 PM
Order delivered at: 01/05/2019, 10:22 PM
Order Status: Delivered
Ordered from:
FreshMenu
MAH QUDDUS,4th floor,No 2,AGA Abdullah street,Richmond
Street,Johnson Market,Banglore:560025
Item Name Quantity Price
Baked Margharita Mac 'n' Cheese 1 ₹ 210
BBQ Chicken Club Sandwich 1 ₹ 179
Item Total: ₹  389
GST: ₹  13.61
Order Packing Charges: ₹  17
Delivery Charges: ₹  0
Discount Applied (SUPERIT): - ₹  166.7
Grand Total: ₹ 253
Delivery To:
Hasan
Jaaga 5/1, Penthouse 01, 6th Floor, Rich Homes
Apartment, Richmond Road, Shanthala Nagar,
Ashok Naga, 6/1, Shanthala Nagar, Richmond
Town, Bengaluru, Karnataka 560025, India
Disclaimer: This is an acknowledgement of Delivery of the Order and not an actual invoice. Details mentioned above including
the menu prices and taxes (as applicable) are as provided by the Restaurant to Swiggy. Responsibility of charging (or not
charging) t