## Product Mapping
### Anthony Ung

In [18]:
import csv
import re

products_old = []
products_mapped = []
new_product_classes = []

# Register both input formats up front, before any file is opened.
# QUOTE_NONE makes the reader treat quote characters as ordinary text.
csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)
csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)

# Read the pipe-delimited product list; DictReader yields one dict per row,
# keyed by the header line. newline='' leaves line-ending handling to the
# csv module, as its documentation recommends for file objects.
with open('Products1.txt', 'r', newline='') as products_file:
    products_old.extend(csv.DictReader(products_file, dialect='piper'))

# Read the tab-delimited product classes. This file is opened only after the
# products file has been closed, so the two handles are never open together.
with open('product_class.txt', 'r', newline='') as classes_file:
    new_product_classes.extend(csv.DictReader(classes_file, dialect='tab'))

def pipeline(src, dst1, dst2, func):
    """Run one ETL step.

    Calls ``func(src, dst1, dst2)`` and discards whatever it returns; the
    step is expected to communicate through the lists it is handed.

    Args:
        src: Input passed to the step as its first argument.
        dst1: First output container passed to the step.
        dst2: Second output container passed to the step (may be ``None``
            for steps that do not use it).
        func: Callable taking ``(src, dst1, dst2)``.
    """
    stage_args = (src, dst1, dst2)
    func(*stage_args)

# ETL 1: Change all the manufacturers to either 'Rowan Warehouse' or 'Rowan Dairy'
def etl_1(src, dst1, dst2=None):
    """Normalise every product's manufacturer and append it to ``dst1``.

    A product whose ``itemType`` is exactly ``'Milk'`` is assigned
    ``'Rowan Dairy'``; every other product is assigned ``'Rowan Warehouse'``.
    The dicts are updated in place, so ``dst1`` ends up holding the very same
    objects that ``src`` holds.

    Args:
        src: Iterable of product dicts, each with an ``'itemType'`` key.
        dst1: List that receives every product, in input order.
        dst2: Unused; present so the signature matches the other ETL steps.
    """
    for item in src:
        is_milk = item['itemType'] == 'Milk'
        item['Manufacturer'] = 'Rowan Dairy' if is_milk else 'Rowan Warehouse'
        dst1.append(item)


# ETL 2: Handle the 1319 products that can directly be mapped to one of the subcategories
def etl_2(src, dst1, dst2):
    """Split products by whether their item type is already a subcategory.

    The set of valid subcategories is taken from the ``'product_subcategory'``
    field of every entry in the module-level ``new_product_classes``.

    Each product dict is tagged in place:

    * ``itemType`` is a known subcategory -> ``meta_code`` 1 plus an
      explanatory ``meta_entry``; the product is appended to ``dst1``.
    * otherwise -> ``meta_code`` -1 and an empty ``meta_entry``; the product
      is appended to ``dst2`` for later steps to handle.

    Args:
        src: Iterable of product dicts, each with an ``'itemType'`` key.
        dst1: List receiving the directly mapped products.
        dst2: List receiving the products that still need mapping.
    """
    # Build the lookup once as a set: membership is then a hash lookup per
    # product rather than a scan over every subcategory.
    known_subcategories = {
        entry['product_subcategory'] for entry in new_product_classes
    }

    for product in src:
        if product['itemType'] in known_subcategories:
            product['meta_code'] = 1
            product['meta_entry'] = 'Mapped by AU from old Item type into new subcategory'
            dst1.append(product)
        else:
            product['meta_code'] = -1
            product['meta_entry'] = ''
            dst2.append(product)


# ETL 3 - Bagel
def etl_3(src, dst1, dst2):
    """Route products whose name contains ``'Bagel'`` to ``dst1``.

    The match is a case-sensitive search anywhere in ``'Product Name'``.
    Matching products are tagged in place with ``meta_code`` 2 and an
    explanatory ``meta_entry``. Non-matching products are appended to
    ``dst2`` unmodified, so they keep whatever meta fields they already had.

    Args:
        src: Iterable of product dicts, each with a ``'Product Name'`` key.
        dst1: List receiving the bagel matches.
        dst2: List receiving everything else.
    """
    pattern = 'Bagel'

    for product in src:
        if re.search(pattern, product['Product Name']):
            product['meta_code'] = 2
            product['meta_entry'] = 'Mapped by AU from character match with Bagel'
            dst1.append(product)
        else:
            # Deliberately left untouched for a later step to classify.
            dst2.append(product)


# ETL 4 - Bread
def etl_4(src, dst1, dst2):
    """Route plain bread products to ``dst1`` and everything else to ``dst2``.

    A product counts as plain bread when its ``itemType`` is exactly
    ``'Bread'`` and its ``'Product Name'`` contains none of 'Cheeseburger',
    'Chicken' or 'Texas Toast' (case-sensitive). Those products are tagged in
    place with ``meta_code`` 3 and an explanatory ``meta_entry``. All other
    products are appended to ``dst2`` unmodified.

    Args:
        src: Iterable of product dicts with ``'itemType'`` and, for bread
            items, ``'Product Name'`` keys.
        dst1: List receiving the plain bread products.
        dst2: List receiving the remaining products.
    """
    excluded_words = ('Cheeseburger', 'Chicken', 'Texas Toast')

    for item in src:
        # The name is only inspected for bread items (short-circuit `and`).
        is_plain_bread = item['itemType'] == 'Bread' and not any(
            re.search(word, item['Product Name']) for word in excluded_words
        )
        if not is_plain_bread:
            dst2.append(item)
            continue

        item['meta_code'] = 3
        item['meta_entry'] = ('Mapped by AU with a character match for Bread '
                              'and assuming all these matches are sliced')
        dst1.append(item)

    
# Working lists for the run. Each "to_be_mapped" list collects the products a
# step could not classify and is the input of the step that follows it.
products_etl_1 = []
products_etl_to_be_mapped_v1 = []
products_etl_to_be_mapped_v2 = []
products_etl_to_be_mapped_v3 = []

# ETL 1: set every manufacturer to 'Rowan Warehouse' or 'Rowan Dairy'.
pipeline(products_old, products_etl_1, None, etl_1)

# ETL 2: products whose item type is already one of the new subcategories.
pipeline(products_etl_1, products_mapped, products_etl_to_be_mapped_v1, etl_2)

# ETL 3: bagels, matched on the product name.
pipeline(products_etl_to_be_mapped_v1, products_mapped, products_etl_to_be_mapped_v2, etl_3)

# ETL 4: bread.
pipeline(products_etl_to_be_mapped_v2, products_mapped, products_etl_to_be_mapped_v3, etl_4)

# Report how many products are still unmapped after all four steps.
print(len(products_etl_to_be_mapped_v3), 'Done', sep='\n')

815
Done


In [14]:
# Dump the list that ETL 2 filled with the products it could not map,
# printing one product dict per line for manual inspection.
for product in products_etl_to_be_mapped_v1:
    print(product)

{'Manufacturer': 'Rowan Warehouse', 'Product Name': 'Jambalaya Rice Mix', 'Size': '12\xa0oz', 'itemType': 'Rice/Rice Mix', 'SKU': '42081001', 'BasePrice': '$2.49', 'meta_code': -1, 'meta_entry': ''}
{'Manufacturer': 'Rowan Warehouse', 'Product Name': 'Jambalaya Rice Mix', 'Size': '8\xa0oz', 'itemType': 'Rice/Rice Mix', 'SKU': '42082001', 'BasePrice': '$1.79', 'meta_code': -1, 'meta_entry': ''}
{'Manufacturer': 'Rowan Warehouse', 'Product Name': 'Guacamole Regular', 'Size': '8\xa0oz', 'itemType': '', 'SKU': '42083001', 'BasePrice': '$3.99', 'meta_code': -1, 'meta_entry': ''}
{'Manufacturer': 'Rowan Warehouse', 'Product Name': 'Coffee Original Blend', 'Size': '12\xa0oz', 'itemType': 'Coffee/Creamer', 'SKU': '42084001', 'BasePrice': '$3.99', 'meta_code': -1, 'meta_entry': ''}
{'Manufacturer': 'Rowan Warehouse', 'Product Name': 'Italian Dressing', 'Size': '16\xa0oz', 'itemType': 'Salad Dressing', 'SKU': '42086001', 'BasePrice': '$2.00', 'meta_code': -1, 'meta_entry': ''}
{'Manufacturer': '