## Product Mapping
### Anthony Ung

In [70]:
import csv
import re

products_old = []
products_mapped = []
new_product_classes = []

# Read the product and product classes files.
with open('Products1.txt', 'r') as csvfile:

    csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)
    for row in csv.DictReader(csvfile, dialect='piper'):
        products_old.append(row)

    with open('product_class.txt', 'r') as csvfile:
        csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)
        
        for row in csv.DictReader(csvfile, dialect='tab'):
            new_product_classes.append(row)

def pipeline(src, dst1, dst2, func):
    func(src, dst1, dst2)

# ETL 1: Change all the manufacturers to either 'Rowan Warehouse' or 'Rowan Dairy'
def etl_1(src, dst1, dst2=None):
    for product in src:
        if product['itemType'] == 'Milk':
            product['Supplier'] = 'Rowan Dairy'
        else:
            product['Supplier'] = 'Rowan Warehouse'
    
        dst1.append(product)


# ETL 2: Handle the 1319 products that can directly be mapped to one of the subcategories
def etl_2(src, dst1, dst2):
    product_classes = {}
    
    for p_entry in new_product_classes:
        product_classes[p_entry['product_subcategory']] = int(p_entry['product_class_id'])
    
    for product in src:
        if product['itemType'] in product_classes.keys():
            product['itemCode'] = product_classes[p_entry['product_subcategory']]
            product['meta_code'] = 1
            product['meta_entry'] = 'Mapped by AU from old Item type into new subcategory'
            dst1.append(product)
        else:
            product['meta_code'] = -1
            product['meta_entry'] = ''
            dst2.append(product)


# ETL 3 - Bagel
def etl_3(src, dst1, dst2):
    product_classes = []
    
    for p_entry in new_product_classes:
        product_classes.append(p_entry['product_subcategory'])

    pattern = 'Bagel'
    
    for product in src:
        if re.search(pattern, product['Product Name']):
            product['itemCode'] = 25
            product['meta_code'] = 2
            product['meta_entry'] = 'Mapped by AU from character match with Bagel'
            dst1.append(product)
        else:
            dst2.append(product)


# ETL 4 - Bread
def etl_4(src, dst1, dst2):
    for product in src:
        if product['itemType'] != 'Bread':
            dst2.append(product)
        else:
            pattern_1 = 'Cheeseburger'
            pattern_2 = 'Chicken'
            pattern_3 = 'Texas Toast'
            
            if re.search(pattern_1, product['Product Name']) \
                or re.search(pattern_2, product['Product Name']) \
                or re.search(pattern_3, product['Product Name']):
            
                dst2.append(product)

            else:
                product['itemCode'] = 27
                product['meta_code'] = 3
                product['meta_entry'] = 'Mapped by AU with a character match for Bread ' \
                                            'and assuming all these matches are sliced'
                dst1.append(product)


# ETL 5 - Snacks
def etl_5(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Dip '

        if re.search(pattern_1, product['Product Name']):
            product['itemCode'] = 83
            product['meta_code'] = 4
            product['meta_entry'] = 'Mapped by AU with a character match for Dips'
            dst1.append(product)
            continue

        pattern_2 = 'Doritos'
        if re.search(pattern_2, product['Product Name']):
            product['itemCode'] = 12
            product['meta_code'] = 5
            product['meta_entry'] = 'Mapped by AU with a character match for Doritos'
            dst1.append(product)
            continue

        pattern_3 = 'Ruffles'
        if re.search(pattern_3, product['Product Name']):
            product['itemCode'] = 12
            product['meta_code'] = 6
            product['meta_entry'] = 'Mapped by AU with a character match for Chips'
            dst1.append(product)
            continue

        pattern_4 = 'Lays'
        if re.search(pattern_4, product['Product Name']):
            product['itemCode'] = 12
            product['meta_code'] = 7
            product['meta_entry'] = 'Mapped by AU with a character match for Lays'
            dst1.append(product)
            continue

        pattern_5 = 'Tostitos'
        pattern_6 = 'Salsa'
        if ((re.search(pattern_5, product['Product Name'])) and (not (re.search(pattern_6, product['Product Name'])))):
            product['itemCode'] = 12
            product['meta_code'] = 8
            product['meta_entry'] = 'Mapped by AU with a character match for Tostitos and ignoring Salsa'
            dst1.append(product)
            continue

        pattern_7 = 'Crisps'
        if re.search(pattern_7, product['Product Name']):
            product['itemCode'] = 12
            product['meta_code'] = 9
            product['meta_entry'] = 'Mapped by AU with a character match for Crisps'
            dst1.append(product)
            continue

        pattern_8 = 'Salsa'
        if re.search(pattern_8, product['Product Name']):
            product['itemCode'] = 83
            product['meta_code'] = 10
            product['meta_entry'] = 'Mapped by AU with a character match for Salsa'
            dst1.append(product)
            continue

        dst2.append(product)


# ETL 6 - Coffee
def etl_6(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Coffee'
        pattern_2 = 'Cake'
        pattern_3 = 'Coffeemate'
        pattern_4 = 'Creamer'

        if re.search(pattern_1, product['Product Name']) \
            and (not re.search(pattern_2, product['Product Name'])) \
            and (not re.search(pattern_3, product['Product Name'])) \
            and (not re.search(pattern_4, product['Product Name'])):
        
            product['itemCode'] = 7
            product['meta_code'] = 11
            product['meta_entry'] = 'Mapped by AU with a character match for Coffee'
            dst1.append(product)
            continue
            
        dst2.append(product)


# ETL 7 - Donuts
def etl_7(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Donut'

        if re.search(pattern_1, product['Product Name']):          
            product['itemCode'] = 84
            product['meta_code'] = 12
            product['meta_entry'] = 'Mapped by AU with a character match for Donuts'
            dst1.append(product)
            continue
            
        dst2.append(product)


# ETL 8 - Hamburger Helper
def etl_8(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Hamburger Helper'

        if re.search(pattern_1, product['Product Name']):
            product['itemCode'] = 4
            product['meta_code'] = 13
            product['meta_entry'] = 'Mapped by AU with a character match for Hamburger Helper'
            dst1.append(product)
            continue
            
        dst2.append(product)


# ETL 9 - Pasta
def etl_9(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Sauce'

        # "Lasagna With Meat And Sauce Pasta" should be frozen
        pattern_2 = 'Lasagna With Meat And Sauce Pasta'

        if re.search(pattern_1, product['Product Name']) \
            and (not re.search(pattern_2, product['Product Name'])):
            
            product['itemCode'] = 48
            product['meta_code'] = 14
            product['meta_entry'] = 'Mapped by AU with a character match for Sauce'
            dst1.append(product)
            continue
            
        dst2.append(product)


# ETL 10 - Goldfish
def etl_10(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Goldfish'

        if re.search(pattern_1, product['Product Name']):
            product['itemCode'] = 82
            product['meta_code'] = 15
            product['meta_entry'] = 'Mapped by AU with a character match for Goldfish and mapping to crackers'
            dst1.append(product)
            continue

        pattern_2 = 'Chips Ahoy'

        if re.search(pattern_2, product['Product Name']):          
            product['itemCode'] = 45
            product['meta_code'] = 16
            product['meta_entry'] = 'Mapped by AU with a character match for Goldfish and mapping to crackers'
            dst1.append(product)
            continue

        pattern_3 = 'Wheat Thins'

        if re.search(pattern_3, product['Product Name']):
            product['itemCode'] = 82
            product['meta_code'] = 17
            product['meta_entry'] = 'Mapped by AU with a character match for Wheat Thins and mapping to crackers'
            dst1.append(product)
            continue
            
        dst2.append(product)


# ETL 11 - Specific Manufacturer
def etl_11(src, dst1, dst2):
    for product in src:
        pattern_1 = 'Tastykake'

        if re.search(pattern_1, product['Manufacturer']):
            product['itemCode'] = 84
            product['meta_code'] = 18
            product['meta_entry'] = 'Mapped by AU by TastyKake Manufacturer. Made decision that all Tastykake products are donuts'
            dst1.append(product)
            continue

        pattern_2 = 'Welchs'

        if re.search(pattern_1, product['Manufacturer']):
            product['itemCode'] = 30
            product['meta_code'] = 18
            product['meta_entry'] = 'Mapped by AU by Welchs Manufacturer'
            dst1.append(product)
            continue
            
        dst2.append(product)

# ETL 1: Change all the manufacturers to either 'Rowan Warehouse' or 'Rowan Dairy'
products_etl_1 = []
pipeline(products_old, products_etl_1, None, etl_1)


# ETL 2: Handle the 1319 products that can directly be mapped to one of the subcategories
products_etl_to_be_mapped_v1 = []
pipeline(products_etl_1, products_mapped, products_etl_to_be_mapped_v1, etl_2)


# ETL 3: Bagels
products_etl_to_be_mapped_v2 = []
pipeline(products_etl_to_be_mapped_v1, products_mapped, products_etl_to_be_mapped_v2, etl_3)


# ETL 4: Bread
products_etl_to_be_mapped_v3 = []
pipeline(products_etl_to_be_mapped_v2, products_mapped, products_etl_to_be_mapped_v3, etl_4)


# ETL 5: Snacks
products_etl_to_be_mapped_v4 = []
pipeline(products_etl_to_be_mapped_v3, products_mapped, products_etl_to_be_mapped_v4, etl_5)


# ETL 6: Coffee - Make this one earlier next time
products_etl_to_be_mapped_v5 = []
pipeline(products_etl_to_be_mapped_v4, products_mapped, products_etl_to_be_mapped_v5, etl_6)


# ETL 7: Donuts
products_etl_to_be_mapped_v6 = []
pipeline(products_etl_to_be_mapped_v5, products_mapped, products_etl_to_be_mapped_v6, etl_7)


# ETL 8: Hamburger Helper - Needs to be before Pasta
products_etl_to_be_mapped_v7 = []
pipeline(products_etl_to_be_mapped_v6, products_mapped, products_etl_to_be_mapped_v7, etl_8)


# ETL 9: Pasta
products_etl_to_be_mapped_v8 = []
pipeline(products_etl_to_be_mapped_v7, products_mapped, products_etl_to_be_mapped_v8, etl_9)


# ETL 10: Goldfish and other Snacks
products_etl_to_be_mapped_v9 = []
pipeline(products_etl_to_be_mapped_v8, products_mapped, products_etl_to_be_mapped_v9, etl_10)


# ETL 11: TastyKake
products_etl_to_be_mapped_v10 = []
pipeline(products_etl_to_be_mapped_v9, products_mapped, products_etl_to_be_mapped_v10, etl_11)


print(len(products_etl_to_be_mapped_v10))
print('Done')

430
Done


In [61]:
for product in products_etl_to_be_mapped_v9:
    print(product['Product Name'])

Jambalaya Rice Mix
Jambalaya Rice Mix
Guacamole Regular
Italian Dressing
Cheeseburger Heat & Serve Sliders
Farmers Pick Concord Grape
Shells & Cheese Microwave Cup
Shells & Cheese Original
Iced Tea Lemon
English Muffins Double Fiber Honey Wheat
English Muffins Hearty Grains 100% Whole Wheat
English Muffins Multi Grain Light
English Muffins Original
English Muffins Original
English Muffins Original Whole Grain With Omega 3
English Muffins Raisin
English Muffins Triple Health
Cake Dreamies Creme Kakes
Cake Kandy Kake Peanut Butter
Cake Koffee Kake Cream Filled
Cupcakes Buttercream
Cupcakes Chocolate
Cupcakes Krimpet Butterscotch
Cupcakes Lemon
Pie Apple
Pie Cherry
Pie Chocolate Klair
Pie Lemon
Pie Peach
Hot Cocoa Mix Marshmallows
Hot Cocoa Mix Milk Chocolate
Hot Cocoa Mix Milk Chocolate Canister
Hot Cocoa Mix Milk Chocolate No Sugar Added
Sunny Delight Original
Orange Citrus Punch Smooth
Orange Citrus Punch Tangy Original
Stuffing Mix Pork
Lasagna With Meat And Sauce Pasta
Sandwich Steak