## Product Mapping v2
### Anthony Ung

#### Some Jupyter things you need to be aware of ...
#### 
#### As long as you run the cells in the correct order, the mapping of the products table is idempotent.
#### If you want to run an individual cell, you need to restart the kernel.
#### Go to "Kernel" > "Restart Kernel and Run up to Selected Cell..."

In [1]:
import csv
import re

In [2]:
# Working lists for the whole notebook. They are reset here so that re-running
# this cell starts from a clean slate.
products_old = []
PRODUCTS_MAPPED = []
PRODUCT_CLASSES_NEW = []

# Read the product and product classes files.
# The csv module expects its input files to be opened with newline='' so that
# it does the line-ending handling itself instead of the text layer.
with open('Products1.txt', 'r', newline='') as csvfile:
    # Pipe-delimited, no quoting: every '|' is a field separator.
    csv.register_dialect('piper', delimiter='|', quoting=csv.QUOTE_NONE)
    # One dict per row, keyed by the column names in the file's header line.
    products_old.extend(csv.DictReader(csvfile, dialect='piper'))

with open('product_class.txt', 'r', newline='') as csvfile:
    # Tab-delimited, no quoting.
    csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)
    PRODUCT_CLASSES_NEW.extend(csv.DictReader(csvfile, dialect='tab'))

In [3]:
class DEBUG:
    """Namespace for inspection helpers.

    The helpers take no ``self`` and are called on the class itself,
    e.g. ``DEBUG.product_dump(rows)``.
    """

    @staticmethod
    def print_product_classes():
        """Print every row of PRODUCT_CLASSES_NEW as pipe-delimited text, header first."""
        print("product_class_id|product_subcategory|product_category|product_department|product_family")
        for product in PRODUCT_CLASSES_NEW:
            print(f"{product['product_class_id']}|{product['product_subcategory']}|{product['product_category']}|{product['product_department']}|{product['product_family']}")

    @staticmethod
    def product_dump(product_arr):
        """Write ``product_arr`` to ``products_to_be_mapped.csv`` in the working directory.

        Args:
            product_arr: list of product dicts. The keys of the first dict
                become the CSV header, so every dict is expected to carry the
                same keys (``csv.DictWriter`` raises ``ValueError`` on extras).

        An empty list is valid input and produces an empty file; no header can
        be written because there is no row to take the column names from.
        """
        with open('products_to_be_mapped.csv', 'w', newline='') as csvfile:
            if not product_arr:
                # Nothing left to map: the file has already been truncated by
                # open(), so just leave it empty instead of indexing [0].
                return

            writer = csv.DictWriter(csvfile, fieldnames=list(product_arr[0]))
            writer.writeheader()
            writer.writerows(product_arr)
    

### A utility function that invokes some ETL code on our behalf

The convention:  
- `func` - Contains ETL code to be invoked on our behalf.
- `src` - The source array
- `dst1` - The destination array for products successfully mapped
- `dst2` - The destination array for products not successfully mapped.

When creating a definition for `func`, the names `src`, `dst1`, and `dst2` have no meaning to the caller.

Each updated product needs to have the following fields:
- `product_class_id` - The code of the new product class
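- `meta_code` - A unique ID for the mapping rule that was applied (every product matched by the same rule shares the same code).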
- `meta_mapped_by` - The initials of the person who mapped the product (e.g. AU, SJ, GK, AB, NB, etc.)
- `meta_reason` - The reason why this product was mapped (e.g. from a character match, from a specific manufacturer, etc.)

In [4]:
def pipeline(func, src, dst1, dst2):
    """Run one ETL step on the caller's behalf.

    ``func`` is called exactly once with the three arrays, in the order
    (source, mapped destination, unmapped destination). Whatever ``func``
    returns is discarded; this function always returns ``None``.
    """
    arrays = (src, dst1, dst2)
    func(*arrays)

def update_product(product, product_class_id, code, mapped_by, reason):
    """Stamp ``product`` in place with its new class id and the mapping metadata.

    Sets ``product_class_id``, ``meta_code``, ``meta_mapped_by`` and
    ``meta_reason`` on the dict, overwriting any previous values.
    Returns ``None``.
    """
    product.update({
        'product_class_id': product_class_id,
        'meta_code': code,
        'meta_mapped_by': mapped_by,
        'meta_reason': reason,
    })

#### Slide 9 stipulates that every product must have a key that will be mapped to our dimension table.

In [5]:
def generate_surrogate_key(src, dst1=None, dst2=None):
    """Number the products in ``src`` consecutively, starting at 1.

    The number is stored on each product under ``product_id``, in iteration
    order. ``dst1`` and ``dst2`` are accepted only to match the
    ``(src, dst1, dst2)`` calling convention; they are not used.
    """
    for key, row in enumerate(src, start=1):
        row['product_id'] = key

# Give every product in products_old its product_id (1, 2, 3, ... in list order).
generate_surrogate_key(products_old)


### Slide 17 stipulates that we have specific suppliers.

In [6]:
def generate_suppliers(src, dst1=None, dst2=None):
    """Set the ``Supplier`` field on every product in ``src``.

    Products whose ``itemType`` is exactly ``'Milk'`` come from
    'Rowan Dairy'; everything else comes from 'Rowan Warehouse'.
    ``dst1`` and ``dst2`` are accepted only to match the
    ``(src, dst1, dst2)`` calling convention; they are not used.
    """
    for row in src:
        is_milk = row['itemType'] == 'Milk'
        row['Supplier'] = 'Rowan Dairy' if is_milk else 'Rowan Warehouse'
            
# Fill in the Supplier field of every product in products_old.
generate_suppliers(products_old)


### Some useful conventions in this cell:

Array names in all caps indicate that either (1) this array shall only be appended to, or (2) this array should not be modified at all.
`PRODUCTS_MAPPED` is Type 1. `PRODUCT_CLASSES_NEW` is Type 2.

In [12]:
# Rules mapped by SJ (meta codes 2-22).
# Layout: (meta_code, product_class_id, reason, predicate).
# Each rule is run as its own complete pass over the still-unmapped products,
# in the order listed, so an earlier rule always wins over a later one.
# Matching is case-sensitive and reads the raw 'Manufacturer' / 'Product Name'
# fields.
# NOTE(review): a few class ids look inconsistent with their stated reasons --
# 'Eggs' shares id 62 with the canned-vegetable rules, 'Waffles' shares id 48
# with the sauce rules, and the Pepperidge Farm "bread" rule uses id 84 (the
# donut id) while the later bread rule uses 27. Confirm against
# product_class.txt before relying on them; the ids are kept exactly as found.
_SJ_RULES = (
    (2, 12, 'All Frito Lay and Pringles items are chips, and all crisps are chips',
     lambda p: ('Frito Lay' in p['Manufacturer']
                or 'Pringles' in p['Manufacturer']
                or 'Crisps' in p['Product Name'])),
    # Only the Starbucks test is combined with the 'Coffee' name check; the
    # parentheses spell out the grouping Python's operator precedence applies.
    (3, 7, 'These manufacturers are only for coffee. starbucks has hot chocolate too so a separate check is needed',
     lambda p: ('O Organics' in p['Manufacturer']
                or 'Safeway Kitchens' in p['Manufacturer']
                or 'Folgers' in p['Manufacturer']
                or ('Starbucks' in p['Manufacturer'] and 'Coffee' in p['Product Name']))),
    (4, 7, 'Remainder of coffee by character match',
     lambda p: 'Coffee' in p['Product Name']),
    (5, 82, 'Goldfish cracker character match',
     lambda p: 'Goldfish' in p['Product Name'] or 'Wheat Thins' in p['Product Name']),
    (6, 48, 'dressing character match as a sauce',
     lambda p: 'Dressing' in p['Product Name']),
    (7, 5, 'barilla manufacturer for only pasta',
     lambda p: 'Barilla' in p['Manufacturer']),
    (8, 62, 'bushs only sells baked beans, which is a canned vegetable',
     lambda p: 'Bushs' in p['Manufacturer']),
    (9, 62, 'manufacturer only makes one item',
     lambda p: 'Oregon' in p['Manufacturer']),
    (10, 84, 'character match Donut',
     lambda p: 'Donut' in p['Product Name']),
    (11, 25, 'character match for bagels',
     lambda p: 'Bagel' in p['Product Name']),
    (12, 62, 'character match for eggs',
     lambda p: 'Eggs' in p['Product Name']),
    (13, 48, 'Sauce is best fit for syrup',
     lambda p: 'Syrup' in p['Product Name']),
    (14, 45, 'character match for cookies',
     lambda p: 'Chips Ahoy' in p['Product Name']),
    (15, 48, 'character match for waffles',
     lambda p: 'Waffles' in p['Product Name'] or 'Wafflers' in p['Product Name']),
    (16, 30, 'character match for juice',
     lambda p: 'Juice' in p['Product Name']),
    (17, 84, 'manufacturer of donut-like products',
     lambda p: 'Tastykake' in p['Manufacturer']),
    (18, 84, 'manufacturer of bread',
     lambda p: 'Pepperidge Farm' in p['Manufacturer'] and 'Stuffing' not in p['Product Name']),
    (19, 4, 'character match hamburger helper',
     lambda p: 'Hamburger Helper' in p['Product Name']),
    (20, 26, 'character match muffin',
     lambda p: 'Muffin' in p['Product Name']),
    (21, 48, 'character match Sauce',
     lambda p: 'Sauce' in p['Product Name']),
    (22, 52, 'manufacturer only makes flavored drinks',
     lambda p: 'Powerade' in p['Manufacturer']),
)

# Rules mapped by RK (meta codes 23-31).
# Layout: (meta_code, product_class_id, reason, manufacturers, name_phrase).
# A rule matches when the lower-cased manufacturer EQUALS one of
# `manufacturers` and `name_phrase` occurs in the lower-cased product name.
# All of these rules share a single pass; the first matching rule wins.
_RK_RULES = (
    (23, 57, 'Zatarains Jambalaya Rice Mix mapped to Rice',
     ('zatarains',), 'jambalaya rice mix'),
    (24, 83, 'Yucatan Guacamole mapped to Dips',
     ('yucatan',), 'guacamole regular'),
    (25, 0, 'White Castle Sliders (custom category: Frozen Entrees)',
     ('white castle',), 'cheeseburger heat & serve sliders'),
    (26, 30, 'Welchs Concord Grape mapped to Juice',
     ('welchs',), 'farmers pick concord grape'),
    (27, 11, 'Velveeta Shells & Cheese mapped to Cheese',
     ('velveeta',), 'shells & cheese'),
    (28, 51, 'Swiss Miss Hot Cocoa Mix mapped to Hot Beverages',
     ('swiss miss',), 'hot cocoa mix'),
    (29, 21, 'Sunset cleaner mapped to Cleaners',
     ('sunset',), 'cleaner'),
    (30, 52, 'Sunny D mapped to Flavored Drinks',
     ('sunny delight drinks', 'sunny d'), 'sunny delight'),
    (31, 27, 'Bread product mapped to Sliced Bread',
     ('sun-maid', 'sunbeam', 'stroehmann'), 'bread'),
)


def _claim_product(product, dst1, dst2, class_id, code, mapped_by, reason):
    """Tag ``product`` as mapped and move it from ``dst2`` to ``dst1``."""
    update_product(
        product=product,
        product_class_id=class_id,
        code=code,
        mapped_by=mapped_by,
        reason=reason)
    dst1.append(product)
    dst2.remove(product)


def natural_mapping(src, dst1, dst2, rules_path='full_mapping_rules_all.json'):
    """Assign a new product class to as many products as possible.

    The rules run in four stages, and a product is claimed by the first rule
    that matches it:

    1. Code 1 (AU): the product's ``itemType`` equals a subcategory name in
       ``PRODUCT_CLASSES_NEW``.
    2. Codes 2-22 (SJ): ``_SJ_RULES``, one full pass per rule.
    3. Codes 23-31 (RK): ``_RK_RULES``, one shared pass.
    4. Codes 1000+ (RK): rules loaded from the JSON file at ``rules_path``.
       The code is a running counter that increases by one for every product
       claimed in this stage.

    Args:
        src: iterable of product dicts. Must provide ``itemType``,
            ``Manufacturer`` and ``Product Name``.
        dst1: list that receives every mapped product (append only).
        dst2: list that ends up holding the products no rule matched. Stage 1
            appends to it and stages 2-4 remove from it, so anything already
            in the list when the function is called is processed by stages
            2-4 as well.
        rules_path: path of the JSON rule file. It must contain a list of
            objects with the keys ``keywords`` (list of strings that must all
            occur in the lower-cased product name), ``manufacturer`` (string
            that must occur in the lower-cased manufacturer), ``class_id``
            and ``reason``. If the file does not exist, a warning is printed
            and stage 4 is skipped.

    Products are modified in place through ``update_product``.
    Returns ``None``.
    """
    import json

    # ---- Stage 1: old item type == new subcategory name -------------------
    # Disallow duplicate product classes: 'Coffee' and 'Cleaners' are left out
    # of the lookup. Duplicates were identified with
    #     cat product_class.txt | cut -f 2 | sort | uniq -c | sort -r | head
    # If a subcategory name still occurs more than once, the last row wins.
    subcategory_to_class = {
        row['product_subcategory']: row['product_class_id']
        for row in PRODUCT_CLASSES_NEW
        if row['product_subcategory'] not in ('Coffee', 'Cleaners')
    }
    # Resolve a duplicate and verified by hand to use the smaller of the two.
    # NOTE(review): this id is an int while the ids taken from
    # PRODUCT_CLASSES_NEW are presumably strings (they come from a CSV) --
    # confirm that downstream consumers accept both.
    subcategory_to_class['Fresh Vegetables'] = 13

    for product in src:
        if product['itemType'] in subcategory_to_class:
            update_product(
                product=product,
                product_class_id=subcategory_to_class[product['itemType']],
                code=1,
                mapped_by='AU',
                reason='Mapped from old item type into new subcategory')
            dst1.append(product)
        else:
            dst2.append(product)

    # ---- Stage 2: one pass over the leftovers per SJ rule -----------------
    for code, class_id, reason, matches in _SJ_RULES:
        # Iterate over a snapshot because matches are removed from dst2.
        for product in dst2[:]:
            if matches(product):
                _claim_product(product, dst1, dst2, class_id, code, 'SJ', reason)

    # ---- Stage 3: a single pass, first matching RK rule wins --------------
    for product in dst2[:]:
        pname = product.get('Product Name', '').lower()
        manuf = product.get('Manufacturer', '').lower()

        for code, class_id, reason, manufacturers, phrase in _RK_RULES:
            if manuf in manufacturers and phrase in pname:
                _claim_product(product, dst1, dst2, class_id, code, 'RK', reason)
                break

    # ---- Stage 4: rules supplied as JSON ----------------------------------
    # Only the file access is guarded; a malformed rule should fail loudly.
    try:
        with open(rules_path, "r") as f:
            mapping_rules = json.load(f)
    except FileNotFoundError:
        print(f"Warning: {rules_path} not found. JSON-based mapping skipped.")
        return

    code_counter = 1000
    for product in dst2[:]:
        pname = product.get('Product Name', '').lower()
        manuf = product.get('Manufacturer', '').lower()

        for rule in mapping_rules:
            if all(kw in pname for kw in rule['keywords']) and rule['manufacturer'] in manuf:
                _claim_product(
                    product, dst1, dst2,
                    rule['class_id'], code_counter, 'RK', rule['reason'])
                code_counter += 1
                break  # stop checking rules if one matches

        
# Start from an empty "unmapped" list, then run the mapping: products that a
# rule matches are appended to PRODUCTS_MAPPED, the rest stay in
# Products_To_Be_Mapped.
Products_To_Be_Mapped = []
natural_mapping(products_old, PRODUCTS_MAPPED, Products_To_Be_Mapped)

In [13]:
# Number of products still sitting in Products_To_Be_Mapped.
print(len(Products_To_Be_Mapped))

# Manufacturer and product name of each product in that list.
for i in Products_To_Be_Mapped:
    print(i['Manufacturer'] + ' ' + i['Product Name'])

# Also write the same products to products_to_be_mapped.csv.
DEBUG.product_dump(Products_To_Be_Mapped)

1
All Laundry Detergent 2X Ultra Free Clear
