In [2]:
import pandas as pd
import difflib

# Load data from Excel file
df = pd.read_excel("Aisle-Mapping.xlsx")

# Preprocessing: fill missing values, lowercase strings, and strip whitespaces
df['Grocery ITEM'] = df['Grocery ITEM'].fillna("").str.lower().str.strip()
df['Aisle Category'] = df['Aisle Category'].fillna("").str.lower().str.strip()

# Initialize a dictionary to store mappings
grocery_to_aisle_mapping = {}

# Iterate over each grocery item
for index, row in df.iterrows():
    grocery_item = row['Grocery ITEM']
    aisle_name = row['Aisle Category']

    # Calculate similarity score between the grocery item and each aisle name
    similarity_scores = {aisle: difflib.SequenceMatcher(None, grocery_item, aisle).ratio() for aisle in df['Aisle Category']}

    # Choose the aisle with the highest similarity score
    mapped_aisle = max(similarity_scores, key=similarity_scores.get)

    # Store the mapping in the dictionary
    grocery_to_aisle_mapping[grocery_item] = mapped_aisle

    # Print the process for each grocery item
    print(f"Grocery item: {grocery_item}")
    print(f"Similarity scores: {similarity_scores}")
    print(f"Mapped aisle: {mapped_aisle}")
    print()

# Print the mappings
print("Final mappings:")
for grocery_item, aisle_name in grocery_to_aisle_mapping.items():
    print(f"{grocery_item} mapped to {aisle_name}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Grocery item: shopping bags
Similarity scores: {'alcohol': 0.2, 'baby': 0.23529411764705882, 'bakery': 0.21052631578947367, 'baking': 0.3157894736842105, 'beverages': 0.36363636363636365, 'candy': 0.1111111111111111, 'canned goods': 0.08, 'cereal': 0.10526315789473684, 'condiments': 0.08695652173913043, 'dairy': 0.1111111111111111, 'frozen': 0.21052631578947367, 'garden': 0.10526315789473684, 'household': 0.2727272727272727, 'meat': 0.11764705882352941, 'misc': 0.11764705882352941, 'organic': 0.2, 'pasta & grains': 0.2962962962962963, 'personal care': 0.38461538461538464, 'pet': 0.125, 'produce': 0.1, 'seafood': 0.2, 'snacks': 0.42105263157894735, 'spices': 0.42105263157894735, '': 0.0}
Mapped aisle: snacks

Grocery item: shopping bags
Similarity scores: {'alcohol': 0.2, 'baby': 0.23529411764705882, 'bakery': 0.21052631578947367, 'baking': 0.3157894736842105, 'beverages': 0.36363636363636365, 'candy': 0.1111111111111111, 