In [None]:
import pandas as pd
import difflib

# Load the item -> aisle mapping sheet from the Excel workbook.
df = pd.read_excel("Aisle-Mapping.xlsx")

# Preprocessing: blank out missing cells, then normalise everything to
# lowercase text. The astype(str) step matters: on a column of mixed types
# the .str accessor yields NaN for every non-string cell (e.g. a numeric
# item code), which would silently re-introduce the missing values that
# fillna("") just removed and break the `== ''` checks further down.
df['Grocery ITEM'] = df['Grocery ITEM'].fillna("").astype(str).str.lower()
df['Aisle Category'] = df['Aisle Category'].fillna("").astype(str).str.lower()

def map_item_to_aisle(grocery_item, aisle_mapping):
    """Map a grocery item to an aisle category via fuzzy name matching.

    Only rows of ``aisle_mapping`` that already carry a non-empty
    'Aisle Category' are considered as match candidates. Without that
    restriction an unmapped item would match itself (or another unmapped
    row) and the function would hand back an empty category.

    Args:
        grocery_item: Name of the item to look up (expected to be a str).
        aisle_mapping: DataFrame with 'Grocery ITEM' and 'Aisle Category'
            columns.

    Returns:
        The 'Aisle Category' of the closest matching item (similarity
        ratio >= 0.9, first row wins on duplicate names), or None when
        ``grocery_item`` is blank / not a string or nothing is close enough.
    """
    # A blank or non-string name cannot be matched meaningfully.
    if not isinstance(grocery_item, str) or not grocery_item:
        return None

    categories = aisle_mapping['Aisle Category']
    known = aisle_mapping.loc[categories.notna() & (categories != '')]

    # difflib needs plain strings; de-duplicate so repeated names are
    # compared only once.
    candidates = [
        name for name in known['Grocery ITEM'].unique()
        if isinstance(name, str)
    ]

    matched_items = difflib.get_close_matches(
        grocery_item, candidates, n=1, cutoff=0.9
    )
    if not matched_items:
        return None

    is_match = known['Grocery ITEM'] == matched_items[0]
    return known.loc[is_match, 'Aisle Category'].iloc[0]

# Rows whose aisle category is still blank are the ones that need mapping.
unmapped_items = df[df['Aisle Category'] == '']

total_unmapped = len(unmapped_items)
print(f"Total unmapped items: {total_unmapped}")

print("Mapping in progress...")

# Reference table for matching: only rows that already have both an item
# name and a category. It is a snapshot taken before the loop, so a fuzzy
# match made for one row can never become the basis for another. Duplicate
# names are dropped (first occurrence kept) so each name is compared once.
has_category = df['Aisle Category'].notna() & (df['Aisle Category'] != '')
has_name = df['Grocery ITEM'].notna() & (df['Grocery ITEM'] != '')
reference_items = df[has_category & has_name].drop_duplicates(subset='Grocery ITEM')

# The same item name typically appears on many rows; remember the lookup
# result per distinct name instead of repeating the fuzzy search.
aisle_by_item = {}

# Iterate over unmapped items and map them
mapped_count = 0
for row_index, item_name in unmapped_items['Grocery ITEM'].items():
    # Blank or non-string names cannot be matched.
    if not isinstance(item_name, str) or not item_name:
        continue

    if item_name not in aisle_by_item:
        aisle_by_item[item_name] = map_item_to_aisle(item_name, reference_items)
    aisle_category = aisle_by_item[item_name]

    if aisle_category:
        # Write to this row's own index. Looking the row up again by item
        # name would always hit the first row with that name and leave
        # duplicate unmapped rows blank.
        df.at[row_index, 'Aisle Category'] = aisle_category
        mapped_count += 1
        print(f"Iteration {mapped_count}: {item_name} mapped to {aisle_category}")

# Save the mapped items to a new Excel file
df.to_excel("Mapped-Aisle-Mapping.xlsx", index=False)

print(f"Mapping completed. Total mapped items: {mapped_count}")


Streaming output truncated to the last 5000 lines.
Iteration 7329: tropical fruit mapped to alcohol
Iteration 7330: whole milk mapped to baby
Iteration 7331: butter milk mapped to seafood
Iteration 7332: rolls/buns mapped to candy
Iteration 7333: whole milk mapped to baby
Iteration 7334: rolls/buns mapped to candy
Iteration 7335: fruit/vegetable juice mapped to organic
Iteration 7336: whole milk mapped to baby
Iteration 7337: butter mapped to misc
Iteration 7338: other vegetables mapped to baking
Iteration 7339: whole milk mapped to baby
Iteration 7340: other vegetables mapped to baking
Iteration 7341: whole milk mapped to baby
Iteration 7342: fruit/vegetable juice mapped to organic
Iteration 7343: rolls/buns mapped to candy
Iteration 7344: other vegetables mapped to baking
Iteration 7345: other vegetables mapped to baking
Iteration 7346: other vegetables mapped to baking
Iteration 7347: pot plants mapped to cereal
Iteration 7348: whole milk mapped to baby
Iteration 7349: