
# Store-Specific Product Comparison Tool

1. Select a store and get the best-selling products.
2. Save the best-sellers to CSV.
3. Prompt the user to input a product name.
4. Search for that product across all stores and export results to CSV.
5. Let the user choose **which stores** to compare.
6. Display the **cheapest product** if available.


In [10]:

import pandas as pd
from fuzzywuzzy import process, fuzz
import matplotlib.pyplot as plt


# Helper to normalize item names
def clean_item_name(name):
    if isinstance(name, str):
        return name.lower().strip()
    return name

# Load all datasets
file_paths = {
    "IGA": "IGA 1.csv",
    "Foodland": "foodland_balaklava.csv",
    "Woolworths": "woolworths_cleaned.csv",
    "Coles": "coles_synthetic_dataset.csv"
    
}
dataframes = {k: pd.read_csv(v) for k, v in file_paths.items()}


In [11]:
# Ask the user to enter a product name for partial match searching
product_query = input("Enter product name to search: ").lower().strip()

# Function to convert a price column to numeric and drop rows with invalid/missing values
def clean_price_column(df, column):
    df[column] = pd.to_numeric(df[column], errors="coerce")  # Convert prices to numeric, coerce errors to NaN
    return df.dropna(subset=[column])  # Drop rows where the price is NaN

# Prepare a list to hold all matched rows from all stores
matched_rows = []

# Iterate through each store's DataFrame in the dictionary `dataframes`
for store, df in dataframes.items():
    # Dynamically detect column name for product and price across different formats
    name_col = "Item Name" if "Item Name" in df.columns else "Product Name"
    price_col = (
        "Item Price" if "Item Price" in df.columns else
        "Discounted Price" if "Discounted Price" in df.columns else
        "Best Price"
    )

    # Clean item name column if needed (could apply further normalization here)
    df[name_col] = df[name_col].apply(clean_item_name)  # Optional: a cleaning function you may define

    # Clean the price column (convert to numeric and drop invalids)
    df = clean_price_column(df, price_col)

    # Filter rows where the item name contains the search keyword (case-insensitive)
    matches = df[df[name_col].str.contains(product_query, na=False)].copy()

    # Add store name to each matching row
    matches["Store"] = store

    # Keep only relevant columns and rename them uniformly
    matches = matches[[name_col, price_col, "Store"]]
    matches.columns = ["Product Name", "Price", "Store"]

    # Append this store's matches to the results list
    matched_rows.append(matches)

# Combine matches from all stores into one DataFrame
matched_df = pd.concat(matched_rows, ignore_index=True)

# Save the final matched results to a CSV file
matched_df.to_csv("matching_products.csv", index=False)

# Print a confirmation message and return the result
print("Matching products saved to matching_products.csv")
matched_df


Matching products saved to matching_products.csv


Unnamed: 0,Product Name,Price,Store
0,woolworths scone homestyle buttermilk 6 pack,6.00,Woolworths
1,woolworths soft & glazed milk buns 4 pack,5.50,Woolworths
2,woolworths southern style buttermilk chicken t...,9.00,Woolworths
3,woolworths buttermilk tennessee style chicken ...,8.50,Woolworths
4,thomas dux french washed rind sheep's milk bri...,90.00,Woolworths
...,...,...,...
9175,whittaker's block chocolate creamy milk block ...,8.00,Coles
9176,whittaker's block chocolate milk creamy cara ....,8.00,Coles
9177,whittaker's original peanut slab milk chocolat...,1.25,Coles
9178,whittaker's toasted coconut slab milk chocolat...,1.25,Coles


In [14]:

# Show all matched products
print("Available products for comparison:")
print(matched_df["Product Name"].unique())

# Ask which product to compare
compare_product = input("Enter the exact product name to compare: ").lower().strip()

# Ask which stores to include in comparison
available_stores = matched_df["Store"].unique()
print(f"Available stores: {', '.join(available_stores)}")
selected_stores = input("Enter comma-separated stores to compare (e.g. IGA,Foodland,Coles,Woolworths): ").split(",")

# Filter data
filtered = matched_df[
    (matched_df["Product Name"].str.lower() == compare_product) &
    (matched_df["Store"].isin([s.strip() for s in selected_stores]))
]

# Show cheapest or not found
if filtered.empty:
    print("The specified product was not found in the selected stores.")
else:
    cheapest = filtered.loc[filtered["Price"].idxmin()]
    print(f"Cheapest match found:Store: {cheapest['Store']}, Product: {cheapest['Product Name']}, Price: ${cheapest['Price']}")


Available products for comparison:
['woolworths scone homestyle buttermilk 6 pack'
 'woolworths soft & glazed milk buns 4 pack'
 'woolworths southern style buttermilk chicken thigh fillet burgers 350g'
 ... "whittaker's original peanut slab milk chocolate bar | 50g"
 "whittaker's toasted coconut slab milk chocolate bar | 50g"
 "whittaker's west coast buttermilk caramelised ... 100g"]
Available stores: Woolworths, Coles
The specified product was not found in the selected stores.


In [13]:
# Generate top pricing matches across all stores for the searched product
# Reuse product_query from earlier step
store_results = []

for store, df in dataframes.items():
    name_col = "Item Name" if "Item Name" in df.columns else "Product Name"
    price_col = "Item Price" if "Item Price" in df.columns else "Discounted Price" if "Discounted Price" in df.columns else "Best Price"
    
    df[name_col] = df[name_col].apply(clean_item_name)
    df = clean_price_column(df, price_col)
    
    # Get top 5 cheapest matches that contain the product query
    matched = df[df[name_col].str.contains(product_query, na=False)].copy()
    matched["Store"] = store
    matched = matched.sort_values(price_col).head(5)
    matched = matched[[name_col, price_col, "Store"]]
    matched.columns = ["Product Name", "Price", "Store"]
    store_results.append(matched)

# Concatenate all top matched results
top_prices_df = pd.concat(store_results, ignore_index=True)
top_prices_df.to_csv("top_store_prices.csv", index=False)
print("Top matching store prices saved to top_store_prices.csv")
top_prices_df


Top matching store prices saved to top_store_prices.csv


Unnamed: 0,Product Name,Price,Store
0,essentials coconut milk 400ml,0.95,Woolworths
1,essentials coconut milk 400ml,0.95,Woolworths
2,essentials coconut milk 400ml,0.95,Woolworths
3,essentials coconut milk 400ml,0.95,Woolworths
4,essentials coconut milk 400ml,0.95,Woolworths
5,coles coconut milk | 400ml,0.95,Coles
6,coles coconut milk | 400ml,0.95,Coles
7,coles coconut milk | 400ml,0.95,Coles
8,coles coconut milk | 400ml,0.95,Coles
9,coles coconut milk | 400ml,0.95,Coles
