# Sales Analyzer & Price Comparator Notebook

In [1]:

import pandas as pd

# Read each retailer's transaction export into its own DataFrame.
# The file names are relative paths, resolved against the current working directory.
woolies_df, coles_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'customer_transactions_woolies.csv',
        'customer_transactions_coles.csv',
    )
)


## Keyword-Based Sales Analyzer

In [2]:

def keyword_sales_summary(keyword, woolies_df, coles_df):
    """Summarise sales of every product whose name contains ``keyword``.

    The two stores' transactions are stacked into one table (each row tagged
    with its store), filtered by product name, and aggregated per item.

    Parameters
    ----------
    keyword : str
        Text to look for inside ``ItemName``. Matching is case-insensitive and
        literal (NOT a regular expression), so characters such as ``|``, ``(``
        or ``+`` that occur in product names can be typed as-is.
    woolies_df, coles_df : pandas.DataFrame
        Transaction tables. Each must provide the ``ItemName``, ``Quantity``
        and ``CustomerID`` columns used below.

    Returns
    -------
    pandas.DataFrame
        One row per matching item, sorted by ``TotalSold`` descending, with
        columns ``ItemName``, ``TotalSold``, ``UniqueCustomers``,
        ``StoreCount`` and ``Remark``. ``Remark`` is ``"Most Sold"`` on the
        first row and an empty string elsewhere. The frame is empty (but has
        the same columns) when nothing matches.

    Side effects
    ------------
    Overwrites ``keyword_sales_summary.csv`` in the current working directory
    with the returned summary.
    """
    # Combine both stores into one DataFrame and label the source store.
    # ignore_index avoids duplicate row labels coming from the two inputs.
    all_data = pd.concat(
        [woolies_df.assign(Store="Woolworths"), coles_df.assign(Store="Coles")],
        ignore_index=True,
    )

    # Keep rows whose item name contains the keyword (case-insensitive).
    # regex=False: the keyword is user input, and product names contain regex
    # metacharacters ("12 Pack | 700g"), so it must be matched literally.
    # na=False: rows with a missing item name never match.
    name_matches = all_data["ItemName"].str.contains(
        keyword, case=False, na=False, regex=False
    )
    filtered = all_data[name_matches]

    # Group the filtered data by item name and summarize:
    # - Total quantity sold
    # - Number of unique customers who bought it
    # - Number of different stores it appeared in
    summary = (
        filtered.groupby("ItemName")
        .agg(
            TotalSold=("Quantity", "sum"),
            UniqueCustomers=("CustomerID", "nunique"),
            StoreCount=("Store", "nunique"),
        )
        .sort_values("TotalSold", ascending=False)  # Best sellers first
        .reset_index()
    )

    # Always create the annotation column so the returned frame and the CSV
    # have the same schema whether or not anything matched.
    summary["Remark"] = ""
    if not summary.empty:
        # After reset_index the top seller sits at label 0
        summary.loc[0, "Remark"] = "Most Sold"

    # Save the result to a CSV file
    summary.to_csv("keyword_sales_summary.csv", index=False)
    return summary


In [3]:
# Ask for the search term; surrounding whitespace is dropped before matching
keyword = input("Enter a keyword to analyze product sales: ").strip()

# Aggregate sales for every product whose name matches the keyword
summary_df = keyword_sales_summary(keyword, woolies_df, coles_df)

# Echo the search term, followed by the aggregated table
print(f"Results for keyword: '{keyword}'", summary_df, sep="\n")

# Collect the matched item names so the user can pick one
# for a follow-up comparison across stores
matching_products = summary_df["ItemName"].to_list()

# List every matched product, one per line
print("\nProducts matching the keyword across stores:")
for product in matching_products:
    print(f"- {product}")


Results for keyword: 'eggs'
                                             ItemName  TotalSold  \
0                               Awona Quail Eggs 400g         48   
1                Coles Free Range Eggs 12 Pack | 700g         35   
2             Sunny Queen Organic Eggs 12 pack | 700g         31   
3                 Coles Cage Free Eggs 18 Pack | 900g         29   
4     Fyfe Family Farm Free Range Eggs 12 Pack | 700g         25   
5          Coles Jumbo Free Range Eggs 12 Pack | 800g         24   
6   Yallamundi Farm Organic Free Range Eggs 12 Pac...         23   
7   Yallamundi Farm Organic Free Range Eggs 12 Pac...         23   
8          Good Yolk Co Cage Free Eggs 12 Pack | 700g         23   
9   Surf Coast Cracking Good Ultimate Free Range E...         22   
10  Sunny Queen Organic Free Range Large Eggs 6 pa...         22   
11    Sunny Queen Cage Free Large Eggs 18 pack | 900g         22   
12                          Smoked Eggs 6 Pack | 350g         21   
13  Sunny Queen Free

## Price Comparison for a Specific Product

In [4]:

def compare_prices(product_name, woolies_df, coles_df):
    """Print the cheapest store/product pair whose name contains ``product_name``.

    Both stores' rows are stacked, filtered by product name, reduced to the
    minimum ``UnitPrice`` per (``ItemName``, ``Store``) pair and printed from
    cheapest to most expensive.

    Parameters
    ----------
    product_name : str
        Full or partial product name. Matching against ``ItemName`` is
        case-insensitive and literal (NOT a regular expression), so names
        containing ``|``, ``(`` or ``+`` can be typed as-is.
    woolies_df, coles_df : pandas.DataFrame
        Transaction tables expected to provide ``ItemName`` and ``UnitPrice``
        columns. Leading/trailing whitespace in column labels is tolerated.
        The inputs are not modified.

    Returns
    -------
    None
        Results are printed. A message is printed instead when the required
        columns are missing or when no product matches.
    """
    def _strip_label(label):
        # Only string labels can carry stray whitespace; leave others untouched
        return label.strip() if isinstance(label, str) else label

    # Strip whitespace from column labels on copies (rename returns a new
    # frame) so the caller's DataFrames are left exactly as they were passed in,
    # then tag each row with its store and stack the two tables.
    woolies = woolies_df.rename(columns=_strip_label).assign(Store="Woolworths")
    coles = coles_df.rename(columns=_strip_label).assign(Store="Coles")
    combined = pd.concat([woolies, coles], ignore_index=True)

    # Both columns are required for the comparison below
    if 'ItemName' not in combined.columns or 'UnitPrice' not in combined.columns:
        print("Required columns 'ItemName' or 'UnitPrice' are missing.")
        print(f"Available columns: {combined.columns.tolist()}")
        return

    # Case-insensitive partial matching.
    # regex=False: the name is user input and product names contain regex
    # metacharacters ("... | 100g"), so it must be matched literally.
    # na=False: rows with a missing item name never match.
    name_matches = combined["ItemName"].str.contains(
        product_name, case=False, na=False, regex=False
    )
    matches = combined[name_matches]

    # Handle the case where no products matched the input
    if matches.empty:
        print("No matching products found.")
        return

    print(f" Found {len(matches)} matching entries. Grouping by product and store...")

    # Lowest price per product/store combination, cheapest first.
    # A stable sort keeps price ties in (ItemName, Store) order, so the
    # reported cheapest row is deterministic.
    grouped = (
        matches.groupby(["ItemName", "Store"])["UnitPrice"]
        .min()
        .reset_index()
        .sort_values("UnitPrice", kind="mergesort")
    )

    # First row after sorting is the cheapest entry
    cheapest = grouped.iloc[0]

    # Display the best match details clearly
    print(f"\n Cheapest match is:\n'{cheapest['ItemName']}' at {cheapest['Store']} for ${cheapest['UnitPrice']:.2f}")

    # Also display all the grouped results so the user can compare other options
    print("\n All matches:")
    print(grouped)


In [5]:
# Prompt for a full or partial product name: compare_prices does a
# case-insensitive partial match on ItemName, so an exact name is not required
product_name = input("Enter a product name (or part of one) for price comparison: ").strip()
compare_prices(product_name, woolies_df, coles_df)  # Prints the cheapest match and every matching product/store price


 Found 4390 matching entries. Grouping by product and store...

 Cheapest match is:
'Double Phoenix Vermicelli Bean Noodles | 100g' at Coles for $0.85

 All matches:
                                              ItemName       Store  UnitPrice
42       Double Phoenix Vermicelli Bean Noodles | 100g       Coles       0.85
242         Wai Wai Noodles Instant Tom Yum Shrimp 60g  Woolworths       0.85
243        Wai Wai Noodles Rice Vermicelli Instant 55g  Woolworths       0.85
244               Wai Wai Oriental Instant Noodles 60g  Woolworths       0.85
245       Wai Wai Oriental Style Instant Noodles | 60g       Coles       0.85
..                                                 ...         ...        ...
107  Maggi 2 Minute Beef Flavour Noodles 12 Pack | ...       Coles      10.00
225  The Han Kitchen Flaming Chicken Carbonara Nood...  Woolworths      10.50
226  The Han Kitchen Flaming Chicken Stir Fried Noo...  Woolworths      10.50
0    A-sha Mandarin Style Noodles With Onion Sauce ...