In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

<h1>1. Load and preprocess the data</h1>


In [14]:
def load_data(file_path):
    """Read the transaction CSV and build the customer-by-product quantity matrix.

    Args:
        file_path: Anything ``pd.read_csv`` accepts (path or file-like object).
            The CSV must contain the columns ``CustomerID``, ``ProductCode``,
            ``ItemName`` and ``Quantity``.

    Returns:
        A 4-tuple ``(matrix, customers, products, product_map)`` where

        * ``matrix`` is a ``csr_matrix`` of float32 summed quantities with one
          row per customer and one column per product,
        * ``customers`` is the row index of the pivot table (CustomerID values),
        * ``products`` is its column index (ProductCode values),
        * ``product_map`` is a dict mapping ProductCode to ItemName.
    """
    transactions = pd.read_csv(file_path)

    # Product code -> display name, built from the distinct (code, name) pairs.
    code_name_pairs = transactions[['ProductCode', 'ItemName']].drop_duplicates()
    product_map = code_name_pairs.set_index('ProductCode')['ItemName'].to_dict()

    # Customers as rows, products as columns. Repeated purchases of one product
    # by one customer are summed; combinations that never occur become 0.
    quantities = transactions.pivot_table(
        values='Quantity',
        index='CustomerID',
        columns='ProductCode',
        aggfunc='sum',
        fill_value=0,
    )
    quantities = quantities.astype(np.float32)

    customers, products = quantities.index, quantities.columns
    return csr_matrix(quantities.values), customers, products, product_map

# Using only relevant columns

In [8]:
def train_model(matrix, components=40):
    """Factorize the interaction matrix into latent item factors.

    Args:
        matrix: Interaction matrix with customers in rows and items in columns
            (the layout produced by ``load_data``). It is transposed before
            fitting so that every item becomes one sample.
        components: Number of latent dimensions passed to ``TruncatedSVD``.

    Returns:
        ``(svd, item_factors)``: the fitted ``TruncatedSVD`` estimator and the
        array returned by ``fit_transform`` on the transposed matrix, i.e. one
        row of latent factors per item.
    """
    items_as_rows = matrix.T
    # Fixed random_state keeps the factorization repeatable between runs.
    reducer = TruncatedSVD(n_components=components, random_state=42)
    latent_item_vectors = reducer.fit_transform(items_as_rows)
    return reducer, latent_item_vectors

def compute_similarities(item_factors):
    """Return the pairwise cosine similarity between all rows of ``item_factors``.

    Args:
        item_factors: 2-D array holding one latent vector per row.

    Returns:
        Square matrix whose entry ``[i, j]`` is the cosine similarity between
        rows ``i`` and ``j`` of ``item_factors``.
    """
    similarity = cosine_similarity(item_factors)
    return similarity

# Aggregate quantities when the same customer purchases the same item more than once

In [9]:
# 3. Recommendation Functions
def personalized_recommendations(customer_id, user_index, sparse_matrix, similarity_matrix, items, product_map, top_n=10):
    """Recommend items similar to what a customer has already purchased.

    Each candidate item is scored by summing its similarity to every item the
    customer has bought, so scores are not bounded by 1. Items the customer
    already bought are excluded.

    Args:
        customer_id: Identifier to look up in ``user_index``.
        user_index: Sequence of customer identifiers, aligned with the rows of
            ``sparse_matrix``.
        sparse_matrix: CSR customer-by-item matrix; the stored column indices
            of a row are treated as that customer's purchased items.
        similarity_matrix: Square item-by-item similarity array.
        items: Sequence of item codes, aligned with the matrix columns.
        product_map: Dict mapping item code to display name.
        top_n: Maximum number of recommendations to return.

    Returns:
        List of ``(item_code, item_name, score)`` tuples ordered by descending
        score; ``item_name`` is ``"Unknown"`` when the code is missing from
        ``product_map``. Returns ``[]`` when the customer is unknown or has no
        purchase history.
    """
    # Only the lookup is expected to fail. Keeping the try this narrow stops
    # unrelated ValueErrors raised while scoring from being swallowed as [].
    try:
        customer_idx = list(user_index).index(customer_id)
    except ValueError:
        return []

    purchased_indices = sparse_matrix[customer_idx].indices
    if len(purchased_indices) == 0:
        # Without purchases every score is 0 and the "top" items would be
        # arbitrary, so report that there is nothing to recommend.
        return []

    scores = similarity_matrix[purchased_indices].sum(axis=0)
    scores[purchased_indices] = -np.inf  # Exclude purchased

    top_indices = np.argsort(-scores)[:top_n]
    return [
        (items[i], product_map.get(items[i], "Unknown"), scores[i])
        for i in top_indices
        if scores[i] > -np.inf  # drops purchased items if top_n reaches them
    ]

def basket_recommendations(basket_items, similarity_matrix, items, product_map, top_n=5):
    """Get recommendations based on current basket items.

    Basket codes are matched against the catalog by their string form, so a
    code is found whether the basket and the catalog store it as ``12345`` or
    ``"12345"``. Each candidate is scored by summing its similarity to every
    matched basket item; basket items themselves are excluded and only
    candidates with a positive score are returned.

    Args:
        basket_items: Iterable of item codes currently in the basket. Codes
            that do not occur in ``items`` are ignored.
        similarity_matrix: Square item-by-item similarity array.
        items: Sequence of item codes, aligned with ``similarity_matrix``.
        product_map: Dict mapping item code to display name.
        top_n: Maximum number of candidates considered.

    Returns:
        List of ``(item_code, item_name, score)`` tuples ordered by descending
        score; ``item_name`` is ``"Unknown Product"`` when the code is missing
        from ``product_map``. Returns ``[]`` (after printing a message) when no
        basket item is in the catalog or when an error occurs.
    """
    # Best-effort by design: any failure is reported and yields an empty list.
    try:
        # Build the code -> position lookup once. Keys are the string form of
        # the catalog codes; setdefault keeps the first position of a code,
        # which is what list.index() would return.
        position_by_code = {}
        for position, code in enumerate(items):
            position_by_code.setdefault(str(code), position)

        basket_indices = [
            position_by_code[str(item)]
            for item in basket_items
            if str(item) in position_by_code
        ]

        if not basket_indices:
            print("No valid basket items found in product catalog")
            return []

        # Aggregate similarities from all basket items
        scores = similarity_matrix[basket_indices].sum(axis=0)

        # Rank only the items that are not already in the basket
        in_basket = np.zeros_like(scores, dtype=bool)
        in_basket[basket_indices] = True
        candidates = np.flatnonzero(~in_basket)
        ranked = candidates[np.argsort(-scores[candidates])[:top_n]]

        return [
            (items[i], product_map.get(items[i], "Unknown Product"), scores[i])
            for i in ranked
            if scores[i] > 0  # Only show positive associations
        ]

    except Exception as e:
        print(f"Error generating recommendations: {str(e)}")
        return []

# Example usage: personalized recommendations for a single customer

In [11]:
# 4. Main Execution
if __name__ == "__main__":
    # End-to-end demo: load transactions, fit the item-factor model, build the
    # item-item similarity matrix and print recommendations for one customer.
    # Every name assigned below becomes a notebook-level global.

    # Load data: sparse customer-by-item matrix, its row labels (customers),
    # its column labels (items) and the product code -> name lookup.
    sparse_matrix, customers, items, product_map = load_data("customer_transactions_coles.csv")

    # Train model (default number of components); svd is the fitted estimator,
    # item_factors holds one latent vector per item.
    svd, item_factors = train_model(sparse_matrix)

    # Compute similarities: pairwise cosine similarity between item vectors
    item_similarity = compute_similarities(item_factors)

    # Example 1: Personalized recommendations
    customer_id = "C1001"  # Replace with actual customer ID
    # An unknown customer ID yields an empty list, so only the header is printed.
    personal_recs = personalized_recommendations(customer_id, customers, sparse_matrix,
                                               item_similarity, items, product_map)
    print(f"\nPersonalized recommendations for {customer_id}:")
    # "Relevance" is a sum of similarities over the customer's purchased items,
    # which is why the printed values can be far larger than 1.
    for i, (code, name, score) in enumerate(personal_recs, 1):
        print(f"{i}. {name} ({code}) | Relevance: {score:.3f}")


Personalized recommendations for C1001:
1. Coles RSPCA Approved Chicken Wing ... approx 1.1kg (2021544) | Relevance: 136.524
2. Coles Simply Honey BBQ Sausage | 560g (5406818) | Relevance: 135.563
3. Coles Deli Thawed Cooked Prawn Plate Small | 200g (6487531) | Relevance: 135.303
4. Coles Beef Chuck Casserole Steak | 850g (6332331) | Relevance: 134.575
5. Coles Hot Smoked Salmon Natural | 150g (3582081) | Relevance: 134.517
6. Coles Finest Pork Sage And Black Pepper Sausa ... 500g (5928507) | Relevance: 133.780
7. Coles Graze Grassfed Beef Diced Steak | 500g (2024393) | Relevance: 133.549
8. Coles No Added Hormone Beef Bones | approx 950g (2062155) | Relevance: 133.261
9. Inglewood Organic Chicken Nibbles | 700g (7275903) | Relevance: 132.925
10. Coles Family Classics Made Easy Slow Cooked Pork ... 800g (3966210) | Relevance: 132.687
