<a href="https://colab.research.google.com/github/Rama389/AI-Projects/blob/main/Unsupervised_Learning_BigBasket.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# ---------------------------------------------------------------------------
# Inputs
# ---------------------------------------------------------------------------
# Two CSV exports are read: one used as purchase history (it must provide at
# least the 'Member' and 'SKU' columns) and one holding the orders that
# recommendations are generated for.
all_orders = pd.read_csv("all_except_last_orders.csv")
last_orders_subset = pd.read_csv("last_orders_subset.csv")

# ---------------------------------------------------------------------------
# Member x SKU purchase-count matrix
# ---------------------------------------------------------------------------
# Count the history rows for every (Member, SKU) pair, then pivot SKUs into
# columns; pairs that never occur are filled with 0.
purchase_counts = all_orders.groupby(['Member', 'SKU']).size()
user_item_matrix = purchase_counts.unstack(fill_value=0)

# ---------------------------------------------------------------------------
# SKU-to-SKU cosine similarity
# ---------------------------------------------------------------------------
# Transposing puts one SKU per row, so the similarity is computed between
# SKUs (compared through their per-member purchase counts), not members.
sku_labels = user_item_matrix.columns
item_similarity = cosine_similarity(user_item_matrix.T)
item_similarity_df = pd.DataFrame(
    item_similarity,
    index=sku_labels,
    columns=sku_labels,
)

# ---------------------------------------------------------------------------
# Popularity ranking (fallback recommendations)
# ---------------------------------------------------------------------------
# value_counts() orders SKUs from most to least frequent in the history.
sku_frequency = all_orders['SKU'].value_counts()
popular_skus = sku_frequency.index.tolist()

# Number of SKUs recommended per order. The "Recall@5" column name used below
# stays a literal because it is part of the output file's schema.
TOP_K = 5


def _recommend_for_basket(basket_skus, similarity, fallback_skus, k=TOP_K):
    """Return up to *k* SKUs to recommend for a partially visible basket.

    Args:
        basket_skus: SKUs already visible in the order; these are never
            recommended.
        similarity: Item-item similarity DataFrame whose index and columns
            are both SKUs.
        fallback_skus: SKUs ordered from most to least preferred, used to
            fill the slots collaborative filtering leaves empty.
        k: Maximum number of recommendations to return.

    Returns:
        A list of at most *k* distinct SKUs, none of them in *basket_skus*.
        Collaborative-filtering hits come first (descending summed
        similarity), followed by fallback SKUs in their given order.
    """
    basket = set(basket_skus)

    # Only basket SKUs present in the similarity matrix can contribute a score.
    known = similarity.columns[similarity.columns.isin(list(basket))]

    cf_recs = []
    if len(known):
        # Summing the basket SKUs' similarity columns gives every candidate
        # its accumulated score in one step; the basket itself is removed.
        scores = similarity[known].sum(axis=1).drop(labels=known, errors='ignore')
        # A score of 0 means the candidate shares no signal with the basket.
        # Dropping those lets the popularity fallback fill the slot instead of
        # an arbitrarily ordered zero-score SKU.
        scores = scores[scores > 0]
        # nlargest avoids sorting the whole catalogue and breaks ties
        # deterministically (first occurrence wins).
        cf_recs = scores.nlargest(k).index.tolist()

    recs = cf_recs
    if len(recs) < k:
        excluded = basket.union(recs)
        for sku in fallback_skus:
            if sku in excluded:
                continue
            recs.append(sku)
            excluded.add(sku)
            if len(recs) == k:
                break
    return recs


# Prepare results
recommendations = []
recall_scores = []  # not used below; kept in case other cells rely on it

grouped = last_orders_subset.groupby("Order")

# Full baskets of the evaluated orders, looked up by (Order, Member). Built
# once here instead of boolean-filtering the whole history for every order.
# NOTE(review): the ground truth is read from all_orders, whose file name
# suggests it excludes the orders being evaluated. If so, every lookup is
# empty and Recall@5 is always 0.0 -- confirm the intended ground-truth source.
evaluated_rows = all_orders[all_orders['Order'].isin(last_orders_subset['Order'])]
full_skus_by_order = {
    key: set(skus)
    for key, skus in evaluated_rows.groupby(['Order', 'Member'])['SKU']
}

for order_id, group in grouped:
    member = group['Member'].iloc[0]
    visible_skus = set(group['SKU'])

    # Hybrid recommendation: item-based CF first, popularity as fallback.
    final_recs = _recommend_for_basket(
        visible_skus, item_similarity_df, popular_skus, TOP_K
    )

    # Recall = share of the SKUs missing from the visible basket that were
    # recommended. Orders with nothing missing score 0.0.
    full_skus = full_skus_by_order.get((order_id, member), set())
    forgotten = full_skus - visible_skus
    matched = forgotten.intersection(final_recs)
    recall_at_5 = len(matched) / len(forgotten) if forgotten else 0.0

    # One output row per recommended SKU; ID is "<order>_<rank>" with rank
    # starting at 1.
    recommendations.extend(
        {
            "ID": f"{order_id}_{rank}",
            "Member": member,
            "Order": order_id,
            "SKU": sku,
            "Recall@5": recall_at_5,
        }
        for rank, sku in enumerate(final_recs, 1)
    )

# Collect the per-recommendation rows into one table. Naming the columns
# explicitly keeps the schema (and the CSV header) intact even when no order
# produced a recommendation.
output_path = "GR4_hybrid_rec_5_sets_with_recall.csv"
final_df = pd.DataFrame(
    recommendations, columns=["ID", "Member", "Order", "SKU", "Recall@5"]
)
final_df.to_csv(output_path, index=False)

# Recall is repeated on every row of an order, so de-duplicate to one value
# per order before averaging.
per_order_recall = final_df[['Order', 'Recall@5']].drop_duplicates()['Recall@5']
average_recall = float("nan") if per_order_recall.empty else per_order_recall.mean()

# Report the path that was actually written, so the message cannot drift from
# the real file name.
print(f"✅ File saved: {output_path}")
print(f"📊 Average Recall@5: {average_recall:.4f}")

✅ File saved: GR1_hybrid_rec_5_sets_with_recall.csv
📊 Average Recall@5: 0.0000
