<a href="https://colab.research.google.com/github/Rama389/AI-Projects/blob/main/Unsupervisied_Learning_Did_you_Forget_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Build a “Did You Forget” system

In [None]:
#Loading Required Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Read the two input CSVs into DataFrames.
all_orders = pd.read_csv(filepath_or_buffer="all_except_last_orders.csv")
last_orders_subset = pd.read_csv(filepath_or_buffer="last_orders_subset.csv")

# Confirm the load and report each frame's (rows, columns) shape.
print(
    "✅ Datasets loaded successfully.",
    f"All orders: {all_orders.shape}",
    f"Last orders subset: {last_orders_subset.shape}",
    sep="\n",
)


✅ Datasets loaded successfully.
All orders: (28984, 5)
Last orders subset: (5487, 5)


In [None]:
# Step 2: Create a binary User-Item Matrix
# Rows are Members, columns are SKUs. A cell is 1 when the (Member, SKU) pair
# occurs at least once in all_orders and 0 otherwise.
user_item_matrix = (
    all_orders.groupby(['Member', 'SKU'])
    .size()                   # number of rows per (Member, SKU) pair
    .unstack(fill_value=0)    # pivot SKUs into columns; missing pairs become 0
    # Vectorised binarisation. This replaces the element-wise
    # DataFrame.applymap(lambda ...) call, which is deprecated in recent
    # pandas (it emits a FutureWarning) and runs a Python function per cell.
    .gt(0)
    .astype(int)
)

print("✅ User-item matrix created.")
user_item_matrix.head()

  user_item_matrix = user_item_matrix.applymap(lambda x: 1 if x > 0 else 0)


✅ User-item matrix created.


SKU,6884195,7541573,7543241,7547271,7547296,7547323,7548497,7548498,7548511,7548730,...,93141092,93141093,93156751,93174226,93176429,93176430,93176431,93289485,93289486,93289487
Member,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SSCEHNS,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SSCESNS,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SSCEWZO,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SSCHNCE,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SSCLCSW,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Step 3: Item-item similarity
# Transposing the user-item matrix puts one SKU per row, so cosine_similarity
# compares SKUs with each other rather than Members.
item_similarity = cosine_similarity(user_item_matrix.T)

# Wrap the raw array in a DataFrame labelled by SKU on both axes so that
# scores can be looked up by SKU id.
sku_axis = user_item_matrix.columns
item_similarity_df = pd.DataFrame(item_similarity, index=sku_axis, columns=sku_axis)

print("✅ Item-item similarity matrix computed.")

✅ Item-item similarity matrix computed.


In [None]:
# Step 4: Generate Top-5 Recommendations Per Order in last_orders_subset.csv

def recommend_top_k(order_skus, similarity_df, k=5):
    """Return up to ``k`` SKUs most similar to the SKUs already in an order.

    Each candidate's score is the sum of its similarity to every SKU of the
    order that appears as a column of ``similarity_df``. SKUs already in the
    order are never recommended.

    Parameters
    ----------
    order_skus : list
        SKUs currently in the order. A SKU listed more than once contributes
        to the score once per occurrence.
    similarity_df : pandas.DataFrame
        Square item-item similarity matrix labelled by SKU on both axes.
    k : int, default 5
        Maximum number of SKUs to return.

    Returns
    -------
    list
        Recommended SKUs, highest score first. Empty when none of
        ``order_skus`` is a column of ``similarity_df``.
    """
    # Only SKUs present in the similarity matrix can contribute a score.
    known_skus = [sku for sku in order_skus if sku in similarity_df.columns]
    if not known_skus:
        return []

    # One column-select + row-sum instead of aligning and adding a Series
    # per SKU inside a Python loop.
    scores = (
        similarity_df[known_skus]
        .sum(axis=1)
        .drop(labels=order_skus, errors='ignore')   # never re-suggest basket items
    )

    # Tied scores are common with sparse binary data; a stable sort gives
    # them a well-defined order instead of relying on unstable quicksort.
    ranked = scores.sort_values(ascending=False, kind='mergesort')
    return ranked.head(k).index.tolist()


recommendations = []

# Group by Order to work on each customer's last (incomplete) order
grouped_last_orders = last_orders_subset.groupby('Order')

for order_id, group in grouped_last_orders:
    member = group['Member'].iloc[0]       # first Member value seen for this order
    current_skus = group['SKU'].tolist()   # SKUs visible in the order

    # One output row per recommended SKU, in the required format
    for sku in recommend_top_k(current_skus, item_similarity_df, k=5):
        recommendations.append({
            'Member': member,
            'Order': order_id,
            'SKU': sku
        })

print("✅ Recommendations generated for all orders.")

✅ Recommendations generated for all orders.


In [None]:
# Step 5: Create Output DataFrame and Save
# Pin the column names so the frame (and the CSV header) keep the expected
# schema even when no recommendation was produced; without this an empty
# list yields a column-less frame and later column lookups raise KeyError.
recommendation_df = pd.DataFrame(recommendations, columns=['Member', 'Order', 'SKU'])

# Save to CSV with required naming convention
output_file = "GR4_rec_5_sets.csv"
recommendation_df.to_csv(output_file, index=False)

print(f"✅ Recommendation file saved: {output_file}")

✅ Recommendation file saved: GR4_rec_5_sets.csv


In [None]:
# Step 6: Compute Recall@5 per order
#
# For every order in last_orders_subset:
#   visible   = SKUs listed for the order in last_orders_subset
#   full      = SKUs listed for the same (Order, Member) in all_orders
#   forgotten = full - visible
#   Recall@5  = |predicted ∩ forgotten| / |forgotten|   (0.0 when nothing is forgotten)
#
# NOTE(review): all_orders is read from "all_except_last_orders.csv". If that
# file really excludes the last orders, `full` is always empty here and every
# recall is 0.0 — confirm that the ground truth for these orders is available.

# Build both lookups once instead of boolean-filtering all_orders and
# recommendation_df again for every single order.
full_skus_by_order = (
    all_orders.groupby(['Order', 'Member'])['SKU'].apply(set).to_dict()
)
predicted_skus_by_order = (
    recommendation_df.groupby('Order')['SKU'].apply(set).to_dict()
)

recall_records = []

# Re-group last_orders_subset to compare with true last orders
grouped_last_orders = last_orders_subset.groupby('Order')

for order_id, group in grouped_last_orders:
    member = group['Member'].iloc[0]
    visible_skus = set(group['SKU'].tolist())

    # Full basket for the same order and member; empty set when unknown
    full_skus = full_skus_by_order.get((order_id, member), set())
    forgotten_skus = full_skus - visible_skus

    # SKUs recommended earlier for this order; empty set when none
    predicted_skus = predicted_skus_by_order.get(order_id, set())

    if forgotten_skus:
        recall = len(predicted_skus & forgotten_skus) / len(forgotten_skus)
    else:
        # Nothing to recover for this order: recorded as 0.0
        recall = 0.0

    recall_records.append({
        'Order': order_id,
        'Member': member,
        'Recall@5': recall
    })

# Convert to DataFrame (columns pinned so an empty result still merges)
recall_df = pd.DataFrame(recall_records, columns=['Order', 'Member', 'Recall@5'])

# Attach each order's recall to its recommendation rows (inner join, so
# orders that received no recommendation do not appear in the output)
final_df = recommendation_df.merge(recall_df, on=['Order', 'Member'])

# Save to CSV
# NOTE(review): the recommendations file written earlier uses a "GR4_" prefix
# while this one uses "GR5_" — confirm which prefix is intended.
final_df.to_csv("GR5_rec_5_sets_with_recall.csv", index=False)