In [5]:
import pandas as pd
import random

# Fixed seed so that Restart & Run All regenerates exactly the same CSV
# (and therefore the same clusters and recommendations in later cells).
SEED = 42

# Sample data
user_names = ['Arun', 'Divya', 'Ravi', 'Sneha', 'Karthik', 'Meena', 'Ajay', 'Priya', 'Vikram', 'Anjali']
products = [
    {'product_id': 10, 'name': 'Bluetooth Speaker', 'category': 'Electronics', 'price': 1200},
    {'product_id': 11, 'name': 'Running Shoes', 'category': 'Sports', 'price': 2000},
    {'product_id': 12, 'name': 'Notebook Set', 'category': 'Stationery', 'price': 300},
    {'product_id': 13, 'name': 'Ceramic Mug', 'category': 'Home', 'price': 400},
    {'product_id': 14, 'name': 'LED Table Lamp', 'category': 'Home', 'price': 999},
    {'product_id': 15, 'name': 'Wireless Mouse', 'category': 'Electronics', 'price': 650},
    {'product_id': 16, 'name': 'Water Bottle', 'category': 'Sports', 'price': 350},
    {'product_id': 17, 'name': 'Sketch Pens', 'category': 'Stationery', 'price': 150},
    {'product_id': 18, 'name': 'Smartwatch X2000', 'category': 'Electronics', 'price': 1500},
    {'product_id': 19, 'name': 'Scented Candles', 'category': 'Home', 'price': 550},
    {'product_id': 20, 'name': 'Yoga Mat Pro', 'category': 'Sports', 'price': 700}
]


def build_purchase_records(user_names, products, seed=SEED):
    """Generate a reproducible list of synthetic purchase records.

    Each user gets one random budget (3000-10000) and 3 to 7 purchases,
    every purchase being a single product picked at random from `products`.

    Parameters
    ----------
    user_names : list of str
        Users to generate purchases for; user ids are assigned 1, 2, ...
        in list order.
    products : list of dict
        Catalogue entries with the keys 'product_id', 'name', 'category'
        and 'price'.
    seed : int, optional
        Seed for the private random generator. The same seed always
        yields the same records.

    Returns
    -------
    list of dict
        One dict per purchase with the keys 'user_id', 'user_name',
        'product_id', 'product_name', 'category', 'price', 'total_price'
        and 'budget'.
    """
    # A private generator keeps the draw sequence independent of (and
    # invisible to) any other code that uses the global `random` state.
    rng = random.Random(seed)
    records = []
    for user_id, user_name in enumerate(user_names, start=1):
        budget = rng.randint(3000, 10000)
        for _ in range(rng.randint(3, 7)):  # 3 to 7 purchases
            product = rng.choice(products)
            records.append({
                'user_id': user_id,
                'user_name': user_name,
                'product_id': product['product_id'],
                'product_name': product['name'],
                'category': product['category'],
                'price': product['price'],
                # No quantity is modelled, so the line total equals the unit price.
                'total_price': product['price'],
                'budget': budget
            })
    return records


data = build_purchase_records(user_names, products)

# Convert to DataFrame and save
df = pd.DataFrame(data)
df.to_csv('synthetic_ecommerce.csv', index=False)
print("CSV file created successfully.")

CSV file created successfully.


In [6]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Read back the purchase log written by the data-generation cell
df = pd.read_csv('synthetic_ecommerce.csv')

# Step 1: Collapse the purchase log into one feature row per user
purchases_by_user = df.groupby('user_name')
agg = purchases_by_user.agg(
    total_spend=('total_price', 'sum'),
    avg_price=('price', 'mean'),
    budget=('budget', 'first'),  # first value seen for each user
    num_products=('product_id', 'nunique'),  # distinct products, not purchase count
)
agg = agg.reset_index()

# Step 2: Standardise the features so no single column dominates the distances
feature_columns = ['total_spend', 'avg_price', 'budget', 'num_products']
scaler = StandardScaler()
scaler.fit(agg[feature_columns])
features = scaler.transform(agg[feature_columns])

# Step 3: Group users into 3 clusters with K-Means (fixed random_state)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(features)
agg['cluster'] = kmeans.labels_

# Step 4: Summarise each cluster by the mean of its members' features
cluster_profiles = agg.groupby('cluster')[['total_spend', 'budget', 'num_products']].mean()
print("\nCluster Profiles:\n", cluster_profiles)

# Step 5: Recommendation function based on user_name
def recommend(user_name, top_n=3):
    """Suggest the products bought most often within a user's cluster.

    Reads the module-level frames `agg` (one row per user, including the
    'cluster' label) and `df` (the purchase log).

    Parameters
    ----------
    user_name : str
        Name to look up in `agg['user_name']`.
    top_n : int, optional
        How many of the most frequently purchased product ids to keep.

    Returns
    -------
    pandas.DataFrame or str
        A frame with the columns 'product_id', 'product_name', 'category'
        and 'price' for the selected products, ordered by 'product_id'
        (not by popularity) with a fresh 0..n-1 index. If `user_name` is
        not present in `agg`, a "not found" message string is returned
        instead.

    Notes
    -----
    Purchases of the looked-up user count towards the ranking, and
    products that user already bought are not filtered out.
    """
    known_names = agg['user_name'].values
    if user_name not in known_names:
        return f"User '{user_name}' not found."

    # Everyone sharing the user's cluster label (the user included)
    cluster_id = agg.loc[agg['user_name'] == user_name, 'cluster'].iloc[0]
    peer_names = agg.loc[agg['cluster'] == cluster_id, 'user_name']
    peer_purchases = df.loc[df['user_name'].isin(peer_names)]

    # Rank product ids by purchase count inside the cluster, keep the top ones
    purchase_counts = peer_purchases['product_id'].value_counts()
    top_ids = purchase_counts.head(top_n).index.tolist()

    # Look the details up in the full log; repeated purchases collapse to one row
    detail_columns = ['product_id', 'product_name', 'category', 'price']
    details = df.loc[df['product_id'].isin(top_ids), detail_columns].drop_duplicates()

    return details.sort_values(by='product_id').reset_index(drop=True)

# Demo: print the cluster-based suggestions for one known user
sample_user = 'Arun'
sample_recommendations = recommend(sample_user)
print(f"\nSample Recommendations for {sample_user}:\n")
print(sample_recommendations)


Cluster Profiles:
          total_spend       budget  num_products
cluster                                        
0             2350.0  5053.666667           3.0
1             5059.2  8164.200000           4.6
2             6699.5  4392.500000           4.5

Sample Recommendations for Arun:

   product_id  product_name    category  price
0          12  Notebook Set  Stationery    300
1          13   Ceramic Mug        Home    400
2          20  Yoga Mat Pro      Sports    700
