In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the customer, product and transaction tables from disk.
customers = pd.read_csv(r'C:\Users\hp\Downloads\Customers.csv')
products = pd.read_csv(r'C:\Users\hp\Downloads\Products.csv')
transactions = pd.read_csv(r'C:\Users\hp\Downloads\Transactions.csv')

# Merge datasets on CustomerID and ProductID. Both merges use pandas' default
# inner join, so transactions without a matching customer or product row are
# dropped, and customers with no transactions never reach the feature table.
merged_data = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

# Feature engineering: one row per customer with three aggregate features.
customer_features = merged_data.groupby('CustomerID').agg({
    'Quantity': 'sum',           # Total quantity purchased
    'TotalValue': 'sum',         # Total spending
    'TransactionDate': 'count'   # Number of transactions (non-null dates)
}).reset_index()

# Standardize the features so that no single column dominates the similarity
# purely because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(customer_features[['Quantity', 'TotalValue', 'TransactionDate']])

# Pairwise cosine similarity between all customers' scaled feature vectors.
similarity_matrix = cosine_similarity(features_scaled)

# Label rows and columns with CustomerID so scores can be looked up by ID.
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features['CustomerID'], columns=customer_features['CustomerID'])

# Lookalike recommendations: customer ID -> list of (similar ID, score) pairs.
lookalike_recommendations = {}

for customer_id in customer_features['CustomerID'].head(20):  # For first 20 customers
    # Remove the customer's own entry by label instead of assuming it sorts
    # into position 0. That assumption breaks when another customer ties at
    # similarity 1.0 (identical or proportional features), or when this
    # customer's standardized vector is all zeros and its self-similarity is
    # therefore 0 rather than 1.
    similar_scores = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False)
        .head(3)  # Top 3 similar customers
    )
    lookalike_recommendations[customer_id] = list(zip(similar_scores.index, similar_scores.values))

# Reshape the mapping into one record per customer for export.
lookalike_list = []
for cust_id, recommendations in lookalike_recommendations.items():
    cust_map = {
        "cust_id": cust_id,
        "lookalikes": [
            {"similar_cust_id": rec[0], "similarity_score": rec[1]}
            for rec in recommendations
        ],
    }
    lookalike_list.append(cust_map)

lookalike_df = pd.DataFrame(lookalike_list)

# The "lookalikes" column holds a list of dicts, which to_csv writes out as
# its Python string representation.
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to Lookalike.csv")


Lookalike recommendations saved to Lookalike.csv
