# Task 2: Lookalike Model
Build a Lookalike Model that takes a user's information as input and recommends 3 similar
customers based on their profile and transaction history. The model should:  
● Use both customer and product information.   
● Assign a similarity score to each recommended customer.

In [3]:
# Lookalike Model Development for eCommerce Dataset

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three raw CSV exports: customers, products and transactions
customers = pd.read_csv("/content/Customers.csv")
products = pd.read_csv("/content/Products.csv")
transactions = pd.read_csv("/content/Transactions.csv")

# Attach customer attributes, then product attributes, to every transaction.
# Left joins keep each transaction row even when a lookup finds no match.
transactions_with_customers = pd.merge(transactions, customers, on="CustomerID", how="left")
merged_data = pd.merge(transactions_with_customers, products, on="ProductID", how="left")

# Feature engineering
# Derive a per-unit price only when the merged frame has no "Price" column
if "Price" not in merged_data.columns:
    merged_data["Price"] = merged_data["TotalValue"].div(merged_data["Quantity"])


def _most_frequent_category(categories):
    """Return the first modal value of *categories*, or "Unknown" if there is none."""
    modes = categories.mode()
    if modes.empty:
        return "Unknown"
    return modes.iloc[0]


# Collapse the transaction rows into one profile row per customer
customer_profiles = merged_data.groupby("CustomerID").agg(
    TotalValue=("TotalValue", "sum"),               # Total spending
    Quantity=("Quantity", "sum"),                   # Total quantity purchased
    Price=("Price", "mean"),                        # Average price of products purchased
    Category=("Category", _most_frequent_category), # Most frequent category
).reset_index()

# Encoding categorical data
# Keep every category level. Dropping the first level (a regression-style
# encoding) would represent that category as an all-zero vector, so cosine
# similarity would treat it differently from the other categories.
# dtype=float keeps the indicator columns numeric alongside the scaled features.
customer_profiles = pd.get_dummies(customer_profiles, columns=["Category"], dtype=float)

# Standardizing numerical features so no single spend/volume column dominates
scaler = StandardScaler()
numerical_features = ["TotalValue", "Quantity", "Price"]
customer_profiles[numerical_features] = scaler.fit_transform(customer_profiles[numerical_features])

# Compute the pairwise similarity matrix on every column except the identifier;
# row/column i corresponds to row i of customer_profiles
feature_matrix = customer_profiles.drop(columns="CustomerID")
similarity_matrix = cosine_similarity(feature_matrix)

# Generate lookalikes for the first 20 rows of customer_profiles
# (or for every row when there are fewer than 20 profiles)
TOP_K = 3  # number of lookalikes reported per customer
customer_ids = customer_profiles["CustomerID"].to_numpy()
n_targets = min(20, len(customer_ids))

lookalike_map = {}
for idx in range(n_targets):
    scores = similarity_matrix[idx]
    # Stable descending order: equal scores keep their row order
    ranked = np.argsort(-scores, kind="stable")
    # Exclude the customer itself by position instead of assuming it sorts
    # first: another customer with a tied (or rounded-higher) score could take
    # slot 0, which would list the customer as its own lookalike.
    neighbours = [i for i in ranked if i != idx][:TOP_K]
    lookalike_map[customer_ids[idx]] = [(customer_ids[i], scores[i]) for i in neighbours]

# Create Lookalike.csv
# Flatten the mapping into one record per (customer, lookalike) pair
lookalike_list = [
    {
        "CustomerID": cust_id,
        "SimilarCustomerID": similar_cust_id,
        "SimilarityScore": score,
    }
    for cust_id, lookalikes in lookalike_map.items()
    for similar_cust_id, score in lookalikes
]

# Write the records to disk without the DataFrame index column
lookalike_df = pd.DataFrame(lookalike_list)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model completed and saved to Lookalike.csv!")


Lookalike model completed and saved to Lookalike.csv!
