In [25]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np



In [26]:
# Read the three input tables from CSV files in the working directory.
# The order of the file names matches the order of the names being assigned.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)


In [27]:
# Join each transaction to its customer row and then to its product row.
# Both joins use pandas' default (inner) merge, so transactions without a
# matching CustomerID or ProductID are dropped.
data = (
    transactions
    .merge(customers, on="CustomerID")
    .merge(products, on="ProductID")
)


In [28]:
# Feature engineering: collapse the merged transaction rows into one row per customer.
# Parse the transaction timestamp column into datetimes (stored back on `data`).
data["TransactionDate"] = pd.to_datetime(data["TransactionDate"])

# Per-customer spend, volume and variety features. reset_index() turns the
# CustomerID group key back into the first column of the resulting frame.
customer_features = (
    data
    .groupby("CustomerID")
    .agg(
        total_value=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
        total_quantity=pd.NamedAgg(column="Quantity", aggfunc="sum"),
        avg_transaction_value=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
        unique_categories=pd.NamedAgg(column="Category", aggfunc="nunique"),
        unique_products=pd.NamedAgg(column="ProductID", aggfunc="nunique"),
    )
    .reset_index()
)


In [29]:
# Keep only the rows whose CustomerID is one of C0001 .. C0020
# (the letter "C" followed by the number zero-padded to four digits).
# NOTE(review): the later cells scale and compare only these rows, so each
# customer's lookalikes can only come from this same subset — confirm intended.
filtered_customers = customer_features.loc[
    customer_features["CustomerID"].isin(
        [f"C{number:04d}" for number in range(1, 21)]
    )
]


In [30]:
# Standardize every feature column (everything after the first column, which
# holds CustomerID) so that no single feature dominates the similarity score.
scaler = StandardScaler()
scaler.fit(filtered_customers.iloc[:, 1:])
features_normalized = scaler.transform(filtered_customers.iloc[:, 1:])


In [31]:
# Pairwise cosine similarity between the standardized customer rows;
# entry [i, j] compares row i with row j of features_normalized.
similarity_matrix = cosine_similarity(X=features_normalized)



In [32]:
# Build, for every customer, the three most similar *other* customers.
# lookalike_results maps CustomerID -> [(other CustomerID, similarity score), ...]
lookalike_results = {}
customer_ids = filtered_customers['CustomerID'].values

for row_idx, customer in enumerate(customer_ids):
    # Pair each customer ID with its score against the current customer,
    # leaving out the customer's own position on the diagonal.
    scored_peers = []
    for col_idx, (peer_id, score) in enumerate(zip(customer_ids, similarity_matrix[row_idx])):
        if col_idx == row_idx:
            continue
        scored_peers.append((peer_id, score))

    # Highest similarity first (stable sort, so ties keep their original order),
    # then keep the top three.
    scored_peers.sort(key=lambda pair: pair[1], reverse=True)
    lookalike_results[customer] = scored_peers[:3]


In [33]:
# Create Lookalike.csv
# One row per customer; the "Lookalikes" column holds the text form of that
# customer's list of (CustomerID, score) tuples.
#
# Scores are cast to built-in float (and IDs to str) before stringifying:
# NumPy >= 2.0 renders scalars inside containers as "np.float64(...)", which
# would otherwise leak into the CSV and make its content depend on the
# installed NumPy version. With older NumPy the written text is unchanged.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_results.keys()),
    "Lookalikes": [
        str([(str(peer_id), float(score)) for peer_id, score in matches])
        for matches in lookalike_results.values()
    ],
})

# Save Lookalike.csv (written to the working directory, without the row index)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike Model for first 20 customers and Lookalike.csv generated successfully!")

Lookalike Model for first 20 customers and Lookalike.csv generated successfully!
