# Task-2

## Importing Required Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics.pairwise import cosine_similarity

## Load Datasets

In [2]:
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

## Merge Datasets

In [3]:
merged_data = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")
merged_data.drop('Price_y', axis=1) 
merged_data = merged_data.rename(columns={'Price_x': 'Price'})

## Building the Lookalike Model

In [4]:
def create_customer_feature():
    # Aggregate customer-level features
    customer_features = merged_data.groupby('CustomerID').agg({
        'Region' : 'first',
        'TotalValue' : ['sum', 'mean', 'count'],
        'Quantity' : ['sum', 'mean'],
        'Price' : ['mean', 'max'],
        'Category' : lambda x: x.mode().iloc[0] if len(x) > 0 else 'Unknown'
    }).reset_index()
    
    customer_features.columns = [
        'CustomerID', 'Region', 'TotalSpend', 'AvgTransactionValue', 'TransactionCount',
        'TotalQuantity', 'AvgQuantity', 'AvgPrice', 'MaxPrice', 'TopCategory'
    ]
    
    return customer_features

In [5]:
def build_similarity_model():
    customer_features = create_customer_feature()
    
    # Preprocessing for numerical and categorical features
    preprocessor = ColumnTransformer(
        transformers = [
                ('num', StandardScaler(), ['TotalSpend', 'AvgTransactionValue', 'TransactionCount',
                                            'TotalQuantity', 'AvgQuantity', 'AvgPrice', 'MaxPrice']),
                ('cat', OneHotEncoder(handle_unknown = 'ignore'), ['Region', 'TopCategory'])
        ])
    return preprocessor, customer_features

In [6]:
def calculate_lookalikes(target_customer_id, top_n=3):
    # Build preprocessing pipeline
    preprocessor, customer_features = build_similarity_model()
    # Prepare feature matrix
    X = customer_features.drop('CustomerID', axis=1)
    
    # Transform features
    X_transformed = preprocessor.fit_transform(X)
    
    # Find target customer's transformed features
    target_index = customer_features[customer_features['CustomerID'] == target_customer_id].index[0]
    target_features = X_transformed[target_index].reshape(1, -1)
    
    # Calculate cosine similarity
    from sklearn.metrics.pairwise import cosine_similarity
    similarities = cosine_similarity(target_features, X_transformed)[0]
    
    # Remove self-similarity and get top N
    similarities[target_index] = -1  # Exclude self
    top_similar_indices = similarities.argsort()[-top_n-1:-1][::-1]
    
    # Prepare lookalike results
    lookalikes = [
        (customer_features.iloc[idx]['CustomerID'], similarities[idx])
        for idx in top_similar_indices
    ]
    
    return lookalikes

In [7]:
# Generate Lookalike Recommendations
lookalike_results = {}
for customer_id in customers['CustomerID'][:20]:
    lookalikes = calculate_lookalikes(customer_id)
    lookalike_results[customer_id] = lookalikes

In [8]:
# Create lookalike.csv
lookalike_df = pd.DataFrame(
    [(cust, lookalike[0], lookalike[1]) for cust, lookalikes in lookalike_results.items()
        for lookalike in lookalikes],
        columns = ['TargetCustomer', 'LookalikeCustomer', 'SimilarityScore']
)

lookalike_df.to_csv('Lookalike.csv', index = False)
print("Lookalike model completed.\nResult saved in Lookalike.csv")

Lookalike model completed.
Result saved in Lookalike.csv


## Test Case

In [9]:
# Test-Cases
target_customer_id = input("Enter Target Customer ID: ")
lookalikes = calculate_lookalikes(target_customer_id)
i=1
print("Target Customer ID : ", target_customer_id)
for lookalike in lookalikes:
    print(f"{i}. lookalike : {lookalike}")
    i += 1
    print()

Enter Target Customer ID: C0012
Target Customer ID :  C0012
1. lookalike : ('C0113', 0.8950479373034015)

2. lookalike : ('C0163', 0.8619963594927427)

3. lookalike : ('C0065', 0.836805824506756)

