# Task 2: Lookalike Model Implementation

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [3]:
# Load the three raw source tables, in this order: customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./datasets/Customers.csv",
        "./datasets/Products.csv",
        "./datasets/Transactions.csv",
    )
)

Merging datasets to create a comprehensive dataset

In [4]:
# Attach customer attributes to every transaction, then product attributes.
# Both joins use pandas' default inner join, so rows without a matching key
# on either side are dropped.
customer_transactions = transactions.merge(customers, on='CustomerID')
data = customer_transactions.merge(products, on='ProductID')

Feature engineering: Aggregating transaction history per customer

In [6]:
def _most_common(values):
    """Return the first entry of ``values.mode()`` (the most frequent value)."""
    return values.mode()[0]


# One row per customer summarising their transaction history.
customer_profiles = (
    data
    .groupby('CustomerID')
    .agg(
        Region=('Region', _most_common),        # most common region
        Category=('Category', _most_common),    # most common product category
        TotalValue=('TotalValue', 'mean'),      # average transaction value
        Quantity=('Quantity', 'sum'),           # total quantity purchased
    )
    .reset_index()
)

Encoding categorical variables

In [8]:
# One-hot encode the categorical profile columns; every other column is
# carried over unchanged.
categorical_features = ['Region', 'Category']
customer_profiles_encoded = pd.get_dummies(customer_profiles, columns=categorical_features)


Normalizing numerical features

In [9]:
# Standardise the numeric profile columns so neither dominates the similarity
# computation purely because of its scale.
numerical_features = ['TotalValue', 'Quantity']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_profiles_encoded[numerical_features])
customer_profiles_encoded[numerical_features] = scaled_values

In [10]:
# Pairwise cosine similarity between customers over every feature column.
# CustomerID is an identifier rather than a feature, so it is left out.
feature_matrix = customer_profiles_encoded.drop(columns=['CustomerID'])
similarity_matrix = cosine_similarity(feature_matrix)

### Function to get top 3 similar customers

In [11]:
def get_similar_customers(customer_id, similarity_matrix, customer_ids, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        ID of the customer to find lookalikes for. Must be in ``customer_ids``.
    similarity_matrix
        2-D indexable (list of lists or array) of pairwise similarity scores,
        where row/column ``i`` corresponds to ``customer_ids[i]``.
    customer_ids : list
        Customer IDs in the same order as the rows of ``similarity_matrix``.
    top_n : int, default 3
        Maximum number of lookalikes to return (non-negative).

    Returns
    -------
    list of tuple
        Up to ``top_n`` ``(customer_id, score)`` pairs, highest score first.
        Equal scores keep their original row order. The queried customer is
        never included.

    Raises
    ------
    ValueError
        If ``customer_id`` is not present in ``customer_ids``.
    """
    index = customer_ids.index(customer_id)

    # Exclude the queried customer by position instead of assuming they sort
    # to the front: another customer with an identical feature vector also
    # scores 1.0 and, with a lower row index, would sort ahead of them.
    candidates = [
        (other_index, score)
        for other_index, score in enumerate(similarity_matrix[index])
        if other_index != index
    ]
    # list.sort is stable, so ties keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [(customer_ids[other_index], score) for other_index, score in candidates[:top_n]]

Getting the top 3 lookalikes for the first 20 customers (C0001 to C0020)

In [12]:
# Position i in this list is used to address row i of similarity_matrix.
customer_ids = customer_profiles['CustomerID'].tolist()

# Map each of the first 20 profiled customers to their list of
# (lookalike_id, score) pairs.
# NOTE(review): the slice is positional; it equals C0001-C0020 only if every
# one of those IDs has a row in customer_profiles - confirm against the data.
lookalike_results = {
    target_id: get_similar_customers(target_id, similarity_matrix, customer_ids)
    for target_id in customer_ids[:20]
}

### Creating Lookalike.csv

In [15]:
# Flatten each customer's lookalikes into one human-readable string per row.
lookalike_rows = [
    {
        'CustomerID': source_id,
        'Lookalikes': ', '.join(
            f"{match_id} (Score: {match_score:.2f})" for match_id, match_score in matches
        ),
    }
    for source_id, matches in lookalike_results.items()
]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])

# Persist the results without the DataFrame index column.
lookalike_df.to_csv("Anand_Gupta_Lookalike.csv", index=False)
print("Lookalike model completed successfully. Results saved to Anand_Gupta_Lookalike.csv.")

Lookalike model completed successfully. Results saved to Anand_Gupta_Lookalike.csv.
