# Lookalike Model
This notebook builds a lookalike model that recommends, for each customer, the 3 most similar customers based on their profile and transaction history.

In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the three raw tables: customer profiles, product catalogue and the
# transaction log (the latter is stored as an Excel workbook).
customers = pd.read_csv('/mnt/data/Customers.csv')
products = pd.read_csv('/mnt/data/Products.csv')
transactions = pd.read_excel('/mnt/data/Transactions.xlsx')

# Build one wide table with a row per transaction, enriched first with the
# buyer's profile and then with the purchased product's attributes. Both
# joins use the default inner join, so transactions without a matching
# customer or product are dropped.
# NOTE(review): if Transactions and Products both carry a column with the
# same name (e.g. "Price"), pandas will suffix them with _x/_y and the plain
# name will no longer exist downstream -- confirm against the actual files.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)


In [None]:

# Feature Engineering
def _dominant_region(regions):
    """Return the most frequent value in *regions*, or NaN if there is none."""
    modes = regions.mode()
    return np.nan if modes.empty else modes.iloc[0]


# Collapse the transaction-level table to one row per customer: total spend,
# total units bought, average unit price and the customer's most common region.
customer_features = (
    merged_data.groupby("CustomerID")
    .agg({
        "TotalValue": "sum",
        "Quantity": "sum",
        "Price": "mean",
        "Region": _dominant_region,
    })
    .reset_index()
)

# One-hot encode 'Region'; drop_first=True omits the first category's
# indicator column so the remaining indicators are not redundant.
customer_features = pd.get_dummies(
    customer_features,
    columns=["Region"],
    drop_first=True,
)


In [None]:

# Standardize every feature column (everything after the leading CustomerID
# column) so that no single feature dominates the similarity score.
feature_columns = customer_features.iloc[:, 1:]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_columns)

# Pairwise cosine similarity between all customers, wrapped in a DataFrame
# labelled by CustomerID on both axes so scores can be looked up by ID.
customer_ids = customer_features["CustomerID"]
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [None]:

# Generate lookalike recommendations for the first 20 customers
lookalike_results = {}

for customer_id in customer_features["CustomerID"].head(20):
    # Remove the customer's own entry by label rather than assuming it sorts
    # first: a tie at the top score (e.g. another customer with an identical
    # feature vector) or floating-point rounding can place someone else ahead,
    # which would put the customer in their own list and drop a real match.
    candidate_scores = similarity_df[customer_id].drop(labels=customer_id)
    similar_customers = candidate_scores.sort_values(ascending=False).head(3)

    # Cast to a built-in float before rounding: str() of a list uses repr()
    # of its items, and NumPy 2 renders scalars as "np.float64(0.98)", which
    # would otherwise end up verbatim in the exported CSV.
    lookalike_results[customer_id] = [
        (idx, round(float(score), 2)) for idx, score in similar_customers.items()
    ]

# Save the results to a CSV file: one row per customer, with the list of
# (lookalike CustomerID, similarity score) pairs serialized as a string.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_results),
    "Lookalikes": [str(v) for v in lookalike_results.values()]
})

lookalike_df.to_csv('/mnt/data/Bhavadharani_Haribabu_Lookalike.csv', index=False)


In [None]:

# Preview the first five rows of the lookalike table
lookalike_df.head(n=5)
