### Task 2: Lookalike Model

In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Directory that holds Customers.csv, Products.csv and Transactions.csv.
# Defaults to the folder the notebook was originally run from; set the
# LOOKALIKE_DATA_DIR environment variable to run it on another machine.
DATA_DIR = Path(os.environ.get("LOOKALIKE_DATA_DIR", r'C:\Users\Bhatta\Downloads'))

customers_df = pd.read_csv(DATA_DIR / "Customers.csv")
products_df = pd.read_csv(DATA_DIR / "Products.csv")
transactions_df = pd.read_csv(DATA_DIR / "Transactions.csv")

In [3]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Step 2: Merge Data
# Attach the product columns to every transaction, then the customer columns.
# Both joins are inner joins, so rows without a match on the key are dropped.
transactions_products = transactions_df.merge(products_df, how="inner", on="ProductID")
full_data = transactions_products.merge(customers_df, how="inner", on="CustomerID")

In [5]:
# Step 3: Feature Engineering
def _top_category(categories):
    """Return the first entry of ``categories.mode()`` (the most frequent value)."""
    return categories.mode()[0]


# Collapse the transaction-level table to one row per CustomerID.
per_customer = full_data.groupby("CustomerID")
customer_features = per_customer.agg(
    TotalSpending=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
    PurchaseFrequency=pd.NamedAgg(column="TransactionID", aggfunc="count"),
    AvgTransactionValue=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
    PreferredCategory=pd.NamedAgg(column="Category", aggfunc=_top_category),
).reset_index()

In [6]:
# Include Region from the Customers data
# This cell reassigns customer_features from itself, so first drop any Region
# column left by a previous run. Without that, re-running the cell would yield
# Region_x / Region_y and the "Region" lookup in preprocessing would fail.
customer_features = customer_features.drop(columns="Region", errors="ignore").merge(
    customers_df[["CustomerID", "Region"]], on="CustomerID"
)

In [7]:
# Step 4: Preprocessing
# Text-valued columns (handled by the one-hot encoder).
categorical_features = [
    "PreferredCategory",
    "Region",
]
# Numeric columns (handled by the standard scaler).
numerical_features = [
    "TotalSpending",
    "PurchaseFrequency",
    "AvgTransactionValue",
]

In [8]:
# One-hot encoding for categorical features and scaling for numerical features
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features),
        ("cat", OneHotEncoder(), categorical_features),
    ],
    # Always return a dense array. With the default threshold (0.3) the output
    # switches to a scipy sparse matrix once the one-hot block makes the overall
    # density low enough, and the later pd.DataFrame(..., columns=...) call
    # expects a dense 2-D array.
    sparse_threshold=0,
)

In [9]:
# Apply the transformations
# Fit the preprocessor on the customer feature table and transform that same
# table in a single call.
customer_features_transformed = preprocessor.fit_transform(customer_features)

In [10]:
# Label the transformed matrix: numeric columns first, followed by the
# one-hot column names reported by the fitted "cat" transformer.
onehot_encoder = preprocessor.named_transformers_["cat"]
onehot_columns = onehot_encoder.get_feature_names_out(categorical_features)
encoded_columns = [*numerical_features, *onehot_columns]

customer_features_encoded = pd.DataFrame(
    data=customer_features_transformed,
    index=customer_features["CustomerID"],
    columns=encoded_columns,
)

In [11]:
# Step 5: Compute Similarity
# Pairwise cosine similarity between the encoded customer rows, wrapped in a
# DataFrame labelled by CustomerID on both axes.
customer_ids = customer_features["CustomerID"]
similarity_matrix = cosine_similarity(customer_features_encoded)
similarity_df = pd.DataFrame(data=similarity_matrix, index=customer_ids, columns=customer_ids)

In [12]:
# Step 6: Generate Recommendations
N_TARGET_CUSTOMERS = 20  # First 20 customers (C0001 - C0020)
TOP_K = 3  # Top 3 similar customers

# Maps each target CustomerID to a list of (similar CustomerID, score) pairs,
# ordered from most to least similar.
lookalike_map = {}
for cust_id in customer_features["CustomerID"].head(N_TARGET_CUSTOMERS):
    scores = similarity_df.loc[cust_id]
    # A customer is trivially identical to itself, so exclude it before ranking.
    top_similar = scores[scores.index != cust_id].nlargest(TOP_K)
    # Store scores as built-in floats: NumPy scalars inside a tuple are rendered
    # through repr(), which differs between NumPy versions and would leak into
    # the printed map and the exported CSV.
    lookalike_map[cust_id] = [
        (other_id, float(score))
        for other_id, score in zip(top_similar.index, top_similar.values)
    ]

In [17]:
# Turn the map into a table with one row per target customer (the dict key
# becomes the row label) and write it to CSV without a header row.
lookalike_df = pd.DataFrame.from_dict(data=lookalike_map, orient="index")
lookalike_df.to_csv(path_or_buf="Dhanush_Udupa_Lookalike.csv", header=False)

In [14]:
# Output
# Print the heading and the raw map on separate lines.
print("Lookalike Map for Customers C0001 - C0020:", lookalike_map, sep="\n")

Lookalike Map for Customers C0001 - C0020:
{'C0001': [('C0190', 0.968215451295126), ('C0048', 0.9410720811249149), ('C0181', 0.9090457490236504)], 'C0002': [('C0088', 0.9663574397998078), ('C0134', 0.9417092227435143), ('C0106', 0.897417999902512)], 'C0003': [('C0052', 0.9847977904024425), ('C0152', 0.9262643125643458), ('C0031', 0.8908812314103669)], 'C0004': [('C0165', 0.9711437718179058), ('C0155', 0.9620829055324883), ('C0169', 0.8873437226586512)], 'C0005': [('C0186', 0.9787905419345101), ('C0146', 0.9598508342465527), ('C0007', 0.9047531872453538)], 'C0006': [('C0168', 0.9732537429499296), ('C0171', 0.9513382554181677), ('C0187', 0.9447449444739173)], 'C0007': [('C0140', 0.9764156563035168), ('C0115', 0.9342226712579291), ('C0005', 0.9047531872453538)], 'C0008': [('C0109', 0.8700104206236069), ('C0139', 0.8117685282119976), ('C0098', 0.7892991816476466)], 'C0009': [('C0010', 0.9760669630706748), ('C0198', 0.9520351377564203), ('C0062', 0.9308169602315751)], 'C0010': [('C0009', 0.

In [15]:
# Column-oriented view of the map: one column per target customer, one row
# per rank position in that customer's list.
lookalike_map_df1 = pd.DataFrame(data=lookalike_map)

In [16]:
# Bare expression so the notebook renders the table as the cell output.
lookalike_map_df1

Unnamed: 0,C0001,C0002,C0003,C0004,C0005,C0006,C0007,C0008,C0009,C0010,C0011,C0012,C0013,C0014,C0015,C0016,C0017,C0018,C0019,C0020
0,"(C0190, 0.968215451295126)","(C0088, 0.9663574397998078)","(C0052, 0.9847977904024425)","(C0165, 0.9711437718179058)","(C0186, 0.9787905419345101)","(C0168, 0.9732537429499296)","(C0140, 0.9764156563035168)","(C0109, 0.8700104206236069)","(C0010, 0.9760669630706748)","(C0009, 0.9760669630706748)","(C0137, 0.9611944701177748)","(C0104, 0.9659896786844346)","(C0099, 0.9855644363688847)","(C0060, 0.9763044912298495)","(C0036, 0.980365127065939)","(C0183, 0.9998750016492791)","(C0075, 0.9694478848397573)","(C0117, 0.947351865308591)","(C0121, 0.902523629526074)","(C0050, 0.8471955082476953)"
1,"(C0048, 0.9410720811249149)","(C0134, 0.9417092227435143)","(C0152, 0.9262643125643458)","(C0155, 0.9620829055324883)","(C0146, 0.9598508342465527)","(C0171, 0.9513382554181677)","(C0115, 0.9342226712579291)","(C0139, 0.8117685282119976)","(C0198, 0.9520351377564203)","(C0111, 0.9708504756032534)","(C0169, 0.9203951647051838)","(C0113, 0.9266666313281656)","(C0108, 0.9198465159324428)","(C0151, 0.9084845085220972)","(C0131, 0.9746260478193932)","(C0067, 0.9168655516177493)","(C0081, 0.8567347350005199)","(C0185, 0.8362687678528234)","(C0081, 0.83132258252199)","(C0140, 0.8359805626087087)"
2,"(C0181, 0.9090457490236504)","(C0106, 0.897417999902512)","(C0031, 0.8908812314103669)","(C0169, 0.8873437226586512)","(C0007, 0.9047531872453538)","(C0187, 0.9447449444739173)","(C0005, 0.9047531872453538)","(C0098, 0.7892991816476466)","(C0062, 0.9308169602315751)","(C0103, 0.9552968720724648)","(C0126, 0.9178664272707295)","(C0195, 0.907840343168503)","(C0141, 0.8387719059266356)","(C0097, 0.9035777546704962)","(C0058, 0.8355728389274786)","(C0042, 0.7522632809302456)","(C0057, 0.8453541123524987)","(C0046, 0.8257522302304171)","(C0119, 0.7808868770746169)","(C0130, 0.7625116428321821)"
