# Task 2 - Lookalike Model

In [10]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the three source tables from the working directory
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# Attach customer and product columns to every transaction row.
# Both joins are left joins, so no transaction row is dropped.
merged_data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

# Step 1: Aggregating customer transaction data (one row per CustomerID)
customer_data = (
    merged_data
    .groupby("CustomerID")
    .agg(
        TotalSpent=("TotalValue", "sum"),             # Total spending
        TotalQuantity=("Quantity", "sum"),            # Total quantity purchased
        TransactionCount=("TransactionID", "count"),  # Total transactions
    )
    .reset_index()
)

# Merging aggregated data with customer profile information.
# The left join keeps customers that have no transactions; their aggregate
# columns come back as NaN. Zero-fill ONLY those three columns: a blanket
# fillna(0) would also overwrite any missing values in the customer profile
# columns with 0.
customer_profiles = customers.merge(customer_data, on="CustomerID", how="left").fillna(
    {"TotalSpent": 0, "TotalQuantity": 0, "TransactionCount": 0}
)

# Step 2: Standardise the behavioural features so that no single column
# dominates the similarity purely because of its scale
feature_columns = ["TotalSpent", "TotalQuantity", "TransactionCount"]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_profiles[feature_columns])

# Step 3: Pairwise cosine similarity between all customers;
# row i / column j compares customer_profiles rows i and j
similarity_matrix = cosine_similarity(scaled_features)

# Step 4: Defining function to find top 3 similar customers
def find_top_3_similar(customer_index, similarity_matrix, customer_ids):
    """Return the three customers most similar to the one at ``customer_index``.

    Parameters
    ----------
    customer_index : int
        Row position of the target customer in ``similarity_matrix``.
    similarity_matrix : numpy.ndarray
        Square matrix of pairwise similarity scores; row ``i`` holds the
        scores of customer ``i`` against every customer.
    customer_ids : sequence
        Customer identifiers, positionally aligned with the matrix rows.

    Returns
    -------
    list of (customer_id, score)
        Up to three ``(id, similarity rounded to 4 decimals)`` pairs in
        descending order of similarity. Fewer than three are returned when
        fewer than three other customers exist.
    """
    similarity_scores = similarity_matrix[customer_index]
    # Rank all customers from most to least similar. The stable sort on the
    # negated scores makes ties resolve deterministically by lowest position.
    ranked_indices = (-similarity_scores).argsort(kind="stable")
    # Exclude the customer itself explicitly. Slicing off "the last element"
    # is wrong whenever the self-score is not the unique maximum (ties,
    # floating-point round-off, or an all-zero feature vector).
    top_3_indices = [i for i in ranked_indices if i != customer_index][:3]
    return [(customer_ids[i], round(similarity_scores[i], 4)) for i in top_3_indices]

# Step 5: Generating lookalike recommendations for the first 20 customers
customer_ids = customer_profiles["CustomerID"].tolist()
# Cap at the number of customers actually present so that a Customers.csv
# with fewer than 20 rows does not raise IndexError.
n_targets = min(20, len(customer_ids))
lookalike_results = {
    customer_ids[i]: find_top_3_similar(i, similarity_matrix, customer_ids)
    # First 20 rows of customer_profiles (C0001 to C0020 when the file is
    # ordered by CustomerID)
    for i in range(n_targets)
}

# Step 6: Creating a DataFrame for lookalike recommendations
# (long format: one row per customer / lookalike pair)
lookalike_list = [
    {"CustomerID": cust_id, "LookalikeID": lookalike_id, "SimilarityScore": score}
    for cust_id, similar_list in lookalike_results.items()
    for lookalike_id, score in similar_list
]

# Explicit columns keep the CSV header intact even if the list is empty
lookalike_df = pd.DataFrame(
    lookalike_list, columns=["CustomerID", "LookalikeID", "SimilarityScore"]
)

# Saving to CSV
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike recommendations saved to Lookalike.csv.")


Lookalike recommendations saved to Lookalike.csv.


# Displaying the Lookalike.csv file

In [11]:
# Re-read the file written by the previous cell so the table shown below is
# what is actually on disk (this rebinds `lookalike_df` to the reloaded copy).
lookalike_df = pd.read_csv("Lookalike.csv")
lookalike_df

Unnamed: 0,CustomerID,LookalikeID,SimilarityScore
0,C0001,C0164,0.9999
1,C0001,C0103,0.9959
2,C0001,C0069,0.9833
3,C0002,C0029,0.9998
4,C0002,C0031,0.999
5,C0002,C0077,0.9939
6,C0003,C0176,0.8964
7,C0003,C0027,0.8694
8,C0003,C0010,0.8256
9,C0004,C0075,0.9978
