In [1]:
#Importing Necessary Libraries

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the three source tables from CSV files in the working directory.
transactions, products, customers = (
    pd.read_csv(csv_path)
    for csv_path in ('Transactions.csv', 'Products.csv', 'Customers.csv')
)

In [3]:
# Attach product details, then customer details, to every transaction.
# Both joins are left joins, so a transaction row is kept even when the
# product or customer lookup finds no match.
transactions_products = pd.merge(transactions, products, how="left", on="ProductID")
full_data = pd.merge(transactions_products, customers, how="left", on="CustomerID")

In [4]:
# Summarise each customer's purchase history: summed TotalValue, summed
# Quantity, and the most frequently bought ProductID.
def _most_frequent(values):
    """Return the first mode of ``values``, or None when ``mode()`` is empty."""
    modes = values.mode()
    if modes.empty:
        return None
    return modes[0]


customer_transactions = (
    full_data
    .groupby("CustomerID")
    .agg(
        total_spent=("TotalValue", "sum"),
        total_quantity=("Quantity", "sum"),
        product_preference=("ProductID", _most_frequent),
    )
    .reset_index()
)

In [5]:
# Start from the full customer list and attach the per-customer aggregates.
# The left join keeps customers that have no aggregated transactions.
customer_profiles = pd.merge(
    customers,
    customer_transactions,
    how="left",
    on="CustomerID",
)

In [6]:
# Expand the categorical columns into indicator columns.
# drop_first=True omits the first level of each encoded column.
categorical_columns = ["Region", "product_preference"]
customer_profiles_encoded = pd.get_dummies(
    customer_profiles,
    columns=categorical_columns,
    drop_first=True,
)

In [7]:
# Replace every remaining NaN with 0, e.g. the spend and quantity totals of
# customers that had no transactions to aggregate.
customer_profiles_encoded = customer_profiles_encoded.fillna(value=0)

In [8]:
# Build the numeric feature matrix used for the similarity calculation.
# The identifier, name and signup-date columns are not model inputs, so they
# are removed before every remaining column is standardized.
non_feature_columns = ["CustomerID", "CustomerName", "SignupDate"]
feature_frame = customer_profiles_encoded.drop(columns=non_feature_columns)

scaler = StandardScaler()
customer_features = scaler.fit_transform(feature_frame)

In [9]:
# Pairwise cosine similarity between all rows of the feature matrix;
# entry [i, j] compares customer row i with customer row j.
similarity_matrix = cosine_similarity(X=customer_features)

In [10]:
# Lookup table from CustomerID to its row position in the encoded frame,
# which is also its row/column position in the similarity matrix.
customer_id_to_index = dict(
    zip(
        customer_profiles_encoded["CustomerID"],
        range(len(customer_profiles_encoded)),
    )
)

In [11]:
# Get the top 3 similar customers for each of the first 20 customers.
#
# lookalike_results maps a CustomerID to a list of up to three
# (lookalike CustomerID, similarity score) pairs, best match first.

# Row i of similarity_matrix was built from row i of customer_profiles_encoded,
# so matrix positions are translated back to IDs through that same frame.
matrix_customer_ids = customer_profiles_encoded["CustomerID"].tolist()

lookalike_results = {}
for customer_id in customers["CustomerID"][:20]:  # First 20 customers (C0001 - C0020)
    idx = customer_id_to_index[customer_id]

    # Remove the customer's own row by position rather than assuming it sorts
    # first: the sort is stable, so a tie at the top score (for example an
    # identical feature vector at a lower row position) would otherwise list
    # the customer as their own lookalike and drop a genuine match.
    candidates = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the three highest-scoring other customers.
    lookalike_results[customer_id] = [
        (matrix_customer_ids[other_idx], score)
        for other_idx, score in candidates[:3]
    ]

In [13]:
# Write the lookalike table to Lookalike.csv.
# Each row holds one customer and a list of "<id>:<score>" strings, with the
# score formatted to two decimal places.
lookalike_data = [
    {
        "cust_id": cust_id,
        "lookalikes": [f"{lid}:{score:.2f}" for lid, score in lookalikes],
    }
    for cust_id, lookalikes in lookalike_results.items()
]

lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model completed and results saved to Lookalike.csv.")

Lookalike model completed and results saved to Lookalike.csv.


In [14]:
# Echo one line per customer with that customer's formatted lookalike list.
for row in lookalike_df.itertuples(index=False):
    print(f"Customer ID: {row.cust_id}, Lookalikes: {row.lookalikes}")

# NOTE(review): nothing is written to disk in this cell; the message below
# repeats the one printed by the cell that saves the CSV.
print("Lookalike model completed and results saved to Lookalike.csv.")

Customer ID: C0001, Lookalikes: ['C0025:0.98', 'C0140:0.88', 'C0097:0.82']
Customer ID: C0002, Lookalikes: ['C0164:0.93', 'C0030:0.93', 'C0173:0.92']
Customer ID: C0003, Lookalikes: ['C0181:1.00', 'C0031:0.99', 'C0186:0.91']
Customer ID: C0004, Lookalikes: ['C0175:0.92', 'C0022:0.89', 'C0182:0.82']
Customer ID: C0005, Lookalikes: ['C0149:0.94', 'C0023:0.93', 'C0180:0.17']
Customer ID: C0006, Lookalikes: ['C0040:0.95', 'C0114:0.89', 'C0058:0.84']
Customer ID: C0007, Lookalikes: ['C0112:0.96', 'C0009:0.95', 'C0180:0.13']
Customer ID: C0008, Lookalikes: ['C0030:0.98', 'C0173:0.93', 'C0002:0.89']
Customer ID: C0009, Lookalikes: ['C0007:0.95', 'C0112:0.92', 'C0019:0.26']
Customer ID: C0010, Lookalikes: ['C0034:0.94', 'C0019:0.19', 'C0105:0.12']
Customer ID: C0011, Lookalikes: ['C0171:0.99', 'C0073:0.90', 'C0131:0.89']
Customer ID: C0012, Lookalikes: ['C0133:0.98', 'C0128:0.88', 'C0191:0.14']
Customer ID: C0013, Lookalikes: ['C0021:0.97', 'C0101:0.97', 'C0191:0.14']
Customer ID: C0014, Looka