In [32]:
# Task 2: Lookalike Model

In [34]:
# Import necessary libraries
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [36]:
# Load the datasets from a local copy when one is available.
# The directory below is a machine-specific absolute Windows path. Reading it
# unconditionally raises FileNotFoundError wherever that folder is absent and
# aborts "Restart & Run All", so the read is guarded by an existence check.
# The same three frames are loaded from the shared Drive links in a later
# cell, so skipping this cell leaves the rest of the notebook runnable.
LOCAL_DATA_DIR = Path(r"C:\eCommerce")
if LOCAL_DATA_DIR.is_dir():
    customers = pd.read_csv(LOCAL_DATA_DIR / "Customers.csv")
    products = pd.read_csv(LOCAL_DATA_DIR / "Products.csv")
    transactions = pd.read_csv(LOCAL_DATA_DIR / "Transactions.csv")

In [38]:
# Direct-download links for the three datasets shared on Google Drive.
# Every link has the same shape and differs only in the file id.
DRIVE_LINK_TEMPLATE = "https://drive.google.com/uc?id={}"
customers_url, products_url, transactions_url = (
    DRIVE_LINK_TEMPLATE.format(file_id)
    for file_id in (
        "1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE",  # Customers
        "1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0",  # Products
        "1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF",  # Transactions
    )
)


In [40]:
# Read the three CSVs from their Drive links, in the order
# customers -> products -> transactions.
customers, products, transactions = (
    pd.read_csv(source_url)
    for source_url in (customers_url, products_url, transactions_url)
)

In [41]:
# Data preprocessing
# Collapse the transaction log to one row per customer:
#   total_spent        - sum of TotalValue
#   total_quantity     - sum of Quantity
#   total_transactions - number of TransactionID entries
aggregations = {
    'total_spent': ('TotalValue', 'sum'),
    'total_quantity': ('Quantity', 'sum'),
    'total_transactions': ('TransactionID', 'count'),
}
per_customer = transactions.groupby('CustomerID')
# reset_index() turns the CustomerID group key back into an ordinary column.
customer_transactions = per_customer.agg(**aggregations).reset_index()

In [44]:
# Merge with customer profiles
# Left join keeps every customer, including those with no rows in
# customer_transactions; their aggregate columns come back as NaN.
customer_profiles = pd.merge(customers, customer_transactions, on='CustomerID', how='left')

# Zero-fill ONLY the aggregate columns introduced by the join. A blanket
# fillna(0) over the whole frame would also overwrite any missing value in the
# original customer attributes with 0, silently corrupting those columns.
# Assigning the result (instead of inplace=True) keeps the cell re-runnable.
activity_columns = ['total_spent', 'total_quantity', 'total_transactions']
customer_profiles[activity_columns] = customer_profiles[activity_columns].fillna(0)

In [46]:
# Standardize the data
# The three activity columns are the only features fed to the similarity
# model; they are rescaled with StandardScaler so they are on a common scale.
profile_features = ['total_spent', 'total_quantity', 'total_transactions']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_profiles[profile_features])

# Work on a copy so the unscaled customer_profiles frame stays available.
customer_profiles_scaled = customer_profiles.copy()
customer_profiles_scaled[profile_features] = scaled_values

In [48]:
# Compute similarity scores
# Pairwise cosine similarity between every pair of customers, computed on the
# scaled feature columns.
feature_matrix = customer_profiles_scaled[profile_features]
similarity_matrix = cosine_similarity(feature_matrix)

# Label both axes with CustomerID so scores can be looked up by customer.
customer_ids = customer_profiles['CustomerID']
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [50]:
# Generate lookalike recommendations for the first 20 customers
N_TARGET_CUSTOMERS = 20  # how many customers (in file order) get recommendations
TOP_K = 3                # lookalikes reported per customer

lookalike_recommendations = {}
for customer_id in customer_profiles['CustomerID'][:N_TARGET_CUSTOMERS]:
    # Remove the customer's own column entry by label before ranking.
    # Slicing [1:4] off a descending sort only works if the self-similarity is
    # guaranteed to sort first; with ties or floating-point noise another
    # customer can take position 0, which would put the customer in their own
    # lookalike list and drop a genuine match.
    similar_customers = (
        similarity_df[customer_id]
        .drop(customer_id)
        .nlargest(TOP_K)  # highest scores first
    )
    # Store plain Python floats so the printed map shows bare numbers rather
    # than NumPy scalar wrappers.
    lookalike_recommendations[customer_id] = [
        (sim_customer, round(float(score), 4))
        for sim_customer, score in similar_customers.items()
    ]

In [52]:
# Flatten the {customer: [(lookalike, score), ...]} mapping into one record per
# (customer, lookalike) pair, ready to be tabulated and written to CSV.
lookalike_output = [
    {'CustomerID': customer, 'SimilarCustomerID': rec_customer, 'SimilarityScore': score}
    for customer, recommendations in lookalike_recommendations.items()
    for rec_customer, score in recommendations
]

In [54]:
# Tabulate the flattened records: one row per (customer, lookalike) pair.
lookalike_df = pd.DataFrame(data=lookalike_output)
# Persist to the working directory; index=False omits the row-number column.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', index=False)


In [56]:
# Show a heading followed by the first 20 rows of the recommendation table.
print("Lookalike Recommendations:", lookalike_df.head(20), sep="\n")


Lookalike Recommendations:
   CustomerID SimilarCustomerID  SimilarityScore
0       C0001             C0164           0.9999
1       C0001             C0103           0.9959
2       C0001             C0069           0.9833
3       C0002             C0029           0.9998
4       C0002             C0031           0.9990
5       C0002             C0077           0.9939
6       C0003             C0176           0.8964
7       C0003             C0027           0.8694
8       C0003             C0010           0.8256
9       C0004             C0075           0.9978
10      C0004             C0165           0.9945
11      C0004             C0113           0.9941
12      C0005             C0123           0.9998
13      C0005             C0131           0.9996
14      C0005             C0058           0.9996
15      C0006             C0079           0.9999
16      C0006             C0117           0.9897
17      C0006             C0196           0.9480
18      C0007             C0125           

In [58]:
# Build the map-format view of the recommendations:
# {CustomerID: [(SimilarCustomerID, score), ...]} as a shallow copy of the
# recommendations dict, then print it under a heading.
lookalike_map = dict(lookalike_recommendations)
print("\nLookalike Map for first 20 customers:", lookalike_map, sep="\n")


Lookalike Map for first 20 customers:
{'C0001': [('C0164', 0.9999), ('C0103', 0.9959), ('C0069', 0.9833)], 'C0002': [('C0029', 0.9998), ('C0031', 0.999), ('C0077', 0.9939)], 'C0003': [('C0176', 0.8964), ('C0027', 0.8694), ('C0010', 0.8256)], 'C0004': [('C0075', 0.9978), ('C0165', 0.9945), ('C0113', 0.9941)], 'C0005': [('C0123', 0.9998), ('C0131', 0.9996), ('C0058', 0.9996)], 'C0006': [('C0079', 0.9999), ('C0117', 0.9897), ('C0196', 0.948)], 'C0007': [('C0125', 0.998), ('C0140', 0.9979), ('C0092', 0.9978)], 'C0008': [('C0179', 0.9982), ('C0081', 0.9964), ('C0084', 0.993)], 'C0009': [('C0192', 0.9987), ('C0083', 0.9956), ('C0177', 0.9933)], 'C0010': [('C0142', 0.964), ('C0029', 0.9552), ('C0027', 0.9528)], 'C0011': [('C0096', 0.9993), ('C0171', 0.998), ('C0170', 0.9937)], 'C0012': [('C0068', 0.9996), ('C0065', 0.9992), ('C0102', 0.998)], 'C0013': [('C0188', 0.9994), ('C0059', 0.9992), ('C0022', 0.999)], 'C0014': [('C0033', 0.9996), ('C0186', 0.9995), ('C0095', 0.9994)], 'C0015': [('C005