In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
# Read the three input CSVs in one pass; the order of the paths below must
# match the order of the names on the left-hand side.
customers, transactions, products = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/Customers.csv",
        "/content/Transactions.csv",
        "/content/Products.csv",
    )
)


In [7]:
# Preprocess data
# Summarize transaction history for each customer: summed Quantity and summed
# TotalValue, one row per CustomerID.
customer_transactions = transactions.groupby('CustomerID').agg({
    'Quantity': 'sum',
    'TotalValue': 'sum'
}).reset_index()

# Merge customer profiles with transaction history.
# The left join keeps every customer; customers with no transactions come back
# with NaN in the two aggregated columns. Zero-fill ONLY those columns: a
# blanket fillna(0) would also write the integer 0 into non-numeric profile
# columns (e.g. a missing Region would turn into a bogus "0" category below).
customer_profiles = (
    customers
    .merge(customer_transactions, on='CustomerID', how='left')
    .fillna({'Quantity': 0, 'TotalValue': 0})
)

# Encode categorical features.
# dtype=float keeps the dummy columns numeric regardless of the pandas
# version's default dummy dtype, so the scaler always receives a float matrix.
# NOTE(review): drop_first=True makes the first Region the implicit baseline;
# confirm that asymmetry is intended for a similarity model.
customer_profiles = pd.get_dummies(
    customer_profiles, columns=['Region'], drop_first=True, dtype=float
)

# Standardize numerical features (identifier / free-text / date columns are
# excluded from the feature matrix).
scaler = StandardScaler()
customer_profiles_scaled = scaler.fit_transform(
    customer_profiles.drop(columns=['CustomerID', 'CustomerName', 'SignupDate'])
)


In [8]:
# Compute similarity scores using cosine similarity.
# Row i / column j of the matrix is the similarity between the i-th and j-th
# rows of customer_profiles_scaled.
similarity_matrix = cosine_similarity(customer_profiles_scaled)

# How many customers to produce recommendations for, and how many lookalikes
# to keep per customer.
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

# Generate lookalike recommendations
lookalike_results = {}
customer_ids = customer_profiles['CustomerID'].tolist()

for i, customer_id in enumerate(customer_ids[:NUM_TARGET_CUSTOMERS]):
    # Similarity of the current customer against every customer (self included)
    scores = similarity_matrix[i]

    # Rank all customers from most to least similar, then drop the customer's
    # own row by index. Skipping "position 0" instead is not safe: when another
    # customer ties with self at the maximum score, argsort may place self in a
    # later position, which would list the customer as their own lookalike.
    ranked_indices = scores.argsort()[::-1]
    similar_indices = [idx for idx in ranked_indices if idx != i][:TOP_K]

    # Store (CustomerID, score) pairs. The score is converted to a built-in
    # float so that the later str() of this list stays "0.996" rather than
    # "np.float64(0.996)" under NumPy >= 2.
    similar_customers = [
        (customer_ids[idx], float(round(scores[idx], 3)))
        for idx in similar_indices
    ]

    # Add to results
    lookalike_results[customer_id] = similar_customers


In [9]:

# Save the lookalike results to a CSV file
# Build one record per customer; the list of (CustomerID, score) pairs is
# stored as its string representation in a single "Lookalikes" column.
lookalike_rows = []
for target_id, matches in lookalike_results.items():
    lookalike_rows.append({'CustomerID': target_id, 'Lookalikes': str(matches)})

lookalike_df = pd.DataFrame(lookalike_rows)

# Write without the positional index so the file holds only the two columns.
lookalike_df.to_csv("Madhav_K_Lookalike.csv", index=False)


In [10]:
# Display the lookalike results
# Heading and table are emitted by a single print call, separated by a newline.
print(
    "Lookalike Recommendations for the First 20 Customers:",
    lookalike_df,
    sep="\n",
)

Lookalike Recommendations for the First 20 Customers:
   CustomerID                                         Lookalikes
0       C0001  [('C0107', 0.996), ('C0137', 0.996), ('C0184',...
1       C0002  [('C0088', 0.996), ('C0142', 0.988), ('C0159',...
2       C0003  [('C0147', 0.998), ('C0190', 0.997), ('C0174',...
3       C0004  [('C0113', 0.994), ('C0102', 0.98), ('C0169', ...
4       C0005  [('C0186', 0.997), ('C0159', 0.996), ('C0140',...
5       C0006  [('C0048', 0.994), ('C0126', 0.991), ('C0187',...
6       C0007  [('C0146', 1.0), ('C0178', 0.994), ('C0177', 0...
7       C0008  [('C0018', 0.984), ('C0122', 0.962), ('C0046',...
8       C0009  [('C0198', 1.0), ('C0014', 0.997), ('C0063', 0...
9       C0010  [('C0019', 0.991), ('C0073', 0.985), ('C0166',...
10      C0011  [('C0107', 0.995), ('C0048', 0.995), ('C0001',...
11      C0012  [('C0148', 0.996), ('C0163', 0.994), ('C0155',...
12      C0013  [('C0163', 0.996), ('C0148', 0.994), ('C0155',...
13      C0014  [('C0060', 0.999), ('