In [2]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Directory that holds the three input CSV files. It is an absolute path
# (it looks like the Google Colab default working directory -- confirm), so
# change this single value to run the notebook anywhere else.
DATA_DIR = '/content'

# Load the three raw tables that every later cell builds on.
customers = pd.read_csv(f'{DATA_DIR}/Customers.csv')
products = pd.read_csv(f'{DATA_DIR}/Products.csv')
transactions = pd.read_csv(f'{DATA_DIR}/Transactions.csv')

In [4]:
# Attach the customer record and then the product record to every transaction.
# Both joins use the pandas default (inner) join, so a transaction whose
# CustomerID or ProductID has no match on the other side is dropped.
# The Price_x / Price_y columns in the output below are pandas' default
# suffixes for a non-key column name that appears on both sides of a merge.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)

# Show the first rows of the combined table as a sanity check.
print("Merged Dataset:\n", merged_data.head())

Merged Dataset:
   TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x     CustomerName         Region  SignupDate  \
0      300.68   300.68   Andrea Jenkins         Europe  2022-12-03   
1      300.68   300.68  Brittany Harvey           Asia  2024-09-04   
2      300.68   300.68  Kathryn Stevens         Europe  2024-04-04   
3      601.36   300.68  Travis Campbell  South America  2024-04-11   
4      902.04   300.68    Timothy Perez         Europe  2022-03-15   

                       ProductName     Category  Price_y  
0  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1  ComfortLiving 

In [5]:
# Build one feature row per customer: spending aggregates + one-hot region.
numerical_features = ['TotalSpending', 'TotalQuantity', 'TransactionCount']

# Aggregate transaction data: one row per CustomerID.
customer_transactions = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),       # Total spending
        TotalQuantity=('Quantity', 'sum'),         # Total quantity purchased
        TransactionCount=('ProductID', 'count'),   # Total number of transactions
    )
    .reset_index()
)

# Merge with customer demographic data. The left join keeps customers that
# have no transactions; their aggregate columns come back as NaN.
lookalike_data = pd.merge(customers, customer_transactions, on='CustomerID', how='left')

# Fill missing values in the aggregate columns only. A frame-wide fillna(0)
# would also write the integer 0 into missing names, regions or signup dates.
lookalike_data[numerical_features] = lookalike_data[numerical_features].fillna(0)

# Encode categorical variables as float 0/1 columns so the feature matrix is
# purely numeric. drop_first=True omits the first Region level, so customers
# in that region are represented by all-zero dummy columns.
# NOTE(review): for a similarity measure this treats the dropped region
# differently from the others -- confirm that this is intended.
lookalike_data = pd.get_dummies(
    lookalike_data, columns=['Region'], drop_first=True, dtype=float
)

# Standardize numerical features to zero mean and unit variance. This is done
# with pandas because no scaler class is imported in this notebook. ddof=0 is
# the population standard deviation; a zero-variance column keeps a divisor
# of 1 so it becomes all zeros instead of NaN.
raw_features = lookalike_data[numerical_features].astype(float)
feature_std = raw_features.std(ddof=0).replace(0, 1.0)
lookalike_data[numerical_features] = (raw_features - raw_features.mean()) / feature_std


In [6]:
# Compute the customer-by-customer cosine similarity matrix with numpy
# (no cosine-similarity helper is imported in this notebook).
# Identifier and free-text columns are not features, so they are removed.
# The explicit float cast also converts boolean dummy columns to 0.0/1.0.
feature_matrix = (
    lookalike_data
    .drop(columns=['CustomerID', 'CustomerName', 'SignupDate'])
    .to_numpy(dtype=float)
)

# Scale every row to unit length; the dot product of two unit rows is their
# cosine similarity. A row of all zeros has norm 0, so its divisor is set to 1
# to avoid a division by zero -- such a row scores 0 against everything,
# including itself.
row_norms = np.linalg.norm(feature_matrix, axis=1, keepdims=True)
row_norms[row_norms == 0] = 1.0
unit_rows = feature_matrix / row_norms
similarity_matrix = unit_rows @ unit_rows.T

# Convert to DataFrame for better interpretation: rows and columns are both
# labelled by CustomerID.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=lookalike_data['CustomerID'],
    columns=lookalike_data['CustomerID'],
)


In [7]:
# Create recommendations: the most similar other customers for each customer.
TOP_N_LOOKALIKES = 3  # number of lookalikes reported per customer

# customer_id -> Series of the top matches (index = matched CustomerID,
# values = similarity score, highest first).
top_matches = {}
for customer_id in lookalike_data['CustomerID']:
    top_matches[customer_id] = (
        similarity_df[customer_id]
        # Exclude self by label. Skipping the first sorted entry is not safe:
        # another customer can tie at the top score, and a customer whose
        # feature vector is all zeros does not rank first against itself.
        .drop(labels=customer_id)
        .nlargest(TOP_N_LOOKALIKES)
    )

# Same layout as before: matched IDs followed by their scores in one flat list.
# tolist() yields plain Python floats rather than numpy scalars.
lookalike_results = {
    customer_id: matches.index.tolist() + matches.tolist()
    for customer_id, matches in top_matches.items()
}

# Format as DataFrame. The two list columns are taken from the match Series
# directly instead of slicing the flat list at a fixed offset, so they stay
# correct when a customer has fewer than TOP_N_LOOKALIKES candidates.
lookalike_output = pd.DataFrame({
    "CustomerID": list(top_matches),
    "SimilarCustomers": [matches.index.tolist() for matches in top_matches.values()],
    "Scores": [matches.tolist() for matches in top_matches.values()],
})


In [12]:
# Write the recommendation table to a CSV file. The file name is relative, so
# it is created in the current working directory; index=False leaves the
# positional row index out of the file.
output_path = 'Nishu_Rajput_Lookalike.csv'
lookalike_output.to_csv(output_path, index=False)