**Loading And Merging The Datasets**

In [None]:
import pandas as pd
import numpy as np
# Read the three source tables from the working directory
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Attach customer attributes to every transaction, then product attributes.
# Both joins use pandas' default inner join on the shared key column.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(transactions_with_customers, products, on='ProductID')

print("Merged Data Preview:")
print(merged_data.head())


Merged Data Preview:
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x     CustomerName         Region  SignupDate  \
0      300.68   300.68   Andrea Jenkins         Europe  2022-12-03   
1      300.68   300.68  Brittany Harvey           Asia  2024-09-04   
2      300.68   300.68  Kathryn Stevens         Europe  2024-04-04   
3      601.36   300.68  Travis Campbell  South America  2024-04-11   
4      902.04   300.68    Timothy Perez         Europe  2022-03-15   

                       ProductName     Category  Price_y  
0  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1  ComfortLiv

**Creating A Customer Profile**

In [None]:
# Per-customer totals: money spent (TotalValue) and units bought (Quantity)
spend_by_customer = (
    merged_data
    .groupby('CustomerID')[['TotalValue', 'Quantity']]
    .sum()
    .reset_index()
)

# One-hot encoding of each customer's region, indexed by CustomerID
region_by_customer = customers.set_index('CustomerID')['Region']
region_dummies = pd.get_dummies(region_by_customer, prefix='Region')

# Number of transactions each customer made in every product category
product_preferences = (
    merged_data
    .groupby(['CustomerID', 'Category'])
    .size()
    .unstack(fill_value=0)
)

# Assemble the profile: totals + region flags + category counts
customer_profile = (
    spend_by_customer
    .join(region_dummies, on='CustomerID')
    .join(product_preferences, on='CustomerID')
)

print("Customer Profile Preview:")
print(customer_profile.head())


Customer Profile Preview:
  CustomerID  TotalValue  Quantity  Region_Asia  Region_Europe  \
0      C0001     3354.52        12        False          False   
1      C0002     1862.74        10         True          False   
2      C0003     2725.38        14        False          False   
3      C0004     5354.88        23        False          False   
4      C0005     2034.24         7         True          False   

   Region_North America  Region_South America  Books  Clothing  Electronics  \
0                 False                  True      1         0            3   
1                 False                 False      0         2            0   
2                 False                  True      0         1            1   
3                 False                  True      3         0            2   
4                 False                 False      0         0            2   

   Home Decor  
0           1  
1           2  
2           2  
3           3  
4           1  


**Cosine Similarity To Measure Similarity Between Customers**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
# Raw per-customer feature matrix (every profile column except the identifier).
features = customer_profile.drop(columns=['CustomerID'])

# Standardize each column (zero mean, unit variance) before comparing customers.
# On the raw values TotalValue (in the thousands) dwarfs the 0/1 region flags and
# the single-digit category counts, so every cosine score collapses towards 1.0
# and the resulting ranking carries almost no information.
features_numeric = features.astype(float)
column_std = features_numeric.std(ddof=0)
# A constant column has zero spread; divide by 1 instead so it becomes 0, not NaN.
column_std = column_std.where(column_std > 0, 1.0)
features_scaled = (features_numeric - features_numeric.mean()) / column_std

# Pairwise cosine similarity; row/column i corresponds to customer_ids at position i.
similarity_matrix = cosine_similarity(features_scaled)
customer_ids = customer_profile['CustomerID']

**Generating Recommendations For The First 20 Customers**

In [None]:
N_TARGET_CUSTOMERS = 20  # number of customers (taken from the top of customer_ids) to score
TOP_N_LOOKALIKES = 3     # lookalikes kept per customer

# Maps each target CustomerID to a list of (similar CustomerID, similarity score) tuples,
# ordered from most to least similar.
lookalike_recommendations = {}
for i, customer_id in enumerate(customer_ids.iloc[:N_TARGET_CUSTOMERS]):
    # Exclude the customer's own entry by position rather than assuming it sorts first:
    # another customer can tie with (or, through floating-point error, exceed) the
    # self-similarity, in which case dropping "element 0" would discard a genuine match
    # and leave the customer listed as its own lookalike.
    candidates = [(j, score) for j, score in enumerate(similarity_matrix[i]) if j != i]
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:TOP_N_LOOKALIKES]
    # .iloc keeps the lookup positional, matching the rows of similarity_matrix
    # regardless of the Series' index labels; float() stores plain Python floats.
    similar_customers = [
        (customer_ids.iloc[j], round(float(score), 4)) for j, score in top_matches
    ]
    lookalike_recommendations[customer_id] = similar_customers
for customer, recommendations in lookalike_recommendations.items():
    print(f"Customer {customer} Lookalikes: {recommendations}")


Customer C0001 Lookalikes: [('C0068', 1.0), ('C0045', 1.0), ('C0120', 1.0)]
Customer C0002 Lookalikes: [('C0134', 1.0), ('C0043', 1.0), ('C0062', 1.0)]
Customer C0003 Lookalikes: [('C0031', 1.0), ('C0113', 1.0), ('C0190', 1.0)]
Customer C0004 Lookalikes: [('C0113', 1.0), ('C0017', 1.0), ('C0039', 1.0)]
Customer C0005 Lookalikes: [('C0007', 1.0), ('C0146', 1.0), ('C0127', 1.0)]
Customer C0006 Lookalikes: [('C0082', 1.0), ('C0079', 1.0), ('C0185', 1.0)]
Customer C0007 Lookalikes: [('C0146', 1.0), ('C0005', 1.0), ('C0140', 1.0)]
Customer C0008 Lookalikes: [('C0181', 1.0), ('C0086', 1.0), ('C0166', 1.0)]
Customer C0009 Lookalikes: [('C0198', 1.0), ('C0092', 1.0), ('C0061', 1.0)]
Customer C0010 Lookalikes: [('C0111', 1.0), ('C0049', 1.0), ('C0109', 1.0)]
Customer C0011 Lookalikes: [('C0153', 1.0), ('C0087', 1.0), ('C0155', 1.0)]
Customer C0012 Lookalikes: [('C0065', 1.0), ('C0179', 1.0), ('C0104', 1.0)]
Customer C0013 Lookalikes: [('C0188', 1.0), ('C0183', 1.0), ('C0107', 1.0)]
Customer C00

**Saving Recommendations To Lookalike.csv**

In [None]:
# Flatten {customer: [(lookalike, score), ...]} into one record per (customer, lookalike) pair
lookalike_data = [
    {
        'CustomerID': customer_id,
        'SimilarCustomerID': similar_customer,
        'SimilarityScore': score
    }
    for customer_id, recommendations in lookalike_recommendations.items()
    for similar_customer, score in recommendations
]
lookalike_df = pd.DataFrame(lookalike_data)

# Write the table to disk without the positional index column
lookalike_df.to_csv('Lookalike.csv', index=False)
print("Lookalike recommendations saved to 'Lookalike.csv'")


Lookalike recommendations saved to 'Lookalike.csv'
