In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import csv

In [4]:

# Read the three input tables in one pass; the unpacking order matches the path order.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# Preview the first rows of each table as a quick sanity check on the load.
for frame in (customers, products, transactions):
    print(frame.head())


  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3       

In [5]:

# Attach customer and product attributes to every transaction row
# (default inner joins on the shared key columns).
merged_data = (
    transactions
    .merge(customers, on="CustomerID")
    .merge(products, on="ProductID")
)


def _distinct_categories(values):
    """Return the distinct values of a group as a plain list, in order of first appearance."""
    return list(values.unique())


# One row per customer: total spend, total units, and the categories bought.
profile_aggregations = {
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Category': _distinct_categories,
}
customer_profiles = (
    merged_data
    .groupby('CustomerID')
    .agg(profile_aggregations)
    .reset_index()
)

# Standardise the two numeric columns, replacing the raw sums with the
# StandardScaler output.
numeric_columns = ['TotalValue', 'Quantity']
scaler = StandardScaler()
customer_profiles[numeric_columns] = scaler.fit_transform(customer_profiles[numeric_columns])

print(customer_profiles.head())


  CustomerID  TotalValue  Quantity                             Category
0      C0001   -0.061701 -0.122033     [Books, Home Decor, Electronics]
1      C0002   -0.877744 -0.448000               [Home Decor, Clothing]
2      C0003   -0.405857  0.203934  [Home Decor, Clothing, Electronics]
3      C0004    1.032547  1.670787     [Books, Home Decor, Electronics]
4      C0005   -0.783929 -0.936951            [Home Decor, Electronics]


In [6]:
# Pairwise cosine similarity between customers, computed from the
# TotalValue and Quantity columns of customer_profiles only.
feature_columns = ['TotalValue', 'Quantity']
customer_ids = customer_profiles['CustomerID']

similarity_matrix = cosine_similarity(customer_profiles[feature_columns])

# Label both axes with CustomerID so scores can be looked up by customer.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

print(similarity_df.head())


CustomerID     C0001     C0002     C0003     C0004     C0005     C0006  \
CustomerID                                                               
C0001       1.000000  0.807594  0.002500 -0.996354  0.973989 -0.181691   
C0002       0.807594  1.000000  0.591756 -0.854965  0.920220 -0.726656   
C0003       0.002500  0.591756  1.000000 -0.087809  0.229031 -0.983807   
C0004      -0.996354 -0.854965 -0.087809  1.000000 -0.989770  0.264927   
C0005       0.973989  0.920220  0.229031 -0.989770  1.000000 -0.399791   

CustomerID     C0007     C0008     C0009     C0010  ...     C0191     C0192  \
CustomerID                                          ...                       
C0001       0.995762 -0.993763  0.967313  0.560457  ...  0.965058  0.952317   
C0002       0.858408 -0.736793  0.930746  0.941034  ...  0.933907  0.949021   
C0003       0.094457  0.109029  0.256004  0.829582  ...  0.264447  0.307491   
C0004      -0.999978  0.980625 -0.985421 -0.629073  ... -0.983896 -0.974876   
C0005  

In [7]:

# Build the top-3 lookalikes for the first 20 customers in customer_profiles.
# Maps CustomerID -> list of (similar CustomerID, similarity score) tuples,
# ordered from most to least similar.
lookalike_recommendations = {}

for customer in customer_profiles['CustomerID'][:20]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first. Other customers can tie with (or, through floating-point rounding,
    # exceed) the self-similarity score, in which case slicing off position 0
    # would discard a genuine lookalike and could list the customer as their
    # own match.
    similar_customers = (
        similarity_df[customer]
        .drop(labels=customer)
        .nlargest(3)  # highest scores first; ties keep their original order
    )

    lookalike_recommendations[customer] = list(
        zip(similar_customers.index, similar_customers.values)
    )

# Report the lookalikes found for each customer.
for customer, recommendations in lookalike_recommendations.items():
    print(f"Customer {customer} Lookalikes:")
    for rec in recommendations:
        print(f"\tSimilar Customer: {rec[0]}, Similarity Score: {rec[1]}")


Customer C0001 Lookalikes:
	Similar Customer: C0085, Similarity Score: 0.9999990504724361
	Similar Customer: C0042, Similarity Score: 0.9998215747742084
	Similar Customer: C0089, Similarity Score: 0.9997850140987701
Customer C0002 Lookalikes:
	Similar Customer: C0157, Similarity Score: 0.9999942410168485
	Similar Customer: C0166, Similarity Score: 0.999875010843091
	Similar Customer: C0029, Similarity Score: 0.9998254255985104
Customer C0003 Lookalikes:
	Similar Customer: C0111, Similarity Score: 0.9940081095432594
	Similar Customer: C0160, Similarity Score: 0.9904545038572361
	Similar Customer: C0147, Similarity Score: 0.9876382719212549
Customer C0004 Lookalikes:
	Similar Customer: C0162, Similarity Score: 0.9999999965087093
	Similar Customer: C0165, Similarity Score: 0.9999594720114721
	Similar Customer: C0090, Similarity Score: 0.9986409558134951
Customer C0005 Lookalikes:
	Similar Customer: C0080, Similarity Score: 0.9999822355480511
	Similar Customer: C0167, Similarity Score: 0.9

In [8]:
# Persist the lookalikes: one CSV row per customer, holding the customer ID and
# a single text field of "id: score" pairs (scores formatted to two decimals).
header_row = ["CustomerID", "Recommendations"]

with open("Lookalike.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(header_row)
    for cust_id, recommendations in lookalike_recommendations.items():
        # Flatten this customer's (id, score) pairs into one comma-separated string.
        formatted_pairs = []
        for other_id, score in recommendations:
            formatted_pairs.append(f"{other_id}: {score:.2f}")
        writer.writerow([cust_id, ', '.join(formatted_pairs)])

# Confirmation message only; the file contents are not read back here.
print("Lookalike.csv saved successfully.")


Lookalike.csv saved successfully.
