In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


# Load the three source tables (paths are relative to the notebook's working directory).
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# Merge datasets: attach customer and product attributes to every transaction.
# Both merges are inner joins (the pandas default), so transactions without a
# matching customer or product are dropped. The product merge is what yields the
# suffixed `Price_x` / `Price_y` columns; `Price_y` is the product-table price.
transactions_with_customers = pd.merge(transactions, customers, on="CustomerID")
merged_data = pd.merge(transactions_with_customers, products, on="ProductID")

# One row per customer: total spend, total units bought, and the mean
# product-table price across that customer's transactions.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Price_y=('Price_y', 'mean'),
    )
    .reset_index()
)

# Normalize features using StandardScaler so that no single feature dominates
# the similarity purely because of its scale.
feature_columns = ['TotalValue', 'Quantity', 'Price_y']
feature_frame = customer_features[feature_columns]
scaler = StandardScaler()
scaler.fit(feature_frame)
scaled_features = scaler.transform(feature_frame)

# Compute cosine similarity scores between customers: entry [i, j] compares the
# customer in row i of `customer_features` with the customer in row j.
similarity_matrix = cosine_similarity(scaled_features)

# Create a dictionary to store the top 3 lookalikes for each customer.
# Maps CustomerID -> list of (lookalike CustomerID, cosine similarity) tuples,
# ordered from most to least similar.
N_LOOKALIKES = 3  # the export step below expects exactly three lookalikes per customer
customer_ids = customer_features['CustomerID'].tolist()  # positional lookup, independent of the frame's index labels
lookalikes = {}
for i, customer_id in enumerate(customer_ids):
    scores = similarity_matrix[i]
    # Rank every customer by descending similarity. A stable sort keeps tied
    # scores in row order, so the ranking is reproducible.
    ranked = (-scores).argsort(kind='stable')
    # Remove the customer's own row explicitly instead of assuming it always
    # sorts first: floating-point rounding can let another customer tie or
    # exceed the self-similarity, and an all-zero scaled vector has
    # self-similarity 0. Slicing afterwards also copes with having fewer than
    # N_LOOKALIKES other customers.
    ranked = ranked[ranked != i][:N_LOOKALIKES]
    # Store scores as built-in floats so they serialise as plain numbers
    # (NumPy 2 scalars render as `np.float64(...)` inside a tuple's text form).
    lookalikes[customer_id] = [(customer_ids[j], float(scores[j])) for j in ranked]

# Save the lookalike recommendations for the first 20 customers
# (the first 20 rows of `customer_features`).
first_twenty_ids = customer_features['CustomerID'][:20]
lookalike_results = {}
for selected_id in first_twenty_ids:
    lookalike_results[selected_id] = lookalikes[selected_id]

# One row per customer (CustomerID as the index); each cell holds one entry of
# that customer's lookalike list.
lookalike_columns = ['Lookalike1', 'Lookalike2', 'Lookalike3']
lookalike_df = pd.DataFrame.from_dict(lookalike_results, orient='index', columns=lookalike_columns)

output_path = "Charan_GS_Lookalike.csv"
lookalike_df.to_csv(output_path, index=True)

print("Lookalike recommendations for the first 20 customers have been saved to Charan_GS_Lookalike.csv.")


Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')
Lookalike recommendations for the first 20 customers have been saved to Charan_GS_Lookalike.csv.
