In [36]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [37]:
# Directory holding the three input CSV files. Kept in one constant so the
# notebook can be pointed at another location by editing a single line
# instead of every read_csv call.
DATA_DIR = "C:/Users/Charan/Desktop/zeotap"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [38]:
# Build one analysis table: each transaction row is joined to its customer
# (on CustomerID) and then to its product (on ProductID). Both joins use the
# pandas default join type, so rows without a match on either key are dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [39]:
# Feature engineering: collapse the merged transactions to one row per customer.
#   TotalValue - sum of TotalValue over the customer's transactions (total spend)
#   Quantity   - sum of Quantity over the customer's transactions (total units)
#   Category   - first entry of the mode of Category (most frequent category)
customer_profile = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Category=('Category', lambda categories: categories.mode()[0]),
    )
    .reset_index()
)

In [40]:
# Rescale the two numeric profile columns with StandardScaler before the
# similarity computation. The scaled values overwrite the raw ones in place.
numerical_features = customer_profile[['TotalValue', 'Quantity']]
scaler = StandardScaler()
customer_profile[['TotalValue', 'Quantity']] = (
    scaler.fit(numerical_features).transform(numerical_features)
)

In [41]:
# Attach each customer's Region to the profile so it can be encoded alongside
# Category in the next step. Only CustomerID and Region are taken from customers.
customer_profile = pd.merge(
    customer_profile,
    customers[['CustomerID', 'Region']],
    on='CustomerID',
)

In [42]:
# Replace the categorical 'Region' and 'Category' columns with one indicator
# column per distinct value; all other columns are passed through unchanged.
customer_profile = pd.get_dummies(
    data=customer_profile,
    columns=['Region', 'Category'],
)


In [43]:
# Pairwise cosine similarity between customers. The identifier column is
# removed first so only feature columns enter the computation; row i / column j
# of cosine_sim refer to rows i and j of customer_profile.
profile_matrix = customer_profile.drop('CustomerID', axis=1)
cosine_sim = cosine_similarity(profile_matrix)

In [44]:
# Lookup table from CustomerID to that customer's row position in
# customer_profile (and therefore in cosine_sim).
customer_id_to_index = {
    profile_id: position
    for position, profile_id in enumerate(customer_profile['CustomerID'])
}

In [45]:
# Generate lookalike recommendations for the first 20 customers.
#
# The whole loop body lives in this single cell on purpose: when the body is
# split across cells, the `for` loop only assigns customer_index and the
# remaining steps run once, for the last customer only.
# Result: {customer_id: [(similar_customer_id, similarity_score), ...]} with
# up to 3 entries per customer, highest similarity first.
lookalike_recommendations = {}

for customer_id in customers['CustomerID'][:20]:
    # A customer missing from customer_profile has no row in cosine_sim;
    # record an empty list instead of failing with a KeyError.
    customer_index = customer_id_to_index.get(customer_id)
    if customer_index is None:
        lookalike_recommendations[customer_id] = []
        continue

    # Pair every *other* customer's row position with its similarity score.
    # The customer is excluded by position rather than by assuming they sort
    # first, which is not guaranteed when another profile ties at the top.
    # Scores are cast to plain floats so they serialise as bare numbers.
    similarity_scores = [
        (index, float(score))
        for index, score in enumerate(cosine_sim[customer_index])
        if index != customer_index
    ]
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # cosine_sim rows follow customer_profile's row order, so positions must be
    # translated back to IDs through customer_profile, not through customers.
    top_3_similar = [
        (customer_profile['CustomerID'].iloc[index], score)
        for index, score in similarity_scores[:3]
    ]
    lookalike_recommendations[customer_id] = top_3_similar

In [51]:
# Flatten the recommendations dict into one record per customer. The list of
# (customer_id, score) pairs is stored as its string representation so it fits
# in a single CSV column.
# Built in one expression so re-running this cell never appends duplicates.
lookalike_data = [
    {'cust_id': customer_id, 'lookalikes': str(recommendations)}
    for customer_id, recommendations in lookalike_recommendations.items()
]

# Explicit columns keep the two-column schema even when there are no records.
lookalike_df = pd.DataFrame(lookalike_data, columns=['cust_id', 'lookalikes'])

In [55]:
# Write the lookalike table to a CSV file in the current working directory,
# omitting the DataFrame's row index from the output.
lookalike_df.to_csv(
    path_or_buf='FirstName_LastName_Lookalike.csv',
    index=False,
)