In [11]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [12]:
# Load datasets
customers_data = pd.read_csv("/Users/narasimha/Downloads/zeotap intern/Customers.csv")
products_data = pd.read_csv("/Users/narasimha/Downloads/zeotap intern/Products.csv")
transactions_data = pd.read_csv("/Users/narasimha/Downloads/zeotap intern/Transactions.csv")

**Task 2: Lookalike Model**


In [13]:
# Create Customer-Product interaction matrix
customer_product_matrix = transactions_data.pivot_table(
    index='CustomerID', columns='ProductID', values='Quantity', fill_value=0
)

# Add Customer Features 
customers_data['SignupDate'] = pd.to_datetime(customers_data['SignupDate'])
customers_data['SignupYear'] = customers_data['SignupDate'].dt.year  # Extract year for numerical encoding
customer_features = customers_data.set_index('CustomerID')[['Region', 'SignupYear']]

# Encode Region using one-hot encoding
customer_features = pd.get_dummies(customer_features, columns=['Region'])

# Combine Customer Features with Transaction Data
combined_matrix = customer_product_matrix.join(customer_features, how='left').fillna(0)

# Normalize the data to avoid dominance of large values
scaler = StandardScaler()
scaled_matrix = scaler.fit_transform(combined_matrix)

# Compute Cosine Similarity
similarity_matrix = cosine_similarity(scaled_matrix)

# Converting similarity matrix to DataFrame
similarity_df = pd.DataFrame(similarity_matrix, 
                             index=combined_matrix.index, 
                             columns=combined_matrix.index)

# Generate Lookalike Recommendations
lookalikes = {}
for customer in similarity_df.index[:20]:  
    # Sort customers by similarity score in descending order, and excluding the customer itself
    similar_customers = similarity_df[customer].sort_values(ascending=False)[1:4]  
    lookalikes[customer] = [{"LookalikeID": k, "Score": v} for k, v in similar_customers.items()]

# Save Lookalikes to CSV
lookalike_df = pd.DataFrame([{
    "CustomerID": cust, 
    "Lookalikes": lookalikes[cust]
} for cust in lookalikes])

# Save the lookalikes dataframe to a CSV file
lookalike_df.to_csv("Narasimha_Rao_chunchu_Lookalike.csv", index=False)

#Top 3 Lookalike Recommendations for First 20 Customers
print("\nTop 3 Lookalike Recommendations for the First 20 Customers:")
for customer in lookalikes:
    print(f"\nCustomerID: {customer}")
    # Display the top 3 lookalikes with their similarity scores
    for i, lookalike in enumerate(lookalikes[customer], 1):
        print(f"  {i}. LookalikeID: {lookalike['LookalikeID']}, Score: {lookalike['Score']}")



Top 3 Lookalike Recommendations for the First 20 Customers:

CustomerID: C0001
  1. LookalikeID: C0104, Score: 0.3779815507779106
  2. LookalikeID: C0194, Score: 0.3364208801241058
  3. LookalikeID: C0199, Score: 0.3156066993077916

CustomerID: C0002
  1. LookalikeID: C0091, Score: 0.4062800819383005
  2. LookalikeID: C0030, Score: 0.3563152474090616
  3. LookalikeID: C0173, Score: 0.3354325287970556

CustomerID: C0003
  1. LookalikeID: C0181, Score: 0.48808264676424756
  2. LookalikeID: C0134, Score: 0.4266236593109025
  3. LookalikeID: C0144, Score: 0.3835356381677593

CustomerID: C0004
  1. LookalikeID: C0070, Score: 0.3711522179070912
  2. LookalikeID: C0175, Score: 0.3005691294307162
  3. LookalikeID: C0025, Score: 0.27324142712509425

CustomerID: C0005
  1. LookalikeID: C0096, Score: 0.45566086438131376
  2. LookalikeID: C0023, Score: 0.4398322886912092
  3. LookalikeID: C0055, Score: 0.31121388711556613

CustomerID: C0006
  1. LookalikeID: C0040, Score: 0.3738840310844944
  2. 