In [1]:
#Import necessary libraries
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the three input tables from CSV files in the working directory.
# Files are read in the order customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [5]:
# Build one row per customer from the transaction table:
#   TotalValue    -> sum of TotalValue over the customer's transactions
#   TransactionID -> number of transactions (a count, despite the column name)
# CustomerID is kept as an ordinary column so the frame has a default 0..n-1 index.
customer_profile = (
    transactions
    .groupby('CustomerID', as_index=False)
    .agg(
        TotalValue=('TotalValue', 'sum'),
        TransactionID=('TransactionID', 'count'),
    )
)

In [7]:
# Attach each customer's Region (from the customers table) to the profile.
# Region is non-numeric and is not part of the similarity features below.
# Any Region column left over from a previous run of this cell is dropped first,
# so re-running the cell does not produce Region_x / Region_y duplicates.
customer_profile = (
    customer_profile
    .drop(columns='Region', errors='ignore')
    .merge(customers[['CustomerID', 'Region']], on='CustomerID', how='left')
)

In [9]:
# Feature matrix for the similarity step: total spend and transaction count only.
# Identifier and categorical columns (CustomerID, Region) are left out.
numeric_data = customer_profile.loc[:, ['TotalValue', 'TransactionID']]


In [11]:
# Calculate cosine similarity matrix between customers based on their total value and transaction count.
# Entry [i, j] is the similarity between row i and row j of numeric_data, so the
# matrix follows the row order of customer_profile.
# NOTE(review): cosine similarity compares direction only, not magnitude; with these two
# unscaled features the score is driven by the TotalValue : TransactionID ratio, so many
# customers will score very close to 1.0 — consider scaling the features; confirm intent.
similarity_matrix = cosine_similarity(numeric_data)

In [13]:
# Create a function to recommend lookalikes
def recommend_lookalikes(customer_id, similarity_matrix, top_n=3, profile=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Value to look up in the ``CustomerID`` column of ``profile``.
    similarity_matrix
        2-D array-like of pairwise similarity scores whose row and column
        order matches the row order of ``profile``.
    top_n : int, default 3
        Maximum number of lookalikes to return. Fewer are returned when the
        profile holds fewer than ``top_n`` other customers.
    profile : pandas.DataFrame, optional
        Frame with a ``CustomerID`` column. When omitted, the notebook-level
        ``customer_profile`` frame is used.

    Returns
    -------
    list of tuple
        ``(CustomerID, score)`` pairs ordered by descending score. Equal
        scores keep the row order of ``profile``. The queried customer is
        never part of the result.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    IndexError
        If ``customer_id`` does not occur in ``profile['CustomerID']``.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if profile is None:
        # Fall back to the frame built earlier in the notebook.
        profile = customer_profile

    # Locate the customer by *position*, because the similarity matrix is
    # indexed positionally and the frame's index labels need not be 0..n-1.
    matches = np.flatnonzero((profile['CustomerID'] == customer_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in customer profile")
    customer_idx = matches[0]
    similarity_scores = np.asarray(similarity_matrix)[customer_idx]

    # Remove the customer's own position explicitly. Relying on the self-score
    # sorting last is unsafe: ties or rounding can place another customer
    # after it, which would return the customer as its own lookalike.
    others = np.delete(np.arange(similarity_scores.shape[0]), customer_idx)

    # A stable sort on the negated scores gives descending order with
    # deterministic tie-breaking.
    ranked = others[np.argsort(-similarity_scores[others], kind='stable')][:top_n]
    similar_customer_ids = profile['CustomerID'].iloc[ranked].to_numpy()
    scores = similarity_scores[ranked]

    return list(zip(similar_customer_ids, scores))

In [15]:
# Collect lookalikes for the first 20 rows of customer_profile,
# keyed by CustomerID; each value is the list returned by recommend_lookalikes.
lookalike_recommendations = {
    cid: recommend_lookalikes(cid, similarity_matrix)
    for cid in customer_profile['CustomerID'].head(20)
}

In [17]:
# Flatten the {customer: [(lookalike, score), ...]} mapping into one row per
# (customer, lookalike) pair, then wrap the rows in a DataFrame for export.
lookalike_data = [
    [source_id, match_id, match_score]
    for source_id, matches in lookalike_recommendations.items()
    for match_id, match_score in matches
]

lookalike_df = pd.DataFrame(
    lookalike_data,
    columns=['CustomerID', 'Lookalike_ID', 'Similarity_Score'],
)

In [23]:
# Write the recommendation table to the working directory.
# The row index is omitted; the header row is written by default.
lookalike_df.to_csv('Gudibandi_SaiChandana_Lookalike.csv', index=False)

print("Lookalike recommendations have been saved to Gudibandi_SaiChandana_Lookalike.csv")

Lookalike recommendations have been saved to Gudibandi_SaiChandana_Lookalike.csv


In [None]:
# Optional: export a second copy of the recommendations to a chosen folder.
# The previous hard-coded placeholder directory did not exist, so this cell
# failed on a full run. The folder is now relative to the working directory
# and is created when missing; edit export_dir to change the destination.
export_dir = Path('output')
export_dir.mkdir(parents=True, exist_ok=True)
lookalike_df.to_csv(export_dir / 'Gudibandi_SaiChandana_Lookalike.csv', index=False)