In [2]:
# Lookalike Model Development
# Importing necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the three input tables (customers, products, transactions) from CSV.
# NOTE(review): the absolute /content/ paths look like a hosted-notebook
# working directory -- confirm before running elsewhere.
customers, products, transactions = map(
    pd.read_csv,
    (
        "/content/Customers.csv",
        "/content/Products.csv",
        "/content/Transactions.csv",
    ),
)

# Attach customer and product attributes to every transaction row.
# Both joins are inner joins (the pandas default), so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
merged_data = (
    transactions
    .merge(customers, on='CustomerID', how='inner')
    .merge(products, on='ProductID', how='inner')
)

# Build one profile row per customer from the merged transaction data:
#   ProductName -> every purchased product name, space-separated, in one string
#   TotalValue  -> total spend across all of the customer's transactions
# Grouping with as_index=False keeps CustomerID as an ordinary column.
customer_profiles = merged_data.groupby('CustomerID', as_index=False).agg({
    'ProductName': ' '.join,
    'TotalValue': 'sum',
})

# Turn each customer's concatenated product-name string into a TF-IDF vector
# (one matrix row per row of customer_profiles).
purchase_text = customer_profiles['ProductName']
vectorizer = TfidfVectorizer()
product_tfidf_matrix = vectorizer.fit_transform(purchase_text)

# Pairwise cosine similarity between all customer vectors; entry [i][j]
# compares the customers at row positions i and j of customer_profiles.
similarity_matrix = cosine_similarity(product_tfidf_matrix)

# Recommending top 3 similar customers for CustomerID C0001 - C0020
# NOTE(review): this takes the first 20 rows of customer_profiles; they are
# C0001-C0020 only if every one of those customers appears in the merged
# transaction data -- confirm against the input files.
customer_ids = customer_profiles['CustomerID'][:20]
lookalike_results = {}

# Row position -> CustomerID lookup, built once instead of calling .iloc per match
profile_ids = customer_profiles['CustomerID'].tolist()

for idx, customer_id in enumerate(customer_ids):
    # Pair every *other* customer's row position with its similarity to this one.
    # The customer's own entry is removed by position instead of assuming it
    # sorts first: the sort is stable, so an earlier-row customer with an equal
    # (or marginally higher, due to float rounding) score would take slot 0 and
    # the customer would then be reported as their own lookalike.
    similarity_scores = [
        (position, score)
        for position, score in enumerate(similarity_matrix[idx])
        if position != idx
    ]

    # Keep the three highest-scoring candidates
    sorted_scores = sorted(similarity_scores, key=lambda pair: pair[1], reverse=True)[:3]

    # Translate row positions to CustomerIDs. float() yields plain Python floats
    # so the stringified list does not depend on NumPy's scalar repr.
    top_3_lookalikes = [
        (profile_ids[position], float(score)) for position, score in sorted_scores
    ]

    # Add to results
    lookalike_results[customer_id] = top_3_lookalikes

# Flatten the results into one record per target customer. The list of
# (CustomerID, score) tuples is stored in its str() form in a single column.
lookalike_records = [
    {'CustomerID': customer_id, 'Lookalikes': str(matches)}
    for customer_id, matches in lookalike_results.items()
]
lookalike_df = pd.DataFrame(lookalike_records)

# Write the table to CSV in the current working directory, without the index
lookalike_df.to_csv("Aanand_Surana_Lookalike.csv", index=False)

# Show the first rows as a quick sanity check
preview = lookalike_df.head()
print("Lookalike Recommendations:\n", preview)


Lookalike Recommendations:
   CustomerID                                         Lookalikes
0      C0001  [('C0197', 0.8173310844459089), ('C0026', 0.73...
1      C0002  [('C0133', 0.8837932938918894), ('C0173', 0.76...
2      C0003  [('C0164', 0.7229023422425487), ('C0085', 0.65...
3      C0004  [('C0118', 0.740279454546346), ('C0008', 0.718...
4      C0005  [('C0128', 0.7597061203041797), ('C0096', 0.74...
