In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Remote CSV locations of the three input datasets
customers_url = "https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE"
products_url = "https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0"
transactions_url = "https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"

# Read each dataset into its own DataFrame (customers, then products, then transactions)
customers, products, transactions = (
    pd.read_csv(source)
    for source in (customers_url, products_url, transactions_url)
)

# Attach customer columns, then product columns, to each transaction row.
# merge() defaults to an inner join, so rows without a match are dropped.
transactions = transactions.merge(customers, on='CustomerID')
transactions = transactions.merge(products, on='ProductID')

# Customer x product matrix: each cell is the summed TotalValue of that
# customer's transactions for that product, 0 where there are none.
pivot_table = pd.pivot_table(
    transactions,
    values='TotalValue',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0,
)

# Standardize the matrix column by column (one column per product)
scaler = StandardScaler()
pivot_scaled = scaler.fit_transform(pivot_table)

# Pairwise cosine similarity between customer rows, labelled on both axes
# with the customer IDs taken from the pivot table's index.
similarity_matrix = cosine_similarity(pivot_scaled)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=pivot_table.index,
    columns=pivot_table.index,
)

# Function to get top 3 similar customers for each customer
def get_top_3_lookalikes(similarity_df, customer_id):
    """Return the three customers most similar to ``customer_id``.

    Args:
        similarity_df: DataFrame of pairwise similarity scores whose columns
            (and index) are customer IDs.
        customer_id: Column label of the customer to find lookalikes for.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered from most to
        least similar. It holds fewer than three entries when fewer than
        three other customers exist.

    Raises:
        KeyError: If ``customer_id`` is not a column of ``similarity_df``.
    """
    # Remove the customer's own entry by label rather than by position: the
    # self-score is not guaranteed to sort first (ties at the top, or a
    # self-score lower than another score), and slicing [1:4] would then drop
    # a genuine match and may return the customer as its own lookalike.
    scores = similarity_df[customer_id].drop(labels=customer_id, errors="ignore")
    top_matches = scores.sort_values(ascending=False).head(3)
    return list(top_matches.items())

# Generate lookalike recommendations for customers C0001 to C0020
# (taken as the first 20 rows of the customers table).
lookalike_results = {}
for customer_id in customers['CustomerID'][:20]:
    # similarity_df only has columns for customers present in the pivot table
    # (i.e. with at least one transaction); record an empty list for anyone
    # missing instead of failing the whole run with a KeyError.
    if customer_id in similarity_df.columns:
        lookalike_results[customer_id] = get_top_3_lookalikes(similarity_df, customer_id)
    else:
        lookalike_results[customer_id] = []

# Format results for CSV: one row per customer, with the lookalikes stored
# as a list of {SimilarCustomerID, SimilarityScore} records.
lookalike_data = [
    {
        'CustomerID': cust_id,
        'Lookalikes': [
            {
                'SimilarCustomerID': similar_cust,
                # float() converts the NumPy scalar to a built-in float so the
                # list cell is written to the CSV as a plain number rather
                # than as "np.float64(...)" (the scalar repr under NumPy >= 2).
                'SimilarityScore': round(float(score), 4),
            }
            for similar_cust, score in lookalikes
        ],
    }
    for cust_id, lookalikes in lookalike_results.items()
]

lookalike_csv_df = pd.DataFrame(lookalike_data)

# Save to CSV
lookalike_csv_filename = "Phani_Manoj_Lookalike.csv"
lookalike_csv_df.to_csv(lookalike_csv_filename, index=False)

print(f"Lookalike CSV file saved as {lookalike_csv_filename}")


Lookalike CSV file saved as Phani_Manoj_Lookalike.csv
