In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Customers that need lookalike recommendations: C0001 .. C0020.
# An explicit ID set is used instead of a lexicographic string range check,
# because a range check also matches malformed IDs such as "C00100" or "C001".
TARGET_CUSTOMER_IDS = frozenset(f"C{number:04d}" for number in range(1, 21))

# Number of lookalikes reported per target customer.
TOP_N_LOOKALIKES = 3


def find_lookalikes(similarity, customer_ids, target_ids, top_n=3):
    """Return the most similar other customers for each target customer.

    Args:
        similarity: Square (n x n) array-like of pairwise similarity scores,
            where row/column ``i`` belongs to ``customer_ids[i]``.
        customer_ids: Sequence of n customer identifiers, in matrix order.
        target_ids: Collection of identifiers to produce recommendations for.
        top_n: Maximum number of lookalikes per target customer.

    Returns:
        Dict mapping each target customer found in ``customer_ids`` to a list
        of ``(other_customer_id, score)`` tuples, best match first. Keys
        appear in the order the customers occur in ``customer_ids``.

    Raises:
        ValueError: If ``similarity`` is not an n x n matrix matching
            ``customer_ids``.
    """
    similarity = np.asarray(similarity, dtype=float)
    customer_ids = list(customer_ids)
    n_customers = len(customer_ids)
    if similarity.shape != (n_customers, n_customers):
        raise ValueError(
            f"similarity has shape {similarity.shape}, "
            f"expected ({n_customers}, {n_customers})"
        )

    wanted = set(target_ids)
    # Never request more neighbours than there are *other* customers;
    # otherwise the customer itself would be padded into its own list.
    k = max(0, min(top_n, n_customers - 1))

    recommendations = {}
    for position, customer_id in enumerate(customer_ids):
        if customer_id not in wanted:
            continue

        # Work on a copy so the caller's similarity matrix stays intact.
        scores = similarity[position].copy()
        # -inf (not -1) for the self-score: -1 is a valid cosine similarity,
        # so it could tie with a genuine neighbour.
        scores[position] = -np.inf

        # Stable sort keeps tie-breaking deterministic across runs.
        best = np.argsort(scores, kind="stable")[::-1][:k]
        recommendations[customer_id] = [
            (customer_ids[i], float(scores[i])) for i in best
        ]
    return recommendations


# The pipeline below reads and writes files, so it only runs when this code is
# executed as a script or notebook cell (where __name__ == "__main__").
if __name__ == "__main__":
    # Load the data
    customers_df = pd.read_csv("Customers.csv")
    transactions_df = pd.read_csv("Transactions.csv")

    # 1. Merge customer data and transaction history
    # Group the transactions by CustomerID and aggregate the necessary metrics.
    customer_transactions = transactions_df.groupby('CustomerID').agg({
        'Quantity': 'sum',  # total quantity purchased
        'TotalValue': 'sum',  # total spend
    }).reset_index()

    # Left join keeps customers that have no transactions at all.
    customer_profile = pd.merge(
        customers_df[['CustomerID', 'Region']],
        customer_transactions,
        on='CustomerID',
        how='left',
    )

    # 2. Create a customer feature matrix based on 'Region' and transaction data
    # 'Region' is one-hot encoded; dtype=float keeps the whole feature matrix
    # numeric (recent pandas versions default to boolean dummy columns).
    region_dummies = pd.get_dummies(customer_profile['Region'], prefix='Region', dtype=float)

    features_df = pd.concat(
        [customer_profile[['CustomerID', 'Quantity', 'TotalValue']], region_dummies],
        axis=1,
    )

    # 3. Handle missing values by filling NaNs with 0
    # (customers without transactions have NaN Quantity / TotalValue).
    features_df = features_df.fillna(0)

    # 4. Normalize the numerical features (Quantity, TotalValue)
    scaler = StandardScaler()
    features_df[['Quantity', 'TotalValue']] = scaler.fit_transform(features_df[['Quantity', 'TotalValue']])

    # 5. Calculate the cosine similarity between all customers
    cosine_sim = cosine_similarity(features_df.drop(columns=['CustomerID']))

    # 6. Generate the lookalike recommendations for customers C0001 to C0020
    lookalike_recommendations = find_lookalikes(
        cosine_sim,
        features_df['CustomerID'].tolist(),
        TARGET_CUSTOMER_IDS,
        top_n=TOP_N_LOOKALIKES,
    )

    # 7. Save the recommendations to a CSV file in the required map format
    # One output row per (customer, recommended customer) pair.
    lookalike_data = [
        [customer_id, recommended_customer, score]
        for customer_id, recommendations in lookalike_recommendations.items()
        for recommended_customer, score in recommendations
    ]

    # Convert to DataFrame and save as 'Lookalike.csv'
    lookalike_df = pd.DataFrame(lookalike_data, columns=['CustomerID', 'RecommendedCustomerID', 'SimilarityScore'])
    lookalike_df.to_csv('Lookalike.csv', index=False)

    print("Lookalike recommendations saved to 'Lookalike.csv'.")


Lookalike recommendations saved to 'Lookalike.csv'.
