# In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Step 1: Read the three source tables from disk.
customers = pd.read_csv(
    "C:/Users/Akshay G S/OneDrive/Desktop/Uvce Documents/Zeotap/Customers.csv"
)
products = pd.read_csv(
    "C:/Users/Akshay G S/OneDrive/Desktop/Uvce Documents/Zeotap/Products.csv"
)
transactions = pd.read_csv(
    "C:/Users/Akshay G S/OneDrive/Desktop/Uvce Documents/Zeotap/Transactions.csv"
)

# Step 2: Attach customer and product attributes to every transaction.
# Both joins use pandas' default inner join, so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
customer_transactions = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

# Step 3: Feature Engineering
# Collapse the transaction-level rows into one feature row per customer:
# total spend, total quantity, the customer's region, and the number of
# distinct products bought (kept under the 'ProductID' column name).
features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        Region=('Region', 'first'),
        ProductID=('ProductID', 'nunique'),
    )
    .reset_index()
)

# One-hot encode Region; drop_first=True omits one level so the dummy
# columns are not perfectly collinear.
features = pd.get_dummies(features, columns=['Region'], drop_first=True)

# Step 4: Standardization
# CustomerID is an identifier rather than a feature, so it is left out of
# the matrix handed to the scaler.
model_inputs = features.drop(columns='CustomerID')
scaler = StandardScaler()
scaled_features = scaler.fit_transform(model_inputs)

# Step 5: Calculate Similarity
# Entry [i, j] is the cosine similarity between the scaled feature rows of
# customer i and customer j (row order follows `features`).
similarity_matrix = cosine_similarity(scaled_features)

# Step 6: Generate Lookalike Recommendations
# Maps each target CustomerID to a list of (lookalike CustomerID, score)
# pairs, ordered from most to least similar.
TOP_N_LOOKALIKES = 3    # lookalikes reported per customer
NUM_TARGET_CUSTOMERS = 20  # how many leading rows of `features` to process

lookalike_results = {}

# Pull the ID column out once instead of re-indexing the DataFrame per row.
all_customer_ids = features['CustomerID'].to_numpy()

# NOTE(review): targets are the first rows of `features`, which match
# C0001..C0020 only if each of those customers has at least one transaction
# that survived the merges -- confirm against the data.
# min() keeps the loop in range when fewer customers than requested exist.
for target_row in range(min(NUM_TARGET_CUSTOMERS, len(all_customer_ids))):
    row_scores = similarity_matrix[target_row]

    # Descending order; the stable sort makes tie-breaking deterministic
    # (lower row index first).
    ranked_rows = np.argsort(-row_scores, kind='stable')

    # Remove the customer's own row explicitly. Skipping position 0 is not
    # safe: another customer with an identical feature vector ties at the
    # top score and may be sorted ahead of the customer itself.
    ranked_rows = ranked_rows[ranked_rows != target_row][:TOP_N_LOOKALIKES]

    lookalike_results[all_customer_ids[target_row]] = list(zip(
        all_customer_ids[ranked_rows].tolist(),
        row_scores[ranked_rows].tolist(),
    ))

# Flatten the {customer: [(lookalike, score), ...]} mapping into one record
# per (customer, lookalike) pair so it can be written as a flat table.
flattened_results = [
    {
        'CustomerID': source_id,
        'SimilarCustomerID': match_id,
        'Score': match_score,
    }
    for source_id, matches in lookalike_results.items()
    for match_id, match_score in matches
]

# One row per recommendation.
lookalike_df = pd.DataFrame(flattened_results)

# Step 7: Save the Results
# index=False keeps the DataFrame's row index out of the CSV.
lookalike_df.to_csv('Akshay_GS_Lookalike.csv', index=False)

print("Lookalike recommendations have been saved to 'Akshay_GS_Lookalike.csv'.")

# Output: Lookalike recommendations have been saved to 'Akshay_GS_Lookalike.csv'.
