In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load data
# Source CSVs are hosted on Google Drive; the `uc?id=` form is a direct-download link.
customers_url = "https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE"
products_url = "https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0"
transactions_url = "https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"

# Read the three tables in the same order as their URLs are declared.
customers, products, transactions = [
    pd.read_csv(source_url)
    for source_url in (customers_url, products_url, transactions_url)
]

# Step 1: Data Preprocessing
# Collapse the transaction log to one row per CustomerID:
#   TotalSpend          - sum of TotalValue
#   AvgTransactionValue - mean of TotalValue
#   PurchaseFrequency   - number of TransactionID entries
transactions_agg = transactions.groupby('CustomerID').agg(
    TotalSpend=('TotalValue', 'sum'),
    AvgTransactionValue=('TotalValue', 'mean'),
    PurchaseFrequency=('TransactionID', 'count')
).reset_index()

# Left merge keeps every customer; customers with no transactions get NaN
# in the three aggregate columns.
customer_profile = pd.merge(customers, transactions_agg, on='CustomerID', how='left')

# Zero-fill ONLY the aggregate columns. A blanket fillna(0) would also write 0
# into non-numeric customer attributes (e.g. a missing Region would become the
# value 0 and later surface as a spurious one-hot category).
aggregate_columns = ['TotalSpend', 'AvgTransactionValue', 'PurchaseFrequency']
customer_profile[aggregate_columns] = customer_profile[aggregate_columns].fillna(0)

# Step 2: Feature Selection
# Numeric behaviour features that always take part in the similarity calculation.
features = ['TotalSpend', 'AvgTransactionValue', 'PurchaseFrequency']

# When a 'Region' column is present, append its one-hot indicator columns
# to the profile and register them as additional features.
if 'Region' in customer_profile:
    region_dummies = pd.get_dummies(customer_profile['Region'], prefix='Region')
    customer_profile = pd.concat([customer_profile, region_dummies], axis=1)
    features += list(region_dummies.columns)

# Normalize features
# Rescale every selected feature column with MinMaxScaler and write the
# scaled values back over the original columns.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_profile[features])
customer_profile[features] = scaled_values

# Step 3: Calculate Similarity
# Pairwise cosine similarity between the feature rows of all customers.
feature_matrix = customer_profile[features]
similarity_matrix = cosine_similarity(feature_matrix)

# Label both axes with CustomerID so scores can be looked up by customer.
id_labels = customer_profile['CustomerID']
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=id_labels,
    columns=id_labels,
)

# Step 4: Generate Lookalike Recommendations
TOP_N = 3  # number of lookalikes reported per customer

# Fixed output schema: CustomerID, then (Lookalike<k>, Score<k>) for k = 1..TOP_N.
# Passing it to the DataFrame constructor keeps the columns present even when
# there are no customers at all.
result_columns = ['CustomerID']
for rank in range(1, TOP_N + 1):
    result_columns += [f'Lookalike{rank}', f'Score{rank}']

lookalike_results = []
for customer_id in customer_profile['CustomerID']:
    # Similarity of every customer to `customer_id`, best match first.
    similar_customers = similarity_df[customer_id].sort_values(ascending=False)
    # Exclude the customer themselves, then keep at most TOP_N candidates.
    similar_customers = similar_customers.drop(customer_id).head(TOP_N)

    row = {'CustomerID': customer_id}
    for rank in range(1, TOP_N + 1):
        if rank <= len(similar_customers):
            row[f'Lookalike{rank}'] = similar_customers.index[rank - 1]
            row[f'Score{rank}'] = similar_customers.iloc[rank - 1]
        else:
            # Fewer than TOP_N other customers exist: pad instead of raising IndexError.
            row[f'Lookalike{rank}'] = None
            row[f'Score{rank}'] = float('nan')
    lookalike_results.append(row)

# Convert results to a DataFrame
lookalike_df = pd.DataFrame(lookalike_results, columns=result_columns)

# Filter for customers C0001 to C0020
# The numeric part of the ID is parsed with errors='coerce' so that an ID whose
# suffix is not a number becomes NaN (and is filtered out) instead of aborting
# the whole cell: `&` evaluates both operands for every row, so the prefix
# check cannot shield a strict integer cast from malformed IDs.
lookalike_ids = lookalike_df['CustomerID'].astype(str)
id_numbers = pd.to_numeric(lookalike_ids.str[1:], errors='coerce')
filtered_lookalikes = lookalike_df[lookalike_ids.str.startswith('C00') & id_numbers.le(20)]

# Save to CSV
filtered_lookalikes.to_csv("Kavin_T_Lookalike.csv", index=False)

print("Lookalike recommendations saved to Kavin_T_Lookalike.csv.")


Lookalike recommendations saved to Kavin_T_Lookalike.csv.
