<a href="https://colab.research.google.com/github/Swagat-modder/Zeotap/blob/main/Swagat_Garadia_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from datetime import datetime

In [2]:
# Load the customer and product tables from their CSV files
customers_df, products_df = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/Customers.csv', '/content/Products.csv')
)

In [3]:
# IDs of the customers we need lookalikes for: C0001 through C0020
# (the numeric part is zero-padded to four digits)
target_customers = [f'C{number:04d}' for number in range(1, 21)]

In [4]:
# Customer preprocessing: express the signup date as a number, namely the
# count of whole days between each signup and the earliest signup in the table.
signup_dates = pd.to_datetime(customers_df['SignupDate'])
min_date = signup_dates.min()
customers_df['SignupDate'] = signup_dates
customers_df['DaysSinceSignup'] = signup_dates.sub(min_date).dt.days

In [8]:
# Expand Region into one integer 0/1 indicator column per region value,
# with column names prefixed by "Region"
region_dummies = pd.get_dummies(
    customers_df['Region'],
    prefix='Region',
    dtype=int,
)

In [6]:
# Feature matrix: the customer identifier and signup-age column, followed by
# the region indicator columns placed side by side (aligned on the row index)
base_features = customers_df.loc[:, ['CustomerID', 'DaysSinceSignup']]
features_df = pd.concat([base_features, region_dummies], axis='columns')

In [11]:
# Build the lookalike table: for every target customer, the three most similar
# other customers, stored as
#     recommendations[target_id] = [(customer_id, similarity), ...]
#
# Similarity is 1 / (1 + d), where d is the Euclidean distance between the two
# customers' feature rows, so identical rows score 1.0 and the score falls
# towards 0 as the rows move apart.
#
# NOTE(review): the distance is computed on the raw feature values.
# DaysSinceSignup is a day count while the Region_* columns are 0/1 flags, so
# the signup date likely dominates the ranking. Consider scaling the features
# first -- confirm the intended weighting before changing it, since that would
# alter the saved scores.
recommendations = {}

# These depend only on features_df, so they are built once instead of once per
# pair of customers.
feature_cols = [col for col in features_df.columns if col != 'CustomerID']
feature_matrix = features_df[feature_cols].to_numpy(dtype=float)
customer_ids = features_df['CustomerID'].to_numpy()

# Generating recommendations for each target customer
for target_id in target_customers:
    is_target = (features_df['CustomerID'] == target_id).to_numpy()
    if not is_target.any():
        # Name the offending ID instead of failing with an anonymous
        # positional-index error.
        raise IndexError(f"Target customer {target_id!r} not found in features_df")

    # Features of the target customer (first matching row)
    target_values = feature_matrix[is_target][0]

    # Every row that does not carry the target's ID is a candidate lookalike
    candidate_ids = customer_ids[~is_target]
    candidate_features = feature_matrix[~is_target]

    # Euclidean distance from the target to all candidates in one vectorized step
    dists = np.sqrt(((candidate_features - target_values) ** 2).sum(axis=1))
    # Converting distance to a similarity score in (0, 1]
    similarities = 1 / (1 + dists)

    # A stable sort on the negated score orders by similarity descending while
    # keeping tied candidates in features_df row order, then keep the top 3.
    top_idx = np.argsort(-similarities, kind='stable')[:3]
    recommendations[target_id] = [
        (candidate_ids[i], similarities[i]) for i in top_idx
    ]


In [12]:
# One output record per target customer. The Lookalikes field packs the
# similar customers as "cust_id1:score1,cust_id2:score2,cust_id3:score3",
# with each score rendered to four decimal places.
rows = [
    {
        'CustomerID': target_id,
        'Lookalikes': ','.join(
            f"{cust}:{score:.4f}" for cust, score in similar_customers
        ),
    }
    for target_id, similar_customers in recommendations.items()
]

In [13]:
# Turn the records into a table and write it to Lookalike.csv,
# leaving out the DataFrame's row index
recommendations_df = pd.DataFrame(data=rows)
recommendations_df.to_csv(path_or_buf='Lookalike.csv', index=False)

In [14]:
# Sanity check: show the stored lookalikes for the first five target customers
print("Sample recommendations for first 5 customers:")
for target_id in target_customers[:5]:
    print(f"\nTarget Customer: {target_id}")
    for similar_id, score in recommendations[target_id]:
        print(f"Similar Customer: {similar_id}, Similarity Score: {score:.4f}")

Sample recommendations for first 5 customers:

Target Customer: C0001
Similar Customer: C0112, Similarity Score: 0.2500
Similar Customer: C0025, Similarity Score: 0.1429
Similar Customer: C0071, Similarity Score: 0.1000

Target Customer: C0002
Similar Customer: C0166, Similarity Score: 0.4142
Similar Customer: C0104, Similarity Score: 0.2317
Similar Customer: C0134, Similarity Score: 0.1429

Target Customer: C0003
Similar Customer: C0050, Similarity Score: 0.1614
Similar Customer: C0136, Similarity Score: 0.1228
Similar Customer: C0081, Similarity Score: 0.0764

Target Customer: C0004
Similar Customer: C0173, Similarity Score: 0.2317
Similar Customer: C0179, Similarity Score: 0.1228
Similar Customer: C0108, Similarity Score: 0.0833

Target Customer: C0005
Similar Customer: C0073, Similarity Score: 0.1614
Similar Customer: C0135, Similarity Score: 0.1096
Similar Customer: C0159, Similarity Score: 0.1000
