<a href="https://colab.research.google.com/github/Habeebhb/Habeeb_DS_Zeotap/blob/main/Habeeb_Rahuman_lookalikeModel_Zeotap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


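# Lookalike Model

Builds one profile per customer from the transaction, customer, and product tables, then uses cosine similarity between profiles to recommend the three most similar customers for each of the first 20 customers.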

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [2]:
# Input CSV locations, given relative to the notebook's working directory.
customers_file, products_file, transactions_file = (
    'Customers.csv',
    'Products.csv',
    'Transactions.csv',
)


In [3]:
# Read the three input CSV files into DataFrames (customers, products, transactions).
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (customers_file, products_file, transactions_file)
)

In [4]:
# Merge the datasets: attach customer columns, then product columns, to each
# transaction. Left joins keep every transaction row even without a match.
transactions_customers = transactions_df.merge(customers_df, on="CustomerID", how="left")
merged_df = transactions_customers.merge(products_df, on="ProductID", how="left")

In [5]:
#Create customer profiles
def _most_common_value(values):
    """Return the most frequent non-null value in ``values``, or None if there is none.

    When several values tie, the first entry of ``Series.mode()`` is used.
    """
    modes = values.mode()
    # mode() ignores nulls, so a group whose values are all null yields an
    # empty result; indexing it directly would raise.
    return modes.iloc[0] if len(modes) > 0 else None


def create_customer_profiles(data):
    """Aggregate transaction-level rows into one profile row per customer.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``CustomerID``, ``TotalValue``, ``Quantity``,
        ``ProductID``, ``Category`` and ``Region``.

    Returns
    -------
    pandas.DataFrame
        One row per ``CustomerID`` with:
        ``TotalValue`` (sum), ``Quantity`` (sum), ``ProductID`` (count of
        distinct products), ``Category`` (most common category, or None when
        the customer has no non-null category) and ``Region`` (first value).
    """
    profiles = (
        data.groupby("CustomerID")
        .agg(
            {
                "TotalValue": "sum",  # Total transaction value
                "Quantity": "sum",  # Total quantity purchased
                "ProductID": "nunique",  # Number of unique products purchased
                "Category": _most_common_value,  # Most common category
                "Region": "first",  # Customer's region
            }
        )
        .reset_index()
    )
    return profiles

# Build one aggregated profile row per customer from the merged transaction table.
customer_profiles = create_customer_profiles(merged_df)


In [6]:
# Encoding: one-hot encode the categorical profile columns (Category, Region).
# drop_first=True omits the indicator for the first level of each column, so
# that level is represented by all of the column's indicators being zero.
customer_profiles_encoded = pd.get_dummies(customer_profiles, columns=["Category", "Region"], drop_first=True)



In [7]:
# Normalize numerical columns
# Here "ProductID" is the per-customer count of distinct products produced by
# the profile aggregation, not a product identifier.
scaler = StandardScaler()
numerical_cols = ["TotalValue", "Quantity", "ProductID"]
# The scaled values overwrite the original columns of customer_profiles_encoded.
customer_profiles_encoded[numerical_cols] = scaler.fit_transform(customer_profiles_encoded[numerical_cols])



In [8]:
# Calculate similarity matrix
def calculate_similarity_matrix(data):
    """Return the pairwise cosine similarity between the rows of ``data``.

    ``data`` is passed unchanged to ``cosine_similarity``; entry ``[i][j]`` of
    the result is the similarity between row ``i`` and row ``j``.
    """
    pairwise_scores = cosine_similarity(data)
    return pairwise_scores

# Keep the IDs separately so that row/column i of the similarity matrix can be
# mapped back to a customer; the ID column itself is excluded from the features.
customer_ids = customer_profiles_encoded["CustomerID"]
similarity_matrix = calculate_similarity_matrix(customer_profiles_encoded.drop(columns=["CustomerID"]))



In [9]:
# Recommend top 3 similar customers
def get_top_3_similar(customers, sim_matrix, num_customers=20, top_n=3):
    """Map each of the first ``num_customers`` customers to their closest lookalikes.

    Parameters
    ----------
    customers : pandas.Series
        Customer IDs, positionally aligned with the rows/columns of ``sim_matrix``.
    sim_matrix : 2-D array-like
        ``sim_matrix[i][j]`` is the similarity between customer ``i`` and ``j``.
    num_customers : int, default 20
        How many customers (from the start of ``customers``) to process. Values
        larger than the number of available customers are clamped.
    top_n : int, default 3
        How many lookalikes to return per customer.

    Returns
    -------
    dict
        ``{customer_id: [(lookalike_id, score), ...]}`` with at most ``top_n``
        entries per customer, ordered by descending score.
    """
    lookalike_map = {}
    # Clamp so that asking for more customers than exist does not raise.
    limit = min(num_customers, len(customers))
    for i in range(limit):
        customer_id = customers.iloc[i]
        # Exclude the customer by position rather than assuming they sort first:
        # another customer with a tied (or marginally higher) score could
        # otherwise push "self" into the results.
        candidates = [(j, score) for j, score in enumerate(sim_matrix[i]) if j != i]
        candidates.sort(key=lambda pair: pair[1], reverse=True)  # Sort by similarity score
        lookalike_map[customer_id] = [
            (customers.iloc[j], score) for j, score in candidates[:top_n]
        ]
    return lookalike_map


In [10]:
# Get lookalikes for the first 20 customers (20 is the function's default num_customers)
lookalike_map = get_top_3_similar(customer_ids, similarity_matrix)


In [11]:
# Generate Lookalike.csv
def generate_lookalike_csv(lookalike_map, output_file="Lookalike.csv"):
    """Write the lookalike recommendations to a CSV file in long format.

    Parameters
    ----------
    lookalike_map : dict
        ``{customer_id: [(lookalike_id, score), ...]}``.
    output_file : str, default "Lookalike.csv"
        Destination path of the CSV file.

    The file has the columns ``CustomerID``, ``LookalikeID`` and ``Score``,
    one row per (customer, lookalike) pair. A confirmation message is printed
    once the file has been written. Returns None.
    """
    lookalike_data = [
        {"CustomerID": cust_id, "LookalikeID": lookalike_id, "Score": score}
        for cust_id, lookalikes in lookalike_map.items()
        for lookalike_id, score in lookalikes
    ]
    # Explicit columns keep the header row present even when there are no
    # recommendations to write.
    lookalike_df = pd.DataFrame(lookalike_data, columns=["CustomerID", "LookalikeID", "Score"])
    lookalike_df.to_csv(output_file, index=False)
    print(f"{output_file} generated successfully!")



In [12]:
# Write the lookalike recommendations to the default output file (Lookalike.csv).
# This call only writes the file and prints a confirmation message.
generate_lookalike_csv(lookalike_map)


Lookalike.csv generated successfully!


In [13]:
# Show each processed customer followed by their recommended lookalikes and scores.
for customer_id in lookalike_map:
    recommendations = lookalike_map[customer_id]
    print(f"Customer {customer_id}:")
    for lookalike_id, score in recommendations:
        print(f"  Lookalike: {lookalike_id}, Score: {score:.4f}")


Customer C0001:
  Lookalike: C0048, Score: 0.9820
  Lookalike: C0190, Score: 0.9646
  Lookalike: C0184, Score: 0.9453
Customer C0002:
  Lookalike: C0088, Score: 0.9418
  Lookalike: C0077, Score: 0.8708
  Lookalike: C0083, Score: 0.8562
Customer C0003:
  Lookalike: C0031, Score: 0.8989
  Lookalike: C0076, Score: 0.8951
  Lookalike: C0052, Score: 0.8941
Customer C0004:
  Lookalike: C0087, Score: 0.9496
  Lookalike: C0169, Score: 0.9454
  Lookalike: C0155, Score: 0.9400
Customer C0005:
  Lookalike: C0186, Score: 0.9975
  Lookalike: C0007, Score: 0.9866
  Lookalike: C0140, Score: 0.9859
Customer C0006:
  Lookalike: C0168, Score: 0.9531
  Lookalike: C0187, Score: 0.8966
  Lookalike: C0011, Score: 0.8729
Customer C0007:
  Lookalike: C0005, Score: 0.9866
  Lookalike: C0115, Score: 0.9825
  Lookalike: C0140, Score: 0.9770
Customer C0008:
  Lookalike: C0065, Score: 0.8779
  Lookalike: C0090, Score: 0.8772
  Lookalike: C0139, Score: 0.8510
Customer C0009:
  Lookalike: C0198, Score: 0.9877
  Look