<a href="https://colab.research.google.com/github/PavankumarPandya14/ZEOTAP_OA/blob/main/FirstName_LastName_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import davies_bouldin_score, silhouette_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.cluster import KMeans

## Loading Dataset

In [2]:
# Load the dataset
customers_df = pd.read_csv('https://drive.google.com/uc?export=download&id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE')
products_df = pd.read_csv('https://drive.google.com/uc?export=download&id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0')
transactions_df = pd.read_csv('https://drive.google.com/uc?export=download&id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF')

# Checking the shapes of the dataframes
customers_df.shape, products_df.shape, transactions_df.shape

((200, 4), (100, 4), (1000, 7))

## Lookalike Model

In [3]:
# Merge datasets
transactions_products = pd.merge(transactions_df, products_df, on="ProductID", how="left")
customer_transactions = pd.merge(transactions_products, customers_df, on="CustomerID", how="left")

# Create customer profiles
customer_profiles = customer_transactions.groupby("CustomerID").agg({
    'TotalValue': 'sum',  # Total spending by the customer
    'TransactionID': 'count',  # Number of transactions
    'Category': lambda x: x.mode()[0],  # Most common product category
    'Region': 'first',  # Region (same for each customer)
}).rename(columns={
    'TotalValue': 'TotalSpending',
    'TransactionID': 'TransactionCount',
    'Category': 'FavoriteCategory'
}).reset_index()

# Encode categorical features
label_encoders = {}
for col in ['Region', 'FavoriteCategory']:
    label_encoders[col] = LabelEncoder()
    customer_profiles[col] = label_encoders[col].fit_transform(customer_profiles[col])

# Normalize numeric features
scaler = MinMaxScaler()
customer_profiles[['TotalSpending', 'TransactionCount']] = scaler.fit_transform(
    customer_profiles[['TotalSpending', 'TransactionCount']]
)

# Compute similarity matrix
features = ['TotalSpending', 'TransactionCount', 'FavoriteCategory', 'Region']
similarity_matrix = cosine_similarity(customer_profiles[features])

# Function to recommend lookalikes
def recommend_lookalikes(customer_id, top_n=3):
    customer_idx = customer_profiles[customer_profiles['CustomerID'] == customer_id].index[0]
    similarity_scores = similarity_matrix[customer_idx]
    similar_indices = similarity_scores.argsort()[::-1][1:top_n + 1]
    recommendations = customer_profiles.iloc[similar_indices][['CustomerID']].copy()
    recommendations['SimilarityScore'] = similarity_scores[similar_indices]
    return recommendations

# Example usage
test_customer_id = "C0001"  # Replace with the desired customer ID
lookalikes = recommend_lookalikes(test_customer_id)
print(lookalikes)

    CustomerID  SimilarityScore
188      C0190         0.999954
47       C0048         0.999918
179      C0181         0.999630


### Summary of above code

1. **Combine Data**: Merge customer, product, and transaction information into one dataset.

2. **Create Profiles**: Summarize each customer's total spending, number of purchases, favorite product category, and region.

3. **Prepare Data**: Convert text data to numbers and scale numeric features for fair comparison.

4. **Find Similarities**: Calculate how similar each customer is to others based on their profiles.

5. **Recommend**: For a given customer, suggest the top 3 most similar customers.

In [4]:
# Assuming 'lookalikes' DataFrame exists and has columns 'CustomerID' and 'SimilarityScore'
lookalikes_df = pd.DataFrame(columns=['CustomerID', 'SimilarityScore'])

# Loop to create test_customer_id and populate lookalikes_df
for i in range(1, 21):
    # Format the customer ID with leading zeros
    if i < 10:
        test_customer_id = "C000" + str(i)
    else:
        test_customer_id = "C00" + str(i)

    # Create a list of dictionaries for 'CustomerID' and 'SimilarityScore'
    result_list = lookalikes.apply(lambda row: {row['CustomerID']: row['SimilarityScore']}, axis=1).tolist()

    # Append the result to the DataFrame
    lookalikes_df = pd.concat(
        [lookalikes_df, pd.DataFrame({'CustomerID': [test_customer_id], 'SimilarityScore': [result_list]})],
        ignore_index=True
    )

# Display the resulting DataFrame
lookalikes_df.head()

Unnamed: 0,CustomerID,SimilarityScore
0,C0001,"[{'C0190': 0.9999538555540228}, {'C0048': 0.99..."
1,C0002,"[{'C0190': 0.9999538555540228}, {'C0048': 0.99..."
2,C0003,"[{'C0190': 0.9999538555540228}, {'C0048': 0.99..."
3,C0004,"[{'C0190': 0.9999538555540228}, {'C0048': 0.99..."
4,C0005,"[{'C0190': 0.9999538555540228}, {'C0048': 0.99..."


Exporting the code

In [5]:
lookalikes_df.to_csv('lookalikes.csv', index=False)