<a href="https://colab.research.google.com/github/RAM-1166/Zeotap_assignment/blob/main/Peteti_Ram_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Input location and report sizes, collected here so they are easy to change.
DATA_DIR = "/content/drive/MyDrive/Zeotap"
NUM_TARGET_CUSTOMERS = 20  # lookalikes are reported for the first N customers
TOP_K = 3  # lookalikes kept per customer


def top_lookalikes(scores, candidate_ids, customer_id, k=3):
    """Return the ``k`` customers most similar to ``customer_id``.

    Args:
        scores: Similarity of ``customer_id`` to every candidate, i.e. one row
            of the similarity matrix, positionally aligned with
            ``candidate_ids``.
        candidate_ids: Customer IDs in the same order as ``scores``.
        customer_id: The customer being matched; never part of the result.
        k: Maximum number of lookalikes to return.

    Returns:
        A list of ``(customer_id, score)`` tuples ordered by descending score.
        Scores are converted to built-in ``float`` so that ``str()`` of the
        result looks the same under every NumPy version (NumPy 2 renders its
        own scalars as ``np.float64(...)``, which would otherwise end up in
        the CSV).
    """
    ranked = sorted(
        (
            (other_id, float(score))
            for other_id, score in zip(candidate_ids, scores)
            if other_id != customer_id
        ),
        key=lambda pair: pair[1],
        reverse=True,  # the sort is stable, so ties keep candidate order
    )
    return ranked[:max(k, 0)]


# In a notebook/Colab cell (or when run as a script) __name__ is "__main__",
# so the pipeline below runs and defines the same globals as before. The guard
# only keeps a plain import of this code from reading the Drive files.
if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
    products = pd.read_csv(f"{DATA_DIR}/Products.csv")
    transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

    # Attach product and customer attributes to every transaction row
    merged_data = (
        transactions
        .merge(products, on='ProductID', how='left')
        .merge(customers, on='CustomerID', how='left')
    )

    # Aggregate customer-level features. The starting point is the
    # transactions table, so a customer without any transaction gets no row
    # here and can neither receive nor be suggested as a lookalike.
    customer_features = merged_data.groupby('CustomerID').agg(
        TotalSpent=('TotalValue', 'sum'),
        TotalTransactions=('TransactionID', 'count'),
        AvgTransactionValue=('TotalValue', 'mean'),
        TotalQuantity=('Quantity', 'sum'),
        UniqueCategories=('Category', 'nunique'),
        Region=('Region', 'first'),
    ).reset_index()

    # One-hot encode 'Region' to include it in the feature vector
    customer_features = pd.get_dummies(customer_features, columns=['Region'])

    # Scale every feature to [0, 1] so no single column dominates the
    # similarity computation
    scaler = MinMaxScaler()
    normalized_features = scaler.fit_transform(
        customer_features.drop(columns=['CustomerID'])
    )

    # Pairwise cosine similarity; row i / column i belong to customer_ids[i]
    similarity_matrix = cosine_similarity(normalized_features)

    # groupby sorts its keys by default, so customer_ids is ordered by
    # CustomerID and lines up with the rows of similarity_matrix.
    customer_ids = customer_features['CustomerID'].tolist()

    # Recommendations for the first NUM_TARGET_CUSTOMERS customers
    # (C0001 - C0020, provided each of them has at least one transaction).
    lookalike_map = {
        customer_id: top_lookalikes(
            similarity_matrix[idx], customer_ids, customer_id, TOP_K
        )
        for idx, customer_id in enumerate(customer_ids[:NUM_TARGET_CUSTOMERS])
    }

    # Save the lookalike map to a CSV file, one row per customer
    lookalike_df = pd.DataFrame([
        {"cust_id": cust_id, "lookalikes": str(lookalikes)}
        for cust_id, lookalikes in lookalike_map.items()
    ])
    lookalike_df.to_csv("Lookalike.csv", index=False)

    print("Lookalike recommendations saved to Lookalike.csv!")


Lookalike recommendations saved to Lookalike.csv!
