Task 2: Lookalike Model 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load datasets
# Single place to point the notebook at the folder holding the three input
# CSVs; change this one value when running on another machine.
DATA_DIR = "D:/Zeotap"

customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Attach customer and product attributes to every transaction. Both merges
# use the default inner join, so transactions without a matching customer or
# product row are dropped.
merged_data = transactions.merge(customers, on="CustomerID")
merged_data = merged_data.merge(products, on="ProductID")

# Customer x product matrix: one row per CustomerID, one column per ProductID,
# each cell holding the summed TotalValue (0 where the pair never occurs).
customer_product_matrix = pd.pivot_table(
    merged_data,
    values="TotalValue",
    index="CustomerID",
    columns="ProductID",
    aggfunc="sum",
    fill_value=0,
)

# Standardise every product column before measuring similarity.
scaler = StandardScaler()
normalized_matrix = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between customers, relabelled with CustomerIDs
# on both axes so rows can be looked up by customer.
similarity_matrix = cosine_similarity(normalized_matrix)
similarity_df = pd.DataFrame(similarity_matrix)
similarity_df.index = customer_product_matrix.index
similarity_df.columns = customer_product_matrix.index

# Generate lookalike recommendations for the first 20 customers.
N_TARGET_CUSTOMERS = 20  # customers to score, taken in Customers.csv row order
TOP_K = 3  # lookalikes kept per customer

lookalikes = {}
for customer in customers["CustomerID"].head(N_TARGET_CUSTOMERS):
    # Customers that have no row in the similarity matrix cannot be scored
    # and are skipped.
    if customer not in similarity_df.index:
        continue

    # Remove the customer's own entry by label rather than assuming it is
    # ranked first: a tie with another customer, floating-point rounding, or
    # an all-zero vector (self-similarity 0) would otherwise let the customer
    # appear among its own lookalikes or push out a genuine neighbour.
    similar_customers = (
        similarity_df.loc[customer]
        .drop(labels=customer)
        .nlargest(TOP_K)  # highest scores first
    )

    # Store plain Python floats so the scores are written to CSV as bare
    # numbers; NumPy >= 2 scalars would be rendered as "np.float64(...)".
    lookalikes[customer] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

# Write the recommendations to "Lookalike.csv": one row per customer, with the
# customer's ID in "cust_id" and its list of (lookalike ID, similarity score)
# pairs in "lookalikes".
lookalike_data = [
    {"cust_id": cust_id, "lookalikes": matches}
    for cust_id, matches in lookalikes.items()
]
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike saved to Lookalike.csv.")

Lookalike saved to Lookalike.csv.
