In [11]:
# TASK 2: Lookalike Model
def lookalike_model(customers, transactions, top_n=3, n_customers=20):
    """Rank the most similar customers by cosine similarity of purchase quantities.

    Builds a CustomerID x ProductID matrix from the ``Quantity`` column of the
    merged transaction data, computes the pairwise cosine similarity between
    customer rows, and collects the closest matches for the first
    ``n_customers`` entries of the matrix index.

    Besides its arguments, this function reads the module-level ``products``
    DataFrame, which must be defined before the function is called.

    Args:
        customers: DataFrame with a ``CustomerID`` column.
        transactions: DataFrame with ``CustomerID``, ``ProductID`` and
            ``Quantity`` columns.
        top_n: Number of lookalikes to report per customer (default 3).
        n_customers: Number of leading customers in the matrix index to
            report on (default 20).

    Returns:
        dict mapping each reported CustomerID to a list of at most ``top_n``
        ``{"CustomerID": ..., "Score": ...}`` dicts, ordered from the most to
        the least similar customer.

    Side effects:
        Prints the complete result dictionary to stdout.
    """
    # Inner joins keep only transactions whose CustomerID / ProductID also
    # appear in the customers / products frames.
    merged = pd.merge(transactions, customers, on="CustomerID", how="inner")
    merged = pd.merge(merged, products, on="ProductID", how="inner")

    # Customer-Product matrix. pivot_table aggregates with its default (mean)
    # when a customer has several transactions for the same product; pairs
    # that never occur are filled with 0.
    customer_product_matrix = merged.pivot_table(
        index="CustomerID", columns="ProductID", values="Quantity", fill_value=0
    )

    # Pairwise cosine similarity, labelled by CustomerID on both axes.
    similarity_matrix = cosine_similarity(customer_product_matrix)
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=customer_product_matrix.index,
        columns=customer_product_matrix.index,
    )

    lookalike_results = {}
    for customer in similarity_df.index[:n_customers]:
        # Remove the customer's own entry by label before ranking. Skipping
        # "position 0" after sorting is not reliable: another customer can tie
        # with (or, through rounding, slightly exceed) the self-similarity.
        ranked = (
            similarity_df[customer]
            .drop(labels=customer)
            .sort_values(ascending=False)
            .head(top_n)
        )
        lookalike_results[customer] = [
            {"CustomerID": other_id, "Score": float(score)}
            for other_id, score in ranked.items()
        ]

    print("\nLookalike Results:")
    print(lookalike_results)
    return lookalike_results

# Run the model on the `customers` and `transactions` frames (expected to be
# defined by an earlier cell); the call also prints the full result dict.
lookalike_results = lookalike_model(customers, transactions)



Lookalike Results:
{'C0001': [{'CustomerID': 'C0097', 'Score': 0.5477225575051661}, {'CustomerID': 'C0194', 'Score': 0.469668218313862}, {'CustomerID': 'C0199', 'Score': 0.4381780460041329}], 'C0002': [{'CustomerID': 'C0091', 'Score': 0.3801987652174059}, {'CustomerID': 'C0030', 'Score': 0.37282185960072}, {'CustomerID': 'C0071', 'Score': 0.329914439536929}], 'C0003': [{'CustomerID': 'C0134', 'Score': 0.5199469468957452}, {'CustomerID': 'C0181', 'Score': 0.5175973113765044}, {'CustomerID': 'C0144', 'Score': 0.39999999999999997}], 'C0004': [{'CustomerID': 'C0070', 'Score': 0.4988876515698588}, {'CustomerID': 'C0132', 'Score': 0.3843075691322091}, {'CustomerID': 'C0063', 'Score': 0.3360672201667223}], 'C0005': [{'CustomerID': 'C0096', 'Score': 0.6482037235521645}, {'CustomerID': 'C0055', 'Score': 0.5144957554275265}, {'CustomerID': 'C0064', 'Score': 0.3328770246548891}], 'C0006': [{'CustomerID': 'C0058', 'Score': 0.6488856845230502}, {'CustomerID': 'C0040', 'Score': 0.5803810000880093},

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

# Assuming `customers`, `transactions` and `products` are already cleaned and loaded

# Merge datasets to create a Customer-Product interaction matrix.
# Inner joins keep only transactions whose CustomerID / ProductID are present
# in the customers / products frames.
merged = pd.merge(transactions, customers, on="CustomerID", how="inner")
merged = pd.merge(merged, products, on="ProductID", how="inner")

# Pivot table for Customer-Product interaction matrix (CustomerID x ProductID).
# pivot_table aggregates repeated (customer, product) pairs with its default
# (mean); pairs that never occur are filled with 0.
customer_product_matrix = merged.pivot_table(
    index="CustomerID", columns="ProductID", values="Quantity", fill_value=0
)

# Calculate similarity matrix using cosine similarity
similarity_matrix = cosine_similarity(customer_product_matrix)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Extract top 3 similar customers for the first 20 CustomerIDs in the matrix
lookalike_results = {}
for customer in similarity_df.index[:20]:
    # Drop the customer's own entry by label before ranking: relying on the
    # self-match being sorted into first place breaks when another customer
    # ties with (or, through rounding, slightly exceeds) the self-similarity.
    similar_customers = (
        similarity_df[customer]
        .drop(labels=customer)
        .sort_values(ascending=False)  # Sort by similarity score
        .head(3)  # Keep the 3 most similar customers
    )
    lookalike_results[customer] = [
        {"CustomerID": sim_customer, "Score": score}
        for sim_customer, score in similar_customers.items()
    ]

# Flatten the nested results into one row per (customer, lookalike) pair
rows = [
    {
        "CustomerID": customer,
        "SimilarCustomerID": lookalike["CustomerID"],
        "SimilarityScore": lookalike["Score"],
    }
    for customer, lookalikes in lookalike_results.items()
    for lookalike in lookalikes
]

# Explicit columns keep the CSV header intact even when there are no rows
lookalike_df = pd.DataFrame(
    rows, columns=["CustomerID", "SimilarCustomerID", "SimilarityScore"]
)

# Save as CSV file
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)

print("Lookalike.csv file generated successfully!")


Lookalike.csv file generated successfully!
