In [1]:
# Colab-only step: prompt for a file upload so that Customers.csv, Products.csv
# and Transactions.csv are saved into the working directory, which is where the
# pd.read_csv calls in the following cells look for them.
from google.colab import files
# NOTE(review): `uploaded` is not referenced again in the visible cells; the
# data is read back from the saved files instead.
uploaded = files.upload()

Saving Customers.csv to Customers.csv
Saving Products.csv to Products.csv
Saving Transactions.csv to Transactions.csv


In [2]:
# Importing necessary libraries
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Load datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Merge datasets for customer-product interaction.
# Left joins keep every transaction row even if a customer/product lookup misses.
merged_data = transactions.merge(customers, on='CustomerID', how='left')
merged_data = merged_data.merge(products, on='ProductID', how='left')

# Feature Engineering
# Customer x product matrix of TOTAL quantity purchased. aggfunc is spelled out
# because pivot_table defaults to 'mean', which would average repeat purchases
# of the same product instead of accumulating them.
customer_product_matrix = merged_data.pivot_table(
    index='CustomerID', columns='ProductID', values='Quantity',
    aggfunc='sum', fill_value=0
)

# Standardize the data
scaler = StandardScaler()
scaled_matrix = scaler.fit_transform(customer_product_matrix)

# Compute cosine similarity between every pair of customer rows
similarity_matrix = cosine_similarity(scaled_matrix)
similarity_df = pd.DataFrame(similarity_matrix,
                             index=customer_product_matrix.index,
                             columns=customer_product_matrix.index)

# Get top 3 similar customers for the first 20 customers
lookalike_map = {}
for cust_id in customers['CustomerID'][:20]:
    # Customers with no transactions have no row in the matrix; skip them.
    if cust_id not in similarity_df.index:
        continue
    scores = similarity_df.loc[cust_id]
    # Remove the customer's own entry by label before ranking. Slicing off the
    # first result of nlargest(4) is only correct when the self-score is ranked
    # first, which ties at 1.0 or floating-point round-off can break.
    top_3 = scores.drop(cust_id).nlargest(3)
    # Store the top 3 customers and their scores
    lookalike_map[cust_id] = list(zip(top_3.index, top_3.values))

# Create Lookalike.csv: one row per (customer, lookalike) pair
lookalike_data = [
    {'cust_id': cust_id, 'similar_cust_id': sim_cust_id, 'score': score}
    for cust_id, similar_customers in lookalike_map.items()
    for sim_cust_id, score in similar_customers
]

# Explicit columns keep the CSV header intact even if no rows were produced.
lookalike_df = pd.DataFrame(lookalike_data,
                            columns=['cust_id', 'similar_cust_id', 'score'])
lookalike_df.to_csv('Lookalike.csv', index=False)

# Model Explanation (in comments):
# 1. Feature matrix: customer-product interactions (total quantity purchased per product).
# 2. Scaling: standardized the matrix columns with StandardScaler before comparison.
# 3. Similarity: cosine similarity between customer rows to find similar purchasing patterns.
# 4. Recommendations: for each of the first 20 customers, the 3 other customers with the
#    highest similarity score (the customer themselves is excluded).

# Deliverables
print("Lookalike model completed. Check 'Lookalike.csv' for results.")


Lookalike model completed. Check 'Lookalike.csv' for results.


In [3]:
# Importing necessary libraries
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [4]:
# Load datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Merge datasets for customer-product interaction.
# Left joins keep every transaction row even if a customer/product lookup misses.
merged_data = transactions.merge(customers, on='CustomerID', how='left')
merged_data = merged_data.merge(products, on='ProductID', how='left')

# Feature Engineering
# Customer x product matrix of TOTAL quantity purchased. aggfunc is spelled out
# because pivot_table defaults to 'mean', which would average repeat purchases
# of the same product instead of accumulating them. Pairs with no purchase
# are filled with 0.
customer_product_matrix = merged_data.pivot_table(
    index='CustomerID', columns='ProductID', values='Quantity',
    aggfunc='sum', fill_value=0
)

In [7]:
# Fit the scaler on the customer-product matrix, then apply it to the same data.
scaler = StandardScaler().fit(customer_product_matrix)
scaled_matrix = scaler.transform(customer_product_matrix)

# Pairwise cosine similarity between customer rows, re-labelled with the
# customer IDs on both axes so scores can be looked up by ID.
similarity_matrix = cosine_similarity(scaled_matrix)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

In [8]:
# Build {customer ID: [(lookalike ID, score), ...]} for the first 20 customers.
lookalike_map = {}
for cust_id in customers['CustomerID'][:20]:
    # Customers with no transactions have no row in the similarity matrix; skip them.
    if cust_id not in similarity_df.index:
        continue
    scores = similarity_df.loc[cust_id]
    # Remove the customer's own entry by label before ranking. Slicing off the
    # first result of nlargest(4) is only correct when the self-score is ranked
    # first, which ties at 1.0 or floating-point round-off can break.
    top_3 = scores.drop(cust_id).nlargest(3)
    # Convert each score to a built-in float: the list is stringified when
    # written to CSV, and NumPy 2.x scalars would render as `np.float64(...)`.
    lookalike_map[cust_id] = [
        (sim_cust_id, round(float(score), 4))
        for sim_cust_id, score in zip(top_3.index, top_3.values)
    ]

# One CSV row per customer; the "lookalikes" column holds the list of (ID, score) pairs.
lookalike_csv_data = [
    {"cust_id": cust_id, "lookalikes": similar_customers}
    for cust_id, similar_customers in lookalike_map.items()
]

# Explicit columns keep the CSV header intact even if no rows were produced.
lookalike_df = pd.DataFrame(lookalike_csv_data, columns=["cust_id", "lookalikes"])
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model completed. Check 'Lookalike.csv' for results.")


Lookalike model completed. Check 'Lookalike.csv' for results.
