In [1]:
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Load datasets
# All three CSV files are read from a single folder; edit DATA_DIR to point at
# your own copy of the data instead of touching every read_csv call.
DATA_DIR = Path(r"C:\Users\ASUS\OneDrive\Desktop\data science assignment")

customers_df = pd.read_csv(DATA_DIR / "Customers.csv")
# Each frame is now read from the file matching its name. Previously
# Products.csv was loaded into transactions_df and Transactions.csv into
# products_df, so every later reference to these names was misleading.
transactions_df = pd.read_csv(DATA_DIR / "Transactions.csv")
products_df = pd.read_csv(DATA_DIR / "Products.csv")

In [3]:
# Data Preparation
# Left-join products_df onto transactions_df by ProductID, then customers_df by
# CustomerID, keeping every row of the left-hand frame at each step.
# NOTE: this rebinds transactions_df, so re-running the cell without re-running
# the load cell merges the lookup tables a second time.
transactions_df = (
    transactions_df
    .merge(products_df, how='left', on='ProductID')
    .merge(customers_df, how='left', on='CustomerID')
)

In [7]:
# Feature engineering for customer profiles
# One row per CustomerID with two behavioural counts:
#   total_transactions - number of non-null TransactionID values in the group
#   unique_products    - number of distinct ProductID values in the group
per_customer = transactions_df.groupby('CustomerID')
customer_features = per_customer.agg(
    total_transactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
    unique_products=pd.NamedAgg(column='ProductID', aggfunc='nunique'),
).reset_index()

In [8]:
# Merge with customer demographic features
# Left join so every customer in customers_df gets a profile row, including
# customers that have no row in customer_features.
customer_profiles = customers_df.merge(customer_features, on='CustomerID', how='left')

# Customers without a customer_features row come out of the join with NaN in the
# engineered columns; for those, 0 is the right value. Fill ONLY those
# columns: a blanket fillna(0) would also overwrite any missing values in the
# columns that came from customers_df with a meaningless 0.
engineered_columns = [col for col in customer_features.columns if col != 'CustomerID']
customer_profiles = customer_profiles.fillna({col: 0 for col in engineered_columns})

In [9]:
# Standardize features for similarity calculation
# StandardScaler rescales each listed column to zero mean and unit variance so
# neither count dominates the similarity purely because of its scale.
feature_columns = ['total_transactions', 'unique_products']

scaler = StandardScaler()
scaler.fit(customer_profiles[feature_columns])
# The scaled values replace the raw counts in customer_profiles.
customer_profiles[feature_columns] = scaler.transform(customer_profiles[feature_columns])


In [10]:
# Compute similarity matrix
# Pairwise cosine similarity between every pair of rows in the scaled feature
# table, then labelled on both axes with CustomerID so scores can be looked up
# by customer rather than by position.
customer_ids = customer_profiles['CustomerID']
similarity_matrix = cosine_similarity(customer_profiles[feature_columns])
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


In [11]:
# Generate Lookalike Recommendations
N_TARGET_CUSTOMERS = 20  # targets are the first 20 rows of customer_profiles
                         # (presumably C0001-C0020; assumes the customer file is ordered by ID)
TOP_K = 3                # number of lookalikes reported per target customer

lookalike_results = {}
for customer_id in customer_profiles['CustomerID'].head(N_TARGET_CUSTOMERS):
    # Remove the customer's own column entry by label instead of assuming it
    # sorts into position 0. With only two features, many customers tie at a
    # cosine similarity of 1.0, so after sorting the customer itself is not
    # guaranteed to come first; slicing [1:4] could then list a customer as its
    # own lookalike while silently dropping a genuine match.
    candidates = similarity_df[customer_id].drop(labels=customer_id)
    # nlargest returns the TOP_K highest scores in descending order; ties are
    # resolved by order of appearance, which keeps the output deterministic.
    similar_customers = candidates.nlargest(TOP_K)
    # Stored as a list of (lookalike CustomerID, similarity score) pairs.
    lookalike_results[customer_id] = list(zip(similar_customers.index, similar_customers.values))


In [12]:
# Create Lookalike CSV
# Flatten {customer: [(lookalike, score), ...]} into one record per
# (customer, lookalike) pair so it can be written as a flat table.
lookalike_output = [
    {'cust_id': cust_id, 'rec_id': rec_id, 'score': score}
    for cust_id, recommendations in lookalike_results.items()
    for rec_id, score in recommendations
]

lookalike_csv_path = 'Lookalike.csv'
lookalike_df = pd.DataFrame(lookalike_output)
# index=False: the positional row index carries no information, so it is left out of the file.
lookalike_df.to_csv(lookalike_csv_path, index=False)
print(f"Lookalike recommendations saved to {lookalike_csv_path}")


Lookalike recommendations saved to Lookalike.csv


In [13]:
# Explanation of model development and logic
# The summary is kept as data and printed line by line; the heading carries a
# leading newline so it is separated from any earlier output.
explanation_lines = (
    "\nModel Explanation:",
    "1. Customer profiles were created using transaction and product data to capture behavioral features.",
    "2. Features were standardized to ensure equal weight in similarity calculations.",
    "3. Cosine similarity was used to compute pairwise customer similarities.",
    "4. For each target customer, the top 3 most similar customers were identified based on similarity scores.",
)
for line in explanation_lines:
    print(line)



Model Explanation:
1. Customer profiles were created using transaction and product data to capture behavioral features.
2. Features were standardized to ensure equal weight in similarity calculations.
3. Cosine similarity was used to compute pairwise customer similarities.
4. For each target customer, the top 3 most similar customers were identified based on similarity scores.
