## IMPORTING LIBRARIES

In [1]:
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
warnings.filterwarnings("ignore", category=FutureWarning)

## Loading Data With Pandas


In [2]:
# Folder that holds the three input CSV files. Set the ZEOTAP_DATA_DIR
# environment variable to run the notebook against another location; when it
# is unset the original author's folder is used, so existing runs are unchanged.
DATA_DIR = Path(os.environ.get("ZEOTAP_DATA_DIR", r"C:\Users\Pruthviraj\Desktop\Zeotap"))

# Load each table from the shared data directory.
customers_df = pd.read_csv(DATA_DIR / "Customers.csv")
products_df = pd.read_csv(DATA_DIR / "Products.csv")
transactions_df = pd.read_csv(DATA_DIR / "Transactions.csv")

In [3]:
# Preview the first three rows of the customers table.
customers_df.head(3)

Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07


In [4]:
# Preview the first three rows of the products table.
products_df.head(3)

Unnamed: 0,ProductID,ProductName,Category,Price
0,P001,ActiveWear Biography,Books,169.3
1,P002,ActiveWear Smartwatch,Electronics,346.3
2,P003,ComfortLiving Biography,Books,44.12


In [5]:
# Preview the first rows of the transactions table (head() defaults to five rows).
transactions_df.head()

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68


## Data Preprocessing

**Merge Customer and Product Information:**

First, combine the customers_df, products_df, and transactions_df into a single data frame so that each transaction contains both customer and product details.

In [6]:
# Attach customer attributes to every transaction. The left join keeps all
# transactions; validate='many_to_one' raises a MergeError if a CustomerID
# appears more than once in customers_df, instead of silently duplicating
# transactions (which would inflate spend and frequency downstream).
transactions_with_customer = transactions_df.merge(
    customers_df,
    on='CustomerID',
    how='left',
    validate='many_to_one',
)

# Attach product attributes the same way, with the same uniqueness guard on ProductID.
# NOTE(review): the previews show a Price column in both transactions_df and
# products_df, so pandas' default suffixes should yield Price_x / Price_y in the
# result -- confirm before using either column.
transactions_with_all_info = transactions_with_customer.merge(
    products_df,
    on='ProductID',
    how='left',
    validate='many_to_one',
)

## Feature Engineering:

Create customer profiles based on their transaction history. For example:

Total spend per customer.

Most purchased product category.

Frequency of purchases.

Recency of last purchase.

In [11]:
# Parse the purchase timestamps once, so that "latest purchase" is a real
# datetime maximum rather than a comparison of raw strings.
purchase_dates = pd.to_datetime(transactions_with_all_info['TransactionDate'])

# Build one profile row per customer:
#   total_spend        - sum of TotalValue over the customer's transactions
#   purchase_frequency - number of transactions
#   recency            - timestamp of the latest purchase (converted to days below)
customer_features = (
    transactions_with_all_info
    .assign(TransactionDate=purchase_dates)
    .groupby('CustomerID')
    .agg(
        total_spend=('TotalValue', 'sum'),
        purchase_frequency=('TransactionID', 'count'),
        recency=('TransactionDate', 'max'),
    )
    .reset_index()
)

# Convert recency to whole days since the customer's last purchase. The
# reference point is the most recent purchase in the data set rather than the
# wall-clock time of the run, so re-running the notebook on another day (or at
# another time of day) produces exactly the same feature values.
snapshot_date = purchase_dates.max()
customer_features['recency'] = (snapshot_date - customer_features['recency']).dt.days


## Normalize the features

In [14]:
from sklearn.preprocessing import MinMaxScaler

# Numeric profile columns that are rescaled.
feature_columns = ['total_spend', 'purchase_frequency', 'recency']

# Fit the min-max scaler on the profile columns and write the scaled values
# back into the same columns of customer_features.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[feature_columns])
customer_features[feature_columns] = scaled_values

## Similarity Calculation on Customer

**Define Similarity Metrics:**


Similarity between customers can be measured with cosine similarity or Euclidean distance over their profile features (total spend, purchase frequency, recency). This notebook uses cosine similarity.

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between customer profiles: entry [i, j] compares
# the profile in row i of customer_features with the profile in row j.
profile_vectors = customer_features[['total_spend', 'purchase_frequency', 'recency']]
similarity_matrix = cosine_similarity(profile_vectors)


## Generate Similarity Scores:

For each customer (C0001 to C0020), find the top 3 most similar customers based on their similarity scores.

In [16]:
# Number of lookalikes to keep per customer.
TOP_N = 3

# Customers to produce recommendations for: C0001 .. C0020, selected by ID so
# the result does not depend on the row order of customer_features.
target_customer_ids = [f"C{number:04d}" for number in range(1, 21)]

# Row position of each customer in customer_features, which is also its
# row/column position in similarity_matrix.
row_of_customer = {
    customer_id: position
    for position, customer_id in enumerate(customer_features['CustomerID'])
}

# Maps each target customer to a list of (lookalike CustomerID, similarity score)
# tuples, best match first.
top_3_similar_customers = {}

for customer_id in target_customer_ids:
    idx = row_of_customer.get(customer_id)
    if idx is None:
        # No profile row for this customer (no transactions), so there is
        # nothing to compare against.
        continue

    similarity_scores = similarity_matrix[idx]

    # Rank every customer from most to least similar, then drop the customer's
    # own row explicitly. Slicing off the first ranked entry is not safe: with
    # tied scores (or an all-zero profile, whose self-similarity is 0) the
    # customer is not guaranteed to rank first.
    ranked = np.argsort(similarity_scores)[::-1]
    similar_customers = ranked[ranked != idx][:TOP_N]

    top_3_similar_customers[customer_id] = [
        (customer_features['CustomerID'].iloc[sim_idx], similarity_scores[sim_idx])
        for sim_idx in similar_customers
    ]


## Create the Lookalike.csv

In [18]:
# Flatten the per-customer lookalike lists into one row per
# (customer, lookalike, score) triple.
lookalike_data = [
    [customer_id, sim_customer_id, score]
    for customer_id, matches in top_3_similar_customers.items()
    for sim_customer_id, score in matches
]

lookalike_columns = ['CustomerID', 'LookalikeCustomerID', 'SimilarityScore']
lookalike_df = pd.DataFrame(lookalike_data, columns=lookalike_columns)

# Write the result to CSV without the DataFrame index.
lookalike_df.to_csv('Pruthviraj_Desai_Lookalike.csv', index=False)
