In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load final_data.csv into `df`, then show its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="final_data.csv")
df.head(n=5)

Unnamed: 0,ProductID,ProductName,Category,CustomerID,CustomerName,Region,SignupDate,TransactionID,TransactionDate,Quantity,TotalValue,Price,CalculatedValue,Month,Year,Month_Year,Day,Day_Name
0,P001,ActiveWear Biography,Books,C0017,Jennifer King,Europe,2023-12-05,T00758,2024-05-28 14:47:15,3,507.9,169.3,507.9,5,2024,2024-05,2,Tuesday
1,P001,ActiveWear Biography,Books,C0019,Brandon Rodriguez,Europe,2023-01-12,T00088,2024-01-30 17:23:03,2,338.6,169.3,338.6,1,2024,2024-01,2,Tuesday
2,P001,ActiveWear Biography,Books,C0024,Michele Cooley,North America,2024-02-05,T00314,2024-09-24 17:15:16,4,677.2,169.3,677.2,9,2024,2024-09,2,Tuesday
3,P001,ActiveWear Biography,Books,C0036,Brian Aguilar DDS,North America,2024-07-06,T00433,2024-05-05 05:01:18,2,338.6,169.3,338.6,5,2024,2024-05,0,Sunday
4,P001,ActiveWear Biography,Books,C0045,Michael Williams,Asia,2022-02-25,T00732,2024-08-13 10:42:48,2,338.6,169.3,338.6,8,2024,2024-08,2,Tuesday


In [3]:
# Parse SignupDate into datetimes, then add SignupDays: the whole number of days
# between each signup date and the moment this cell runs.
df = (
    df
    .assign(SignupDate=lambda frame: pd.to_datetime(frame['SignupDate']))
    .assign(SignupDays=lambda frame: (pd.to_datetime('today') - frame['SignupDate']).dt.days)
)


In [4]:
# Build the scaled feature matrix used by the nearest-neighbour search.
# 'SignupDate' / 'SignupDays' are prepared by the preceding cell, so the date
# parsing is not repeated here.

numeric_features = ['TotalValue', 'Quantity', 'SignupDays']

# Region and Category are nominal labels. Feeding integer codes to a Euclidean
# metric would make some label pairs look "closer" than others purely because
# of alphabetical order, so they enter the model as one-hot indicator columns.
# These are computed BEFORE the in-place encoding below so that the column
# names carry the original labels on a first run.
region_dummies = pd.get_dummies(df['Region'], prefix='Region', dtype=float)
category_dummies = pd.get_dummies(df['Category'], prefix='Category', dtype=float)

# The fitted encoders and the integer-coded columns are kept so that any code
# relying on `le_region`, `le_category` or the encoded df columns keeps working;
# they are no longer part of the model input.
le_region = LabelEncoder()
df['Region'] = le_region.fit_transform(df['Region'])

le_category = LabelEncoder()
df['Category'] = le_category.fit_transform(df['Category'])

# One row per row of df, in the same order, so positions in X_scaled line up
# with positions in df.
X = pd.concat([df[numeric_features], region_dummies, category_dummies], axis=1)
features = list(X.columns)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
# Build the Euclidean nearest-neighbour index over the scaled feature rows.
# n_neighbors=4 is the default number of neighbours returned per query.
knn = NearestNeighbors(metric='euclidean', n_neighbors=4).fit(X_scaled)

# Function to get similar customers
def find_similar_customers(customer_id, top_n=3):
    """Return up to ``top_n`` other customers nearest to ``customer_id``.

    The query point is the scaled feature row of the first row in ``df`` whose
    ``CustomerID`` equals ``customer_id``. Neighbouring rows are read from the
    fitted ``knn`` model in order of increasing distance; rows belonging to the
    queried customer, and repeat rows of a customer already collected, are
    skipped so that every entry in the result is a distinct other customer.

    Parameters
    ----------
    customer_id : value comparable with ``df['CustomerID']``
        Customer to find lookalikes for.
    top_n : int, default 3
        Maximum number of lookalikes to return. Values <= 0 give ``[]``.

    Returns
    -------
    list of (CustomerID, distance) tuples, nearest first. Shorter than
    ``top_n`` only when the fitted data holds fewer distinct other customers.

    Raises
    ------
    IndexError
        If ``customer_id`` does not occur in ``df['CustomerID']``.
    """
    customer_ids = df['CustomerID'].to_numpy()

    # Positional lookup: X_scaled is a plain array, so use row positions
    # rather than df index labels.
    row_positions = np.flatnonzero(customer_ids == customer_id)
    if row_positions.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in df")
    if top_n <= 0:
        return []

    query = [X_scaled[row_positions[0]]]
    n_rows = len(X_scaled)

    # Start with just enough neighbours for the common case and widen the
    # search only if too many of them are the same or repeated customers.
    k = min(n_rows, top_n + 1)
    while True:
        distances, indices = knn.kneighbors(query, n_neighbors=k)

        similar_customers = []
        seen = {customer_id}
        for distance, position in zip(distances[0], indices[0]):
            other_id = customer_ids[position]
            if other_id in seen:
                continue
            seen.add(other_id)
            similar_customers.append((other_id, distance))
            if len(similar_customers) == top_n:
                return similar_customers

        if k >= n_rows:
            # Every fitted row has been examined; return what exists.
            return similar_customers
        k = min(n_rows, 2 * k)


In [6]:
# Rows whose CustomerID lies between 'C0001' and 'C0020'. This is a string
# comparison, so it relies on the zero-padded, fixed-width IDs seen in the
# data preview.
selected_customers = df[df['CustomerID'].between('C0001', 'C0020')]

lookalike_map = {}

# A customer can appear on many rows of df, so query each distinct customer
# once, in sorted ID order, instead of once per row.
for customer_id in sorted(selected_customers['CustomerID'].unique()):
    similar_customers = find_similar_customers(customer_id)
    # Plain Python floats keep the saved text free of NumPy scalar reprs
    # such as "np.float64(...)".
    lookalike_map[customer_id] = [
        (other_id, float(distance)) for other_id, distance in similar_customers
    ]

lookalike_df = pd.DataFrame(list(lookalike_map.items()), columns=['CustomerID', 'Lookalikes'])

# Save as CSV
lookalike_df.to_csv("Lookalike.csv", index=False)
