In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score, silhouette_score, mean_squared_error

In [5]:
# Load the three raw CSV tables (customers, products, transactions).
# NOTE(review): these are absolute paths on one Windows machine; the notebook
# will not run elsewhere until they are pointed at a shared data directory.
customers_csv = 'C:\\Users\\hp\\Downloads\\Customers.csv'
products_csv = 'C:\\Users\\hp\\Downloads\\Products.csv'
transactions_csv = 'C:\\Users\\hp\\Downloads\\Transactions.csv'

customers = pd.read_csv(customers_csv)
products = pd.read_csv(products_csv)
transactions = pd.read_csv(transactions_csv)

In [6]:
# Combine the three tables into one transaction-level frame.
# Both joins use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped from `df`.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
df = pd.merge(transactions_with_customers, products, on='ProductID')

In [7]:
# Lookalike model, step 1: one row per customer with their summed TotalValue
# and summed Quantity, standardised column-wise, then compared pairwise with
# cosine similarity (row/column k of the matrix corresponds to row k of `features`).
per_customer_totals = df.groupby('CustomerID').agg(
    TotalValue=('TotalValue', 'sum'),
    Quantity=('Quantity', 'sum'),
)
features = per_customer_totals.reset_index()

feature_values = features[['TotalValue', 'Quantity']]
scaler = StandardScaler()
scaler.fit(feature_values)
features_scaled = scaler.transform(feature_values)

similarity_matrix = cosine_similarity(features_scaled)

In [8]:
# Lookalike model, step 2: for each of the first N_CUSTOMERS customers (in the
# row order of `features`), keep the TOP_N *other* customers with the highest
# cosine similarity as (CustomerID, score) pairs, then write the table to CSV.
N_CUSTOMERS = 20  # how many customers (from the top of `features`) get recommendations
TOP_N = 3         # lookalikes kept per customer

customer_ids = features['CustomerID'].to_numpy()

lookalikes = {}
for i, customer in enumerate(customer_ids[:N_CUSTOMERS]):
    ranked = np.argsort(similarity_matrix[i])[::-1]
    # Exclude the customer's own row by index rather than assuming it sorts
    # first: another customer can tie with (or, through floating-point
    # rounding, edge past) the self-similarity, in which case slicing [1:4]
    # would list the customer as their own lookalike and drop a real match.
    similar_customers = ranked[ranked != i][:TOP_N]
    # Cast the score to a plain Python float so the tuple's text form in the
    # CSV does not depend on how the installed NumPy prints its scalar types.
    lookalikes[customer] = [
        (customer_ids[j], float(similarity_matrix[i][j])) for j in similar_customers
    ]

lookalike_columns = [f'Lookalike{rank}' for rank in range(1, TOP_N + 1)]
lookalikes_df = pd.DataFrame.from_dict(lookalikes, orient='index', columns=lookalike_columns)
lookalikes_df.to_csv('Lookalike.csv')

In [16]:
# Cluster the scaled customer features with k-means (4 clusters, fixed seed)
# and report the mean silhouette score of the resulting assignment.
kmeans = KMeans(n_clusters=4, random_state=42)
cluster_labels = kmeans.fit_predict(features_scaled)
silhouette_avg = silhouette_score(features_scaled, cluster_labels)
print(f'Silhouette Score: {silhouette_avg}')

Silhouette Score: 0.4497501059000638
