In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Directory that holds the input CSV files. Kept in one place so the notebook
# can be pointed at a different location by editing a single line.
DATA_DIR = "D:/JN"

# Raw inputs: one row per customer and one row per transaction.
customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")



In [2]:
# Preview the first five rows of the customer table.
customers.head(n=5)


Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07
3,C0004,Kathleen Rodriguez,South America,2022-10-09
4,C0005,Laura Weber,Asia,2022-08-15


In [3]:
# Preview the first five rows of the transaction table.
transactions.head(n=5)


Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68


In [None]:
# Number of lookalike customers reported for each customer.
TOP_N = 3

# Attach the customer profile to every transaction. The default inner join
# keeps only customers that appear in at least one transaction.
merged_data = pd.merge(transactions, customers, on='CustomerID')

# One row per customer: number of transactions (stored under 'ProductID'),
# total spend, and the customer's region.
customer_features = merged_data.groupby('CustomerID').agg({
    'ProductID': 'count',
    'TotalValue': 'sum',
    'Region': 'first'
}).reset_index()

# One-hot encode the region. dtype=float makes the encoded columns numeric
# regardless of the pandas default, so they stack cleanly with the scaled
# numeric features below.
customer_features = pd.get_dummies(customer_features, columns=['Region'], dtype=float)

# Standardise the two numeric features so neither dominates the similarity.
scaler = StandardScaler()
numerical_features = customer_features[['ProductID', 'TotalValue']]
scaled_features = scaler.fit_transform(numerical_features)

# Final feature matrix: [scaled count, scaled spend, region indicators...].
final_features = np.hstack((
    scaled_features,
    customer_features.drop(columns=['CustomerID', 'ProductID', 'TotalValue']).values,
))

# Pairwise cosine similarity between all customers (square matrix).
similarity_matrix = cosine_similarity(final_features)

customer_ids = customer_features['CustomerID'].to_numpy()

# Rank on a copy whose diagonal is -inf so a customer can never be selected as
# its own lookalike. Relying on "self is always ranked first" breaks when two
# customers have identical features (ties) or when a feature vector is all
# zeros (self-similarity 0).
ranking_scores = similarity_matrix.copy()
np.fill_diagonal(ranking_scores, -np.inf)

# Never ask for more neighbours than there are other customers.
n_neighbours = min(TOP_N, len(customer_ids) - 1)

recommendations = {}

for i, customer_id in enumerate(customer_ids):
    # Stable sort on the negated scores: highest similarity first, ties broken
    # by row order so the output is deterministic.
    similar_indices = np.argsort(-ranking_scores[i], kind='stable')[:n_neighbours]
    # Store plain Python floats so the CSV cell contains bare numbers rather
    # than NumPy scalar reprs.
    recommendations[customer_id] = [
        (customer_ids[j], float(similarity_matrix[i, j])) for j in similar_indices
    ]

# One row per customer; the second column holds the list of
# (lookalike CustomerID, similarity score) pairs. Building the frame from two
# explicit columns avoids DataFrame.from_dict spreading the list over TOP_N
# columns, which conflicts with a single column name and raises.
lookalike_df = pd.DataFrame({
    'CustomerID': list(recommendations.keys()),
    'Lookalike Customers': list(recommendations.values()),
})

lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike recommendations saved to Lookalike.csv")

