# In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Lookalike model: for each of the first NUM_TARGET_CUSTOMERS customers, find
# the TOP_N_LOOKALIKES other customers whose aggregated, standardised
# transaction profile has the highest cosine similarity, and save the result
# to a CSV file.

# Number of leading customers (in grouped CustomerID order) to report on.
NUM_TARGET_CUSTOMERS = 20
# Number of lookalikes listed for each reported customer.
TOP_N_LOOKALIKES = 3

# Transaction columns aggregated per customer to form the profile vector.
numerical_features = ['TotalValue', 'Quantity', 'Price']


def top_lookalikes(similarity_df, customer, n=TOP_N_LOOKALIKES):
    """Return the ``n`` customers most similar to ``customer``.

    Args:
        similarity_df: Square DataFrame of pairwise similarity scores whose
            index and columns are both customer IDs.
        customer: ID of the customer to find lookalikes for.
        n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(customer_id, score)`` tuples ordered from most to least
        similar, with each score a plain ``float`` rounded to 4 decimal
        places. The list is shorter than ``n`` when fewer than ``n`` other
        customers exist.

    Raises:
        KeyError: If ``customer`` is not a label of ``similarity_df``.
    """
    # Remove the customer's own entry by label rather than assuming it sorts
    # first: another customer with a proportional profile also scores ~1.0,
    # and with such a tie (or floating-point noise) the self entry is not
    # guaranteed to land in position 0 of a descending sort.
    scores = similarity_df.loc[customer].drop(labels=customer)
    return [
        (other, round(float(score), 4))
        for other, score in scores.nlargest(n).items()
    ]


# The pipeline sits directly under the guard (not inside a function) so the
# intermediate frames stay available as globals when run as a notebook cell
# or script, while importing this file no longer triggers file I/O.
if __name__ == "__main__":
    customers_df = pd.read_csv('Customers.csv')
    transactions_df = pd.read_csv('Transactions.csv')

    # Inner join: only transactions whose CustomerID appears in both files
    # are kept.
    merged_df = pd.merge(transactions_df, customers_df, on="CustomerID", how="inner")

    # Missing numeric values count as zero in the per-customer sums below.
    merged_df[numerical_features] = merged_df[numerical_features].fillna(0)

    # One row per customer: the sum of each numeric feature over all of that
    # customer's transactions.
    customer_profiles = merged_df.groupby('CustomerID')[numerical_features].sum()

    # Standardise each feature so no single column dominates the similarity.
    scaler = StandardScaler()
    customer_profiles[numerical_features] = scaler.fit_transform(customer_profiles[numerical_features])

    similarity_matrix = cosine_similarity(customer_profiles[numerical_features])
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=customer_profiles.index,
        columns=customer_profiles.index,
    )

    lookalike_results = {
        customer: top_lookalikes(similarity_df, customer)
        for customer in customer_profiles.index[:NUM_TARGET_CUSTOMERS]
    }

    lookalike_df = pd.DataFrame(list(lookalike_results.items()), columns=['CustomerID', 'Lookalikes'])

    # Scores are already plain floats, so str() yields the list-of-tuples
    # text that is written to the CSV.
    lookalike_df['Lookalikes'] = lookalike_df['Lookalikes'].apply(str)

    lookalike_df.to_csv('Sujal_Chauhan_Lookalike.csv', index=False)

    print("Lookalike model has been generated and saved to 'Sujal_Chauhan_Lookalike.csv'.")


# Output: Lookalike model has been generated and saved to 'Sujal_Chauhan_Lookalike.csv'.
