In [23]:
import pandas as pd
# Read the three raw input tables from the working directory, in the same
# order as before: customers, then transactions, then products.
# NOTE(review): `products` is not referenced by any of the cells visible here.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

In [24]:
# Attach customer attributes to every transaction row (default inner join on
# CustomerID), then collapse the result to one row per customer.
merged_data = pd.merge(transactions, customers, on='CustomerID')

# How each column is summarised per customer.
per_customer_aggregations = {
    'TotalValue': 'sum',                           # total spend of the customer
    'Quantity': 'sum',                             # total units bought by the customer
    'Region': 'first',                             # region taken from the customer's first row
    'ProductID': lambda ids: ' '.join(ids),        # all purchased product IDs as one space-separated string
}

grouped_by_customer = merged_data.groupby('CustomerID')
customer_features = grouped_by_customer.agg(per_customer_aggregations).reset_index()

In [25]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Plain numeric columns (spend and volume) kept aside so they can be combined
# with the product encoding afterwards.
numerical_features = customer_features.loc[:, ['TotalValue', 'Quantity']]

# Treat each customer's space-separated product IDs as a small text document
# and turn it into a TF-IDF weighted vector (one row per customer).
product_documents = customer_features['ProductID']
tfidf = TfidfVectorizer()
product_matrix = tfidf.fit_transform(product_documents)

In [29]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
# Standardise the numeric columns so they have zero mean and unit variance.
scaler = StandardScaler()
scaled_numerical_features = scaler.fit_transform(numerical_features)

# Put the scaled numeric columns and the dense TF-IDF columns side by side,
# one row per customer (both frames share the default 0..n-1 row index).
scaled_frame = pd.DataFrame(scaled_numerical_features)
product_frame = pd.DataFrame(product_matrix.toarray())
final_features = pd.concat([scaled_frame, product_frame], axis=1)

# Pairwise cosine similarity between every pair of customer rows.
similarity_matrix = cosine_similarity(final_features)

In [30]:

# Build the lookalike table: for each of the first 20 customers in
# `customer_features`, record the three most similar *other* customers together
# with their cosine-similarity scores, then save the result to Lookalike.csv.
lookalike_list = []

# Never index past the end of the table if fewer than 20 customers exist.
n_target_customers = min(20, len(customer_features))

for i in range(n_target_customers):
    customer_id = customer_features['CustomerID'].iloc[i]
    similarity_scores = similarity_matrix[i]

    # Rank all customers from most to least similar and drop the customer
    # itself explicitly. Assuming "self is always the single top entry" breaks
    # when another customer ties with it at the maximum score.
    ranked_indices = [j for j in similarity_scores.argsort()[::-1] if j != i]
    similar_indices = ranked_indices[:3]  # top 3 lookalikes

    similar_customers = customer_features['CustomerID'].iloc[similar_indices].tolist()
    scores = similarity_scores[similar_indices].tolist()

    # The append must happen inside the per-customer loop; when it ran after
    # the loop, only the last customer's lookalikes were ever stored.
    for similar_customer, score in zip(similar_customers, scores):
        lookalike_list.append({
            'CustomerID': customer_id,
            'SimilarCustomerID': similar_customer,
            'Score': score,
        })

# Fix the column order so the CSV header is stable even if no rows were produced.
lookalike_df = pd.DataFrame(lookalike_list, columns=['CustomerID', 'SimilarCustomerID', 'Score'])
lookalike_df.to_csv('Lookalike.csv', index=False)
print("Lookalike model generated and saved to Lookalike.csv")

Lookalike model generated and saved to Lookalike.csv
