In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [8]:
# Read the three input tables; the relative paths are resolved against the
# current working directory.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [9]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [10]:
# Collapse the transaction log to one row per CustomerID:
#   TotalSpent       - sum of TotalValue
#   TotalQuantity    - sum of Quantity
#   TransactionCount - number of non-null TransactionID values
customer_transactions = transactions_df.groupby('CustomerID').agg(
    TotalSpent=('TotalValue', 'sum'),
    TotalQuantity=('Quantity', 'sum'),
    TransactionCount=('TransactionID', 'count'),
)

In [11]:
# Left-join so that customers with no transactions are kept. Their aggregate
# columns come back as NaN and are set to 0 ("no activity").
# Only the aggregate columns are filled: a blanket fillna(0) would also write
# the number 0 into missing non-numeric customer fields (e.g. Region), which
# would later be one-hot encoded as if 0 were a real category.
merged_customers = pd.merge(
    customers_df, customer_transactions, on='CustomerID', how='left'
).fillna({column: 0 for column in customer_transactions.columns})



In [12]:
# Assemble the per-customer feature matrix: the three activity totals followed
# by one-hot indicator columns for Region, then standardize every column.
numerical_data = merged_customers[['TotalSpent', 'TotalQuantity', 'TransactionCount']]
encoded_data = pd.get_dummies(merged_customers[['Region']])
feature_frame = pd.concat([numerical_data, encoded_data], axis=1)
feature_scaler = StandardScaler()
scaled_data = feature_scaler.fit_transform(feature_frame)

In [13]:
# Pairwise cosine similarity between all customer feature rows, wrapped in a
# DataFrame labelled by CustomerID on both axes so it can be queried by ID.
customer_ids = merged_customers['CustomerID']
similarity_matrix = cosine_similarity(scaled_data)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

In [14]:
# Top 3 lookalikes for the first 20 customers.
# Maps each CustomerID to a list of (other CustomerID, similarity score)
# pairs, ordered from most to least similar.
lookalike_data = {}
for customer_id in merged_customers['CustomerID'].head(20):
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: with ties or floating-point rounding another customer can take
    # position 0, which would list the customer as their own lookalike.
    scores = similarity_df[customer_id].drop(labels=customer_id)
    similar_customers = scores.sort_values(ascending=False).head(3)
    # tolist() yields plain Python floats, so the tuples do not carry NumPy
    # scalar objects (whose repr is not a bare number on NumPy >= 2) into
    # anything that later stringifies them.
    lookalike_data[customer_id] = list(
        zip(similar_customers.index, similar_customers.tolist())
    )


In [15]:
# Turn the {CustomerID: [match1, match2, match3]} mapping into a table with
# one row per customer and one column per match, then save it as CSV.
output_columns = ['CustomerID', 'Lookalike1', 'Lookalike2', 'Lookalike3']
lookalike_wide = pd.DataFrame.from_dict(lookalike_data, orient='index')
lookalike_df = lookalike_wide.reset_index()  # dictionary keys become the first column
lookalike_df.columns = output_columns
lookalike_df.to_csv('Lookalike.csv', index=False)


In [17]:
# Trigger a browser download of the CSV when running inside Google Colab.
# The google.colab package only exists in that environment, so the import is
# guarded to keep the notebook runnable elsewhere.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download of 'Lookalike.csv'.")
else:
    files.download('Lookalike.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>