In [2]:
import os
from pathlib import Path

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [3]:
# Directory holding the two input CSVs. It defaults to the original download
# folder so existing runs are unaffected; set the LOOKALIKE_DATA_DIR
# environment variable to run the notebook against files stored elsewhere.
DATA_DIR = Path(os.environ.get('LOOKALIKE_DATA_DIR', r'C:\Users\sanja\Downloads'))

customers = pd.read_csv(DATA_DIR / 'Customers.csv')
transactions = pd.read_csv(DATA_DIR / 'Transactions.csv')

In [4]:
# Collapse the transaction log to one row per CustomerID:
#   total_spent           - sum of TotalValue
#   avg_transaction_value - mean of TotalValue
#   transaction_count     - number of TransactionID entries
customer_transactions = (
    transactions
    .groupby('CustomerID')
    .agg(
        total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        avg_transaction_value=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
        transaction_count=pd.NamedAgg(column='TransactionID', aggfunc='count'),
    )
    .reset_index()
)

In [5]:
# Left join keeps every row of `customers`; customers with no matching row in
# `customer_transactions` get NaN in the aggregate columns.
customer_profiles = customers.merge(
    customer_transactions,
    how='left',
    on='CustomerID',
)

In [6]:
# Customers with no transactions come out of the left merge with NaN in the
# aggregate columns; treat them as zero spend / zero activity.
# Only the aggregate columns are filled: a blanket fillna(0) would also turn
# any missing value in the other customer columns into the number 0.
customer_profiles = customer_profiles.fillna({
    'total_spent': 0,
    'avg_transaction_value': 0,
    'transaction_count': 0,
})

In [7]:
# Columns that describe a customer's purchasing behaviour.
features = ['total_spent', 'avg_transaction_value', 'transaction_count']

# Standardise the feature columns and write the scaled values back over the
# raw ones, so later cells read the scaled values from `customer_profiles`.
scaler = StandardScaler()
customer_profiles[features] = (
    scaler
    .fit(customer_profiles[features])
    .transform(customer_profiles[features])
)

In [8]:
# Pairwise cosine similarity between the feature rows of all customers;
# entry [i, j] compares row i and row j of `customer_profiles`.
similarity_matrix = cosine_similarity(customer_profiles[features].to_numpy())

In [9]:
def get_lookalikes(customer_id, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Scores are read from the module-level ``similarity_matrix``, whose rows and
    columns are taken to be positionally aligned with the rows of the
    module-level ``customer_profiles`` frame.

    Args:
        customer_id: Value to look up in ``customer_profiles['CustomerID']``.
            If it occurs more than once, the first occurrence is used.
        top_n: Maximum number of lookalikes to return. Values <= 0 give an
            empty list.

    Returns:
        A list of ``(CustomerID, similarity_score)`` tuples ordered from most
        to least similar. The queried customer is never included. The list is
        shorter than ``top_n`` when fewer other customers exist.

    Raises:
        IndexError: If ``customer_id`` is not present in ``customer_profiles``.
    """
    customer_ids = customer_profiles['CustomerID']

    # Resolve the customer's *position* (not its index label), because the
    # similarity matrix is addressed positionally.
    positions = customer_ids.eq(customer_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(
            f"CustomerID {customer_id!r} not found in customer_profiles"
        )
    customer_pos = int(positions[0])

    # Exclude the customer itself explicitly. Skipping "the first sorted entry"
    # is not safe: the sort is stable, so an earlier-positioned customer whose
    # score ties with (or rounds above) the self-similarity would sort first
    # and the queried customer would be reported as its own lookalike.
    candidates = [
        (pos, score)
        for pos, score in enumerate(similarity_matrix[customer_pos])
        if pos != customer_pos
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [
        (customer_ids.iloc[pos], score)
        for pos, score in candidates[:max(top_n, 0)]
    ]

In [10]:
# Lookalikes for the first 20 rows of `customer_profiles`, keyed by CustomerID.
lookalike_results = {
    cust_id: get_lookalikes(cust_id)
    for cust_id in customer_profiles['CustomerID'].head(20)
}

In [11]:
# One row per customer; the list of (CustomerID, score) pairs is stored as its
# string form in the 'Lookalikes' column.
# Scores are cast to built-in float before stringifying: under NumPy >= 2 the
# repr of a float64 scalar is "np.float64(...)", which would otherwise end up
# verbatim in the exported text. On older NumPy the output is unchanged.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results.keys()),
    'Lookalikes': [
        str([(match_id, float(score)) for match_id, score in matches])
        for matches in lookalike_results.values()
    ],
})

In [12]:
# Write the lookalike table to the working directory, without the row index.
lookalike_df.to_csv(
    path_or_buf='Sanjay Kumar_Peddaboina_Lookalike.csv',
    index=False,
)

In [13]:
# Plain-text preview of the first five rows of the exported table.
print(lookalike_df.head(n=5))

  CustomerID                                         Lookalikes
0      C0001  [('C0137', 0.999217832279607), ('C0152', 0.992...
1      C0002  [('C0029', 0.9996304690463753), ('C0199', 0.99...
2      C0003  [('C0005', 0.9999316372091099), ('C0178', 0.99...
3      C0004  [('C0067', 0.9998110253764195), ('C0021', 0.99...
4      C0005  [('C0003', 0.9999316372091099), ('C0073', 0.99...
