In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the three input tables from CSV files in the working directory.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [3]:
# Attach product columns, then customer columns, to every transaction row.
# Both merges use pandas' default inner join on the shared ID column.
trans_extended = (
    transactions
    .merge(products, on='ProductID')
    .merge(customers, on='CustomerID')
)


In [4]:
# Customer x product matrix of total units purchased.
# pivot_table aggregates with the mean by default, which would average away
# repeat purchases of the same product by the same customer, so the quantities
# are summed explicitly. Customer/product pairs with no purchase are filled with 0.
feature_matrix = trans_extended.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

In [5]:
# Standardize every product column of the feature matrix with StandardScaler
# (default settings). Fitting and transforming are done as two explicit steps
# on the same data.
scaler = StandardScaler()
scaler.fit(feature_matrix)
feature_matrix_scaled = scaler.transform(feature_matrix)

In [6]:
# Pairwise cosine similarity between the rows (customers) of the scaled matrix.
similarity = cosine_similarity(feature_matrix_scaled)

# Wrap the raw array in a DataFrame labelled on both axes by the feature
# matrix's index, so similarities can be looked up by CustomerID.
similarity_df = pd.DataFrame(
    data=similarity,
    index=feature_matrix.index,
    columns=feature_matrix.index,
)
def get_top_lookalikes(sim_df, customer_ids, top_n=3):
    """Return the most similar *other* customers for each requested customer.

    Parameters
    ----------
    sim_df : pandas.DataFrame
        Similarity matrix. The column labelled with a customer ID is read, and
        its index labels are the candidate lookalikes.
    customer_ids : iterable
        Customer IDs to look up.
    top_n : int, default 3
        Maximum number of lookalikes returned per customer.

    Returns
    -------
    dict
        Maps every requested ID to a list of ``(other_id, score)`` tuples in
        descending score order. An ID that has no column in ``sim_df`` maps to
        an empty list instead of raising ``KeyError``.
    """
    lookalike_dict = {}
    for cust_id in customer_ids:
        if cust_id not in sim_df.columns:
            # No similarity data for this ID; keep it in the result with no matches.
            lookalike_dict[cust_id] = []
            continue

        # Remove the customer's own entry by label. Skipping "the first row
        # after sorting" is wrong whenever another customer ties with the
        # self-similarity score.
        scores = sim_df[cust_id].drop(labels=cust_id, errors='ignore')

        # A stable sort keeps the result deterministic when scores are tied.
        top_similar = scores.sort_values(ascending=False, kind='stable').head(top_n)
        lookalike_dict[cust_id] = list(zip(top_similar.index, top_similar.values))
    return lookalike_dict

In [7]:
# Take the first 20 CustomerID values in the order they appear in the customers table.
first_20_customers = customers['CustomerID'].head(20)

# Look up the lookalikes for those customers.
top_lookalikes = get_top_lookalikes(
    sim_df=similarity_df,
    customer_ids=first_20_customers,
)

# One row per customer, one column per lookalike; write it out without a header row.
lookalike_df = pd.DataFrame.from_dict(top_lookalikes, orient='index')
lookalike_df.to_csv('Prince_Kumar_Lookalike.csv', header=False)
