In [10]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [11]:
# Load the three input tables (paths are relative to the working directory).
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [12]:
# Attach customer and product columns to every transaction row.
# Both joins are left joins, so every transaction row is kept even when
# its CustomerID or ProductID has no match in the lookup table.
merged = (
    transactions
    .merge(customers, how='left', on='CustomerID')
    .merge(products, how='left', on='ProductID')
)

In [None]:
def _most_frequent(values):
    """Return the most frequent non-null entry of `values`, or None if there is none.

    `Series.mode()` returns an empty Series when every entry is missing, so
    indexing its result with `[0]` unconditionally raises for such a group.
    When several values tie, the first one returned by `mode()` is used.
    """
    modes = values.mode()
    return modes.iloc[0] if len(modes) else None


# Build one row per customer from the merged transaction table:
#   TotalSpending     - sum of TotalValue
#   AvgSpending       - mean of TotalValue
#   TransactionCount  - number of non-null TransactionID values
#   PreferredCategory - most frequent Category (None when all are missing,
#                       e.g. when no product matched in the left merge)
#   Region            - first non-null Region seen for the customer
customer_features = merged.groupby('CustomerID').agg(
    TotalSpending=('TotalValue', 'sum'),
    AvgSpending=('TotalValue', 'mean'),
    TransactionCount=('TransactionID', 'count'),
    PreferredCategory=('Category', _most_frequent),
    Region=('Region', 'first'),
).reset_index()

In [15]:
# One-hot encode the two categorical columns. drop_first=True omits one
# indicator column per categorical variable.
# NOTE: this overwrites customer_features, so the cell cannot be re-run on its
# own; re-run the aggregation cell that builds customer_features first.
customer_features = pd.get_dummies(
    customer_features,
    columns=['PreferredCategory', 'Region'],
    drop_first=True,
)

# Standardise every column except the identifier. Row i of the scaled matrix
# corresponds to row i of customer_features.
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(
    customer_features.drop(columns=['CustomerID'])
)

In [16]:
def recommend_similar_customers(input_customer_id, top_n=3):
    """Return the `top_n` customers most similar to `input_customer_id`.

    Similarity is the cosine similarity between the customer's row of the
    module-level `customer_features_scaled` matrix and every row of that
    matrix. Row i of the matrix is taken to describe row i (by position) of
    the module-level `customer_features` frame.

    Parameters
    ----------
    input_customer_id
        Value to look up in ``customer_features['CustomerID']``.
    top_n : int, default 3
        Maximum number of similar customers to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``CustomerID`` and ``SimilarityScore``, sorted by descending
        score, excluding the input customer. Index labels are those of
        `customer_features`.

    Raises
    ------
    IndexError
        If `input_customer_id` does not occur in `customer_features`.
    """
    is_input_customer = (customer_features['CustomerID'] == input_customer_id).to_numpy()
    if not is_input_customer.any():
        raise IndexError(
            f"CustomerID {input_customer_id!r} not found in customer_features"
        )

    # Use the row *position*, not the index label: the scaled matrix is a plain
    # array and can only be addressed positionally.
    input_customer_position = int(is_input_customer.argmax())
    input_customer_vector = customer_features_scaled[input_customer_position].reshape(1, -1)
    similarity_scores = cosine_similarity(input_customer_vector, customer_features_scaled).flatten()

    # Build a fresh frame instead of writing a SimilarityScore column into the
    # shared customer_features frame, so calls leave module state untouched.
    scored = customer_features[['CustomerID']].assign(SimilarityScore=similarity_scores)
    recommendations = scored[~is_input_customer].sort_values(
        by='SimilarityScore', ascending=False
    )
    return recommendations.head(top_n)

#### Example usage

In [17]:
# Look up the three closest matches for a single customer and print them.
input_customer_id = "C0001"
top_3_customers = recommend_similar_customers(input_customer_id, 3)
print(top_3_customers)

    CustomerID  SimilarityScore
188      C0190         0.990302
47       C0048         0.982489
179      C0181         0.970571


In [23]:
# Map each of the first 20 customer IDs in the customers table to a list of
# (similar customer ID, similarity score) pairs, best match first.
lookalike_map = {}
for cust_id in customers['CustomerID'].head(20):
    recommendations = recommend_similar_customers(cust_id, top_n=3)
    lookalike_map[cust_id] = list(
        zip(recommendations['CustomerID'], recommendations['SimilarityScore'])
    )

In [29]:
# Flatten the map into one record per customer; the list of matches is stored
# as its string representation.
lookalike_list = [
    {"CustomerID": cust_id, "Most Similar": str(matches)}
    for cust_id, matches in lookalike_map.items()
]

In [30]:
# Turn the per-customer records into a two-column table.
lookalike_df = pd.DataFrame(data=lookalike_list)


In [31]:
# Write the lookalike table to disk without the row index.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', index=False)