In [3]:
import csv
import os

import numpy
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [4]:
# Read the three input tables from CSV files in the current working directory.
cust, prd, trns = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

## Lookalike Model

In [None]:
# Rename 'Price' in each table (in place) so the two columns carry distinct
# names after the join below.
# NOTE(review): presumably both tables have a 'Price' column -- rename()
# silently ignores a missing label, so confirm against the CSV headers.
prd.rename(
    columns={'Price': 'Product Price'},
    inplace=True,
)
trns.rename(
    columns={'Price': 'Transaction Price'},
    inplace=True,
)

# Inner joins (the pandas default, spelled out here): a transaction row is
# kept only if its CustomerID and ProductID both have a match.
merged_data = (
    trns
    .merge(cust, how='inner', on='CustomerID')
    .merge(prd, how='inner', on='ProductID')
)

# One row per customer with the behavioural features used by later cells.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg(
        total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        transaction_count=pd.NamedAgg(column='TransactionID', aggfunc='nunique'),
        avg_quantity=pd.NamedAgg(column='Quantity', aggfunc='mean'),
        unique_products=pd.NamedAgg(column='ProductID', aggfunc='nunique'),
    )
    .reset_index()
)

In [None]:
# Attach each customer's Region to their aggregated features. The join is an
# inner join (pandas default, made explicit), so a customer without a row in
# customer_features does not appear in the profile.
customer_profile = cust.loc[:, ['CustomerID', 'Region']].merge(
    customer_features,
    how='inner',
    on='CustomerID',
)

In [None]:
# One-hot encode the categorical 'Region' column and swap it for the
# resulting indicator columns.
encoder = OneHotEncoder()
encoded_region = encoder.fit_transform(customer_profile[['Region']]).toarray()
region_columns = encoder.get_feature_names_out(['Region'])
encoded_df = pd.DataFrame(data=encoded_region, columns=region_columns)

# Column-wise concat aligns on the index; encoded_df is built with a default
# integer index, which customer_profile is expected to share.
customer_profile = pd.concat(
    [customer_profile.drop(columns='Region'), encoded_df],
    axis=1,
)

In [None]:
# Standardize every column except the CustomerID key so that all features are
# on the same scale. Any one-hot Region columns present in customer_profile
# are scaled as well.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(
    customer_profile.drop(columns=['CustomerID'])
)


In [None]:
# Pairwise cosine similarity between all rows of the scaled feature matrix:
# entry [i, j] is the similarity between row i and row j.
similarity_matrix = cosine_similarity(X=scaled_features)

In [6]:
# Lookalike model: for each of the first N_TARGET_CUSTOMERS rows of
# customer_profile, record the TOP_K most similar *other* customers (by the
# precomputed cosine similarity) and write the result to OUTPUT_PATH.
N_TARGET_CUSTOMERS = 20
TOP_K = 3
OUTPUT_PATH = 'Lookalike.csv'  # same name the confirmation message reports

# .tolist() yields built-in Python values, so the text written to the CSV does
# not depend on how the installed NumPy version prints its scalar types.
customer_ids = customer_profile['CustomerID'].tolist()

lookalikes = {}
for idx, customer_id in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Order all rows from most to least similar, then remove the customer's own
    # row by position. Slicing off "the last argsort entry" would only drop
    # self if no other customer ties with (or rounds above) the self-score.
    ranked = similarity_matrix[idx].argsort()[::-1]
    similar_indices = ranked[ranked != idx][:TOP_K]
    lookalikes[customer_id] = [
        (customer_ids[j], float(similarity_matrix[idx, j]))
        for j in similar_indices
    ]

# Each data row is: customer id, then the list of (lookalike id, score) pairs
# rendered as text by the csv writer.
with open(OUTPUT_PATH, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['cust_id', 'lookalikes'])
    writer.writerows(lookalikes.items())

print(f"{OUTPUT_PATH} has been created with recommendations.")


Lookalike.csv has been created with recommendations.
