### TASK 2 : LOOKALIKE MODEL

Import Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

Load data and Join

In [2]:
# Read the three source tables.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')

# Attach customer and product attributes to every transaction in one chain.
# The rename targets the Price_x / Price_y columns that the product merge is
# expected to create (presumably because both Transactions.csv and
# Products.csv carry a `Price` column -- confirm against the data).
transactions = (
    pd.read_csv('Transactions.csv')
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
    .rename(columns={'Price_x': 'TransactionPrice', 'Price_y': 'ProductPrice'})
)

Building per-customer features (summed `TotalValue`, summed `Quantity`, mean `ProductPrice`)

In [3]:
# Collapse the transaction rows to one row per CustomerID:
#   TotalValue   -> sum over the customer's transactions
#   Quantity     -> sum over the customer's transactions
#   ProductPrice -> mean over the customer's transactions
# reset_index() turns CustomerID back into a regular column.
customer_features = (
    transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        ProductPrice=('ProductPrice', 'mean'),
    )
    .reset_index()
)

Standardizing features and computing the cosine similarity matrix

In [4]:
# Columns of customer_features that feed the similarity computation.
feature_columns = ['TotalValue', 'Quantity', 'ProductPrice']

# Standardize each column so that features measured on larger scales do not
# dominate the similarity scores computed from them.
scaler = StandardScaler()
normalized_features = scaler.fit_transform(customer_features[feature_columns])


In [5]:
# Pairwise cosine similarity between all rows of normalized_features.
# Row/column i of the result lines up with row i of customer_features,
# because normalized_features was built from that frame without reordering.
similarity_matrix = cosine_similarity(normalized_features)

In [6]:
# Quick visual check of the matrix: the diagonal (each customer compared
# with itself) should read 1.0 and the matrix should be symmetric.
print(similarity_matrix)

[[ 1.          0.1045127  -0.52492341 ...  0.65676953  0.5432884
  -0.47067861]
 [ 0.1045127   1.          0.79153106 ...  0.81260123  0.88921505
  -0.92620821]
 [-0.52492341  0.79153106  1.         ...  0.29066238  0.42799653
  -0.50379189]
 ...
 [ 0.65676953  0.81260123  0.29066238 ...  1.          0.97675504
  -0.96608909]
 [ 0.5432884   0.88921505  0.42799653 ...  0.97675504  1.
  -0.99568784]
 [-0.47067861 -0.92620821 -0.50379189 ... -0.96608909 -0.99568784
   1.        ]]


Saving the results to a CSV file

In [7]:
# How many leading customers to score, and how many lookalikes to keep for each.
N_TARGET_CUSTOMERS = 20
TOP_K = 3

# Plain Python list of IDs, positionally aligned with the rows/columns of
# similarity_matrix; avoids building a row Series for every single lookup.
customer_ids = customer_features['CustomerID'].tolist()

lookalike_results = {}
# Clamp to the number of available customers so a dataset with fewer than
# N_TARGET_CUSTOMERS rows does not raise IndexError.
for i in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    # Exclude the customer's own entry by position instead of assuming it
    # sorts first: another customer can tie with (or, through floating-point
    # error, exceed) the self-similarity, which would otherwise recommend the
    # customer to itself and drop a genuine lookalike.
    candidates = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # float(...) converts the NumPy scalar to a built-in float so the list
    # written to the CSV reads 0.98 rather than np.float64(0.98) on NumPy 2.x.
    lookalike_results[customer_ids[i]] = [
        (customer_ids[j], round(float(score), 2)) for j, score in candidates[:TOP_K]
    ]

# One row per target customer; 'Recommendations' holds the list of
# (lookalike CustomerID, similarity score) tuples, best match first.
lookalike_df = pd.DataFrame(
    [{'CustomerID': k, 'Recommendations': v} for k, v in lookalike_results.items()]
)
lookalike_df.to_csv('Lookalike.csv', index=False)


### Output
The top 3 lookalike customers (with their similarity scores) for each of the first 20 customers are saved in `Lookalike.csv`.