## Importing Necessary Libraries and Loading Data

In [23]:
import pandas as pd

# Read the three input tables from CSV files in the working directory.
# The unpacking order matches the order of the file names below.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

## Data Preparation

#### Merging Datasets

In [24]:
# Attach customer attributes to each transaction, then product attributes.
# Both joins use the default inner join, so transactions whose CustomerID or
# ProductID has no match in the lookup table are dropped.
customer_transactions = transactions.merge(customers, on='CustomerID')
customer_data = customer_transactions.merge(products, on='ProductID')

### Feature Engineering

In [25]:
# One row per customer:
#   TotalSpending    - sum of TotalValue over the customer's merged rows
#   AvgOrderValue    - mean of TotalValue over the customer's merged rows
#   ProductDiversity - number of distinct Category values purchased
# Named aggregation yields flat, already-labelled columns in this order.
customer_features = (
    customer_data
    .groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),
        AvgOrderValue=('TotalValue', 'mean'),
        ProductDiversity=('Category', 'nunique'),
    )
    .reset_index()
)

#### Normalizing Features

In [26]:
from sklearn.preprocessing import MinMaxScaler

# Rescale each feature column to the [0, 1] range so that no single feature
# dominates the similarity computation purely because of its magnitude.
# The scaled values overwrite the original columns in customer_features.
feature_cols = ['TotalSpending', 'AvgOrderValue', 'ProductDiversity']

scaler = MinMaxScaler()
customer_features[feature_cols] = scaler.fit_transform(customer_features[feature_cols])

## Lookalike Model

### Computing Similarity Matrix

In [28]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between customers, computed on every column
# after the first one (the first column is CustomerID).
customer_ids = customer_features['CustomerID']
feature_matrix = customer_features.iloc[:, 1:]

similarity_matrix = cosine_similarity(feature_matrix)

# Label both axes with CustomerID so scores can be looked up by ID.
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

### Extracting the Top 3 Similar Customers

In [29]:
# Build {customer_id: [(lookalike_id, similarity_score), ...]} holding the
# three most similar *other* customers for every customer, best match first.
lookalikes = {}
for customer in similarity_df.index:
    # Remove the customer's own entry by label before ranking. Skipping the
    # first sorted position instead is fragile: the self-similarity score is
    # not guaranteed to sort first (exact ties with another customer,
    # floating-point rounding, or a customer whose scaled features are all
    # zero), in which case a genuine match would be discarded and fewer than
    # three lookalikes would be returned.
    other_scores = similarity_df.loc[customer].drop(labels=customer)

    # nlargest returns the highest scores in descending order and breaks ties
    # deterministically by keeping the first occurrence.
    similar_customers = other_scores.nlargest(3)

    lookalikes[customer] = list(similar_customers.items())

## Generating output for the First 20 Customers

### Filtering Results for the First 20 Customers (C0001–C0020)

In [30]:
# Keep only the lookalikes of the first 20 rows of customer_features.
# NOTE(review): this selects by position, not by ID. It equals C0001-C0020
# only if those 20 IDs are the first rows of customer_features - confirm.
lookalike_subset = {
    customer_id: lookalikes[customer_id]
    for customer_id in customer_features['CustomerID'].head(20)
}

### Creating a CSV File of Lookalikes and their Similarity Scores

In [31]:
import csv

# Write one row per customer: the CustomerID and its lookalike list.
# The list is written as its Python string representation, i.e. a single
# cell containing "[(id, score), (id, score), (id, score)]".
with open('Chetana_Guntupalli_Lookalike.csv', mode='w', newline='') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerow(['CustomerID', 'Lookalikes'])
    # Each (customer_id, lookalike_list) pair becomes one two-column row.
    csv_writer.writerows(lookalike_subset.items())