# Import Libraries

In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets

In [2]:
# Read the three input tables in one pass; the order of the paths matches
# the order of the names being assigned.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/Customers.csv",
        "data/Products.csv",
        "data/Transactions.csv",
    )
)

# Lookalike

## Merge datasets

In [3]:
# Attach customer attributes to every transaction, then product attributes.
# Both joins use the default inner join, so transactions whose CustomerID or
# ProductID has no match in the lookup tables are dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

## Creating a pivot table

In [5]:
# Customer x product matrix: one row per CustomerID, one column per ProductID.
# aggfunc is set explicitly to 'sum' because pivot_table defaults to 'mean',
# which would average Quantity over a customer's repeat purchases of a product
# instead of totalling them. Customer/product pairs with no transaction are
# filled with 0.
customer_product_matrix = pd.pivot_table(
    merged_data,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0
)

## Cosine similarity

In [6]:
# Pairwise cosine similarity between the rows of the customer x product
# matrix; with Y left as None the rows of X are compared against each other.
similarity_matrix = cosine_similarity(X=customer_product_matrix, Y=None)

## Creating a DataFrame based on similarity

In [7]:
# Label both axes of the similarity matrix with the pivot table's row index so
# that similarities can be looked up by CustomerID instead of by position.
customer_ids = customer_product_matrix.index
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

## Generating lookalike recommendations 

In [8]:
# How many customers (the first rows of `customers`) to build lookalikes for,
# and how many lookalikes to keep for each of them.
N_TARGET_CUSTOMERS = 15
TOP_K = 3

# Maps CustomerID -> list of (lookalike CustomerID, similarity score) tuples,
# ordered from most to least similar.
lookalikes = {}
for customer_id in customers['CustomerID'].head(N_TARGET_CUSTOMERS):
    # Customers missing from the similarity matrix (e.g. no rows in the merged
    # transaction data) cannot be scored and are skipped.
    if customer_id not in similarity_df.index:
        continue
    # Remove the customer's own entry by label rather than by slicing off the
    # first sorted position: another customer tied at the same similarity, or
    # floating-point rounding, can otherwise put someone else first and leave
    # the customer listed as their own lookalike.
    top_similar = similarity_df[customer_id].drop(customer_id).nlargest(TOP_K)
    # Store plain Python floats so the tuples render as bare numbers (not
    # NumPy scalar reprs) when they are written out as text.
    lookalikes[customer_id] = [
        (other_id, float(score)) for other_id, score in top_similar.items()
    ]

## Save results to CSV

In [9]:
# One row per customer; each cell holds one (lookalike id, score) tuple.
lookalike_df = pd.DataFrame.from_dict(data=lookalikes, orient="index")

# NOTE: header=False suppresses the header row entirely, so index_label is not
# written either; the file contains only the data rows, with the customer id
# as the first field of each row.
lookalike_df.to_csv(
    path_or_buf="Siddhant_Bhagat_Lookalike.csv",
    header=False,
    index_label="CustomerID",
)