In [20]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [21]:
customers_url = "https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE"
products_url = "https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0"
transactions_url = "https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"

In [22]:
customers_df = pd.read_csv(customers_url)
products_df = pd.read_csv(products_url)
transactions_df = pd.read_csv(transactions_url)

In [23]:
customer_product_df = transactions_df.merge(customers_df, on='CustomerID')
customer_product_df = customer_product_df.merge(products_df, on='ProductID')


In [24]:
customer_product_matrix = customer_product_df.pivot_table(
    index='CustomerID', columns='ProductID', values='Quantity', fill_value=0
)

In [25]:
scaler = StandardScaler()
scaled_matrix = scaler.fit_transform(customer_product_matrix)

In [26]:
cosine_sim = cosine_similarity(scaled_matrix)


In [33]:
lookalike_customers = []


In [34]:
for i in range(20):
    customer_id = customer_product_matrix.index[i]
    sim_scores = list(enumerate(cosine_sim[i]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    top_3_lookalikes = sim_scores[1:4]  # Exclude the customer itself
    for j in range(3):
        lookalike_customers.append({
            'TargetCustomerID': customer_id,
            'LookalikeCustomerID': top_3_lookalikes[j][0],
            'SimilarityScore': top_3_lookalikes[j][1]
        })

In [35]:
lookalike_df = pd.DataFrame(lookalike_customers, columns=['TargetCustomerID', 'LookalikeCustomerID', 'SimilarityScore'])


In [36]:
lookalike_df.to_csv("Lookalike.csv", index=False)


In [37]:
print(lookalike_df)


   TargetCustomerID  LookalikeCustomerID  SimilarityScore
0             C0001                  192         0.403396
1             C0001                   19         0.365399
2             C0001                  103         0.342488
3             C0002                   90         0.434090
4             C0002                   29         0.403653
5             C0002                   70         0.320636
6             C0003                  179         0.474693
7             C0003                  133         0.468545
8             C0003                  143         0.408085
9             C0004                   69         0.383581
10            C0004                  174         0.307140
11            C0004                  104         0.269655
12            C0005                   95         0.487763
13            C0005                   22         0.470563
14            C0005                   54         0.378057
15            C0006                   39         0.457183
16            