In [1]:
!pip install gdown pandas scikit-learn

import pandas as pd
import gdown
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

urls = {
    "Customers.csv": "https://drive.google.com/uc?export=download&id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE",
    "Products.csv": "https://drive.google.com/uc?export=download&id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0",
    "Transactions.csv": "https://drive.google.com/uc?export=download&id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"
}

for filename, url in urls.items():
    gdown.download(url, filename, quiet=False)

customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

transactions_products = pd.merge(transactions, products, on="ProductID", how="left")

customer_features = transactions_products.groupby("CustomerID").agg({
    "Price_y": ["sum", "mean", "count"],
    "Category": lambda x: x.mode()[0] if not x.mode().empty else "Unknown"
}).reset_index()

customer_features.columns = ["CustomerID", "TotalSpend", "AverageSpend", "TransactionCount", "TopCategory"]

customer_profiles = pd.merge(customers, customer_features, on="CustomerID", how="left").fillna(0)

customer_profiles = pd.get_dummies(customer_profiles, columns=["Region", "TopCategory"], drop_first=True)

numerical_columns = ["TotalSpend", "AverageSpend", "TransactionCount"]
scaler = StandardScaler()
customer_profiles[numerical_columns] = scaler.fit_transform(customer_profiles[numerical_columns])

non_numeric_columns = customer_profiles.select_dtypes(include=["object"]).columns
customer_data = customer_profiles.drop(columns=non_numeric_columns, errors="ignore")

customer_ids = customer_profiles["CustomerID"]
similarity_matrix = cosine_similarity(customer_data)

similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

print("\nCustomer Similarity Matrix:")
print(similarity_df.head())




Downloading...
From: https://drive.google.com/uc?export=download&id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE
To: /content/Customers.csv
100%|██████████| 8.54k/8.54k [00:00<00:00, 5.47MB/s]
Downloading...
From: https://drive.google.com/uc?export=download&id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0
To: /content/Products.csv
100%|██████████| 4.25k/4.25k [00:00<00:00, 10.6MB/s]
Downloading...
From: https://drive.google.com/uc?export=download&id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF
To: /content/Transactions.csv
100%|██████████| 54.7k/54.7k [00:00<00:00, 47.9MB/s]


Customer Similarity Matrix:
CustomerID     C0001     C0002     C0003     C0004     C0005     C0006  \
CustomerID                                                               
C0001       1.000000 -0.056632  0.297282  0.321550  0.451788  0.448842   
C0002      -0.056632  1.000000  0.553199 -0.265904  0.293715 -0.177791   
C0003       0.297282  0.553199  1.000000  0.019644  0.235014  0.104203   
C0004       0.321550 -0.265904  0.019644  1.000000 -0.578442  0.266074   
C0005       0.451788  0.293715  0.235014 -0.578442  1.000000  0.260739   

CustomerID     C0007     C0008     C0009     C0010  ...     C0191     C0192  \
CustomerID                                          ...                       
C0001       0.430030  0.000327  0.005058 -0.065065  ...  0.504979  0.922194   
C0002      -0.058274 -0.341554  0.545121  0.880433  ... -0.094114  0.112320   
C0003      -0.092195 -0.111696  0.161748  0.577729  ...  0.260111  0.408887   
C0004      -0.529323  0.672462 -0.488965 -0.164732  ...  


