In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Locations of the three input CSV files, relative to the notebook's working directory.
customers_path, products_path, transactions_path = (
    "Downloads/Customers.csv",
    "Downloads/Products.csv",
    "Downloads/Transactions.csv",
)

# Read each file into its own DataFrame, in the same order as the paths above.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (customers_path, products_path, transactions_path)
)

In [3]:
# Convert the two date columns to pandas datetimes (overwrites the columns in place).
customers_df['SignupDate'] = customers_df['SignupDate'].pipe(pd.to_datetime)
transactions_df['TransactionDate'] = transactions_df['TransactionDate'].pipe(pd.to_datetime)

# One row per transaction: left-join the customer record first, then the product record,
# so every transaction is kept even if a lookup finds no match.
merged_df = pd.merge(
    pd.merge(transactions_df, customers_df, on='CustomerID', how='left'),
    products_df,
    on='ProductID',
    how='left',
)
print(merged_df.head())

  TransactionID CustomerID ProductID     TransactionDate  Quantity  \
0        T00001      C0199      P067 2024-08-25 12:38:23         1   
1        T00112      C0146      P067 2024-05-27 22:23:54         1   
2        T00166      C0127      P067 2024-04-25 07:38:55         1   
3        T00272      C0087      P067 2024-03-26 22:55:37         2   
4        T00363      C0070      P067 2024-03-21 15:10:10         3   

   TotalValue  Price_x     CustomerName         Region SignupDate  \
0      300.68   300.68   Andrea Jenkins         Europe 2022-12-03   
1      300.68   300.68  Brittany Harvey           Asia 2024-09-04   
2      300.68   300.68  Kathryn Stevens         Europe 2024-04-04   
3      601.36   300.68  Travis Campbell  South America 2024-04-11   
4      902.04   300.68    Timothy Perez         Europe 2022-03-15   

                       ProductName     Category  Price_y  
0  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1  ComfortLiving Bluetooth Speaker  Electronic

In [4]:
# Sum of TotalValue for each Region.
region_sales = merged_df['TotalValue'].groupby(merged_df['Region']).sum()

# The five product names with the largest summed TotalValue, highest first.
top_products = (
    merged_df['TotalValue']
    .groupby(merged_df['ProductName'])
    .sum()
    .sort_values(ascending=False)
    .iloc[:5]
)

# Sum of TotalValue for each product Category.
category_sales = merged_df['TotalValue'].groupby(merged_df['Category']).sum()

# Number of distinct customers that appear in more than one transaction row.
repeat_customers = merged_df['CustomerID'].value_counts().gt(1).sum()

# Sum of TotalValue for each calendar month of TransactionDate.
monthly_sales = (
    merged_df['TotalValue']
    .groupby(merged_df['TransactionDate'].dt.to_period('M'))
    .sum()
)

In [6]:
# Rebuild the transaction-level table: each transaction joined to its customer and product.
merged_df = (
    transactions_df
    .merge(customers_df, on='CustomerID', how='left')
    .merge(products_df, on='ProductID', how='left')
)

# One row per customer. Each output column keeps the name of the column it was
# computed from, so the names describe the source rather than the statistic:
#   TotalValue      -> summed TotalValue
#   ProductID       -> number of distinct products
#   TransactionDate -> number of transaction rows with a non-null date
#   Category        -> number of distinct categories
customer_features = (
    merged_df
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        ProductID=('ProductID', 'nunique'),
        TransactionDate=('TransactionDate', 'count'),
        Category=('Category', 'nunique'),
    )
    .reset_index()
)

In [7]:
# Learn the scaling statistics from the four per-customer feature columns, then
# overwrite those columns in customer_features with their standardised values.
scaler = StandardScaler().fit(
    customer_features[['TotalValue', 'ProductID', 'TransactionDate', 'Category']]
)
customer_features[['TotalValue', 'ProductID', 'TransactionDate', 'Category']] = scaler.transform(
    customer_features[['TotalValue', 'ProductID', 'TransactionDate', 'Category']]
)

def get_similar_customers(target_customer_id, n_recommendations=3):
    """Return the customers whose feature profile is most similar to the target's.

    Similarity is the cosine similarity between rows of the module-level
    ``customer_features`` frame, computed over its ``TotalValue``, ``ProductID``,
    ``TransactionDate`` and ``Category`` columns.

    Parameters
    ----------
    target_customer_id
        Value to look up in ``customer_features['CustomerID']`` (exact match).
    n_recommendations : int, default 3
        Maximum number of similar customers to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``SimilarityScore`` and ``CustomerID``, sorted by descending
        score and truncated to ``n_recommendations`` rows. The index is each
        customer's position among the non-target rows of ``customer_features``.

    Raises
    ------
    ValueError
        If ``target_customer_id`` does not occur in ``customer_features``.
    """
    feature_columns = ['TotalValue', 'ProductID', 'TransactionDate', 'Category']

    is_target = customer_features['CustomerID'] == target_customer_id
    if not is_target.any():
        # Without this guard the empty profile reaches cosine_similarity, which
        # also raises ValueError but with an opaque "0 sample(s)" message.
        raise ValueError(
            f"Customer ID {target_customer_id!r} not found in customer_features"
        )

    # Extract the target customer's profile and everyone else's.
    target_customer_profile = customer_features.loc[is_target, feature_columns]
    other_customers = customer_features.loc[~is_target]

    # Result has one row per target profile and one column per other customer.
    similarity_scores = cosine_similarity(
        target_customer_profile,
        other_customers[feature_columns],
    )

    # Transpose so each other customer becomes a row, then attach their IDs.
    similarity_df = pd.DataFrame(similarity_scores.T, columns=['SimilarityScore'])
    similarity_df['CustomerID'] = other_customers['CustomerID'].values

    similar_customers = similarity_df.sort_values(
        by='SimilarityScore', ascending=False
    ).head(n_recommendations)

    return similar_customers

In [8]:
# Ask for the customer to look up. The ID is matched by exact equality inside
# get_similar_customers, so surrounding whitespace typed by the user is removed
# here; otherwise an input such as " C0001" would not match any customer.
target_customer_id = input("Input the  user's information: ").strip()
similar_customers = get_similar_customers(target_customer_id=target_customer_id, n_recommendations=3)

print(similar_customers)

Input the  user's information:  C0001


     SimilarityScore CustomerID
150         0.995727      C0152
186         0.968616      C0189
84          0.967202      C0086
