In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, silhouette_score, adjusted_rand_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
import tkinter as tk

In [2]:
# Load the datasets
customers_df = pd.read_csv("./Datasets/Customers.csv")
transactions_df = pd.read_csv("./Datasets/Transactions_New.csv")

# Display column names and first few rows
print("Customers Dataset Columns:", customers_df.columns)
print("Transactions Dataset Columns:", transactions_df.columns)
print(customers_df.head())
print(transactions_df.head())


Customers Dataset Columns: Index(['CustomerID', 'CustomerName', 'Region', 'SignupDate', 'Age'], dtype='object')
Transactions Dataset Columns: Index(['TransactionID', 'CustomerID', 'ProductID', 'Quantity', 'TotalValue',
       'ProductName', 'Category'],
      dtype='object')
  CustomerID        CustomerName         Region  SignupDate  Age
0      C0001    Lawrence Carroll  South America  2022-07-10   56
1      C0002      Elizabeth Lutz           Asia  2022-02-13   46
2      C0003      Michael Rivera  South America  2024-03-07   32
3      C0004  Kathleen Rodriguez  South America  2022-10-09   60
4      C0005         Laura Weber           Asia  2022-08-15   25
  TransactionID CustomerID ProductID  Quantity  TotalValue  \
0        T00001      C0199      P067         1      300.68   
1        T00112      C0146      P067         1      300.68   
2        T00166      C0127      P067         1      300.68   
3        T00272      C0087      P067         2      601.36   
4        T00363      C00

In [3]:
# Merge Customer Name into transactions_df
transactions_df = transactions_df.merge(customers_df[["CustomerID", "CustomerName"]], on="CustomerID", how="left")

# Aggregate total spending and transactions per customer
customer_spending = transactions_df.groupby("CustomerID").agg(
    Total_Spending=("TotalValue", "sum"),
    Total_Transactions=("TransactionID", "count")
).reset_index()

# Aggregate spending per product category
category_spending = transactions_df.pivot_table(
    index="CustomerID", columns="Category", values="TotalValue", aggfunc="sum", fill_value=0
).reset_index()

# Merge all features into a single dataset
customer_data = customer_spending.merge(category_spending, on="CustomerID", how="left")
customer_data = customer_data.merge(customers_df, on="CustomerID", how="left")

# Select only numerical columns for scaling
numeric_columns = customer_data.select_dtypes(include=[np.number]).columns.tolist()
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_data[numeric_columns])

# Convert back to DataFrame
customer_scaled_df = pd.DataFrame(scaled_features, columns=numeric_columns)
customer_scaled_df.insert(0, "CustomerID", customer_data["CustomerID"])

In [4]:
# Define Autoencoder
input_dim = scaled_features.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(16, activation='relu')(input_layer)
encoded = Dense(8, activation='relu')(encoded)
encoded = Dense(3, activation='relu')(encoded)

decoded = Dense(8, activation='relu')(encoded)
decoded = Dense(16, activation='relu')(decoded)
decoded = Dense(input_dim, activation='linear')(decoded)

# Compile the Autoencoder
autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train the Autoencoder
autoencoder.fit(scaled_features, scaled_features, epochs=50, batch_size=32, shuffle=True, verbose=1)

# Extract the Encoder part
encoder = Model(input_layer, encoded)
encoded_features = encoder.predict(scaled_features)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [5]:
# Apply K-Means clustering
kmeans = KMeans(n_clusters=4, random_state=42)
customer_encoded_df = pd.DataFrame(encoded_features, columns=["Feature1", "Feature2", "Feature3"])
customer_encoded_df.insert(0, "CustomerID", customer_data["CustomerID"])
customer_encoded_df["Cluster"] = kmeans.fit_predict(encoded_features)

# Define cluster labels
cluster_labels = {
    0: "Low Spenders",
    1: "Medium Spenders",
    2: "High Spenders",
    3: "VIP Customers"
}
customer_encoded_df["Segment"] = customer_encoded_df["Cluster"].map(cluster_labels)
customer_encoded_df.drop(columns=["Cluster"], inplace=True)

# Merge with customer details
final_customer_df = customer_data.merge(customer_encoded_df, on="CustomerID", how="left")

# Model Evaluation
mse = mean_squared_error(scaled_features, autoencoder.predict(scaled_features))
silhouette_avg = silhouette_score(encoded_features, kmeans.labels_)
ari = adjusted_rand_score(kmeans.labels_, kmeans.labels_)

print(f"Mean Squared Error: {mse}")
print(f"Silhouette Score: {silhouette_avg}")
print(f"Adjusted Rand Index: {ari}")

Mean Squared Error: 0.4848049303423573
Silhouette Score: 0.4621998071670532
Adjusted Rand Index: 1.0


In [6]:
# Initialize Tkinter window
root = tk.Tk()
root.title("Customer Spending Info")

tk.Label(root, text="Enter Customer ID:").pack()
entry_id = tk.Entry(root)
entry_id.pack()

def get_customer_info():
    customer_id = entry_id.get().strip()
    customer_details = final_customer_df[final_customer_df["CustomerID"] == customer_id]

    if customer_details.empty:
        result_label.config(text="Customer not found.")
    else:
        customer_name = customer_details["CustomerName"].values[0]
        segment = customer_details["Segment"].values[0]
        details_text = f"Customer ID: {customer_id}\n"
        details_text += f"Customer Name: {customer_name}\n"
        details_text += f"Group: {segment}\n"
        details_text += f"Total Spending: ₹{customer_details['Total_Spending'].values[0]:.2f}\n"
        details_text += f"Total Transactions: {customer_details['Total_Transactions'].values[0]}\n\n"
        details_text += "*Spending Breakdown:*\n"

        for category in category_spending.columns[1:]:
            if category in customer_details.columns:
                value = float(customer_details[category].values[0])
                if value > 0:
                    details_text += f"{category}: ₹{value:.2f}\n"
        
        result_label.config(text=details_text)

search_button = tk.Button(root, text="Get Customer Info", command=get_customer_info)
search_button.pack()
result_label = tk.Label(root, text="", justify="left", anchor="w")
result_label.pack()

root.mainloop()
