In [None]:
# Importing necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Load the data
# The weekly sales CSV is fetched over the network from the UCI repository.
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00396/Sales_Transactions_Dataset_Weekly.csv')

# Select the feature columns by name instead of by position, so the selection
# does not depend on exact column offsets.
# NOTE(review): assuming the published layout (Product_Code, W0..W51, MIN, MAX,
# Normalized 0..51), the earlier positional slices 56:107 and 2:53 skipped the
# first week of each group -- confirm against df.columns.
# .copy() gives independent frames, so later column assignments never write
# into (or warn about) a view of df.
r_df = df.filter(regex=r'^Normalized\s*\d+$').copy()  # Normalized data
r_df1 = df.filter(regex=r'^W\d+$').copy()  # Raw data

# Fail fast if the header does not match the expected naming scheme; an empty
# selection would otherwise only surface as an obscure error during scaling.
assert r_df.shape[1] > 0 and r_df.shape[1] == r_df1.shape[1], (
    f"unexpected column layout: {r_df.shape[1]} normalized vs {r_df1.shape[1]} raw weekly columns"
)

# Display first few rows of the data
r_df.head(), r_df1.head()

In [None]:
# Data Preprocessing and Scaling

# Standardize both feature sets with StandardScaler and wrap each result back
# into a DataFrame (keeping the original column names) for readability.
scaler = StandardScaler()

# fit_transform() refits the scaler on every call, so once this cell has run
# `scaler` holds the statistics of r_df1 (the raw data), not of r_df.
r_df_scaled = pd.DataFrame(scaler.fit_transform(r_df), columns=r_df.columns)
r_df1_scaled = pd.DataFrame(scaler.fit_transform(r_df1), columns=r_df1.columns)

# Display first few rows of scaled data
r_df_scaled.head(), r_df1_scaled.head()

In [None]:
# Importing required libraries for clustering analysis
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

# Elbow Method to find the optimal number of clusters for r_df_scaled
# Fit one KMeans model per candidate cluster count and record its inertia.
cluster_counts = range(1, 11)
wcss = []  # Within-cluster sum of squares, one entry per candidate count
for k in cluster_counts:
    kmeans = KMeans(
        n_clusters=k,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(r_df_scaled)
    wcss.append(kmeans.inertia_)

# Plotting the Elbow Method graph
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(cluster_counts, wcss, marker='o')
ax.set_title('Elbow Method For Optimal Number of Clusters')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('WCSS')
plt.show()

In [None]:
# Using Silhouette Score to find the optimal number of clusters
# The loop starts at 2 clusters and scores each fitted model's labels.
candidate_counts = range(2, 11)
silhouette_scores = []
for k in candidate_counts:
    kmeans = KMeans(
        n_clusters=k,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    ).fit(r_df_scaled)
    score = silhouette_score(r_df_scaled, kmeans.labels_)
    silhouette_scores.append(score)

# Plotting the Silhouette Score graph
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(candidate_counts, silhouette_scores, marker='o')
ax.set_title('Silhouette Score For Optimal Number of Clusters')
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Silhouette Score')
plt.show()

In [None]:
# Performing KMeans clustering with the chosen number of clusters
# (assumed to be 4 based on the Elbow Method plot)
optimal_clusters = 4
kmeans = KMeans(n_clusters=optimal_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans.fit(r_df_scaled)
labels = kmeans.labels_

# Adding cluster labels to the feature DataFrame.
# r_df is derived from df, so writing a column into it in place can trigger
# pandas' SettingWithCopyWarning. assign() returns a new frame instead, and
# re-running this cell simply overwrites the label column.
r_df = r_df.assign(Cluster_Labels=labels)

# Displaying the first few rows of the DataFrame with cluster labels
r_df.head()

In [None]:
# Visualizing the clusters using the first two principal components
# PCA is computed here so that the axes really are principal components of the
# standardized features, rather than two arbitrary feature columns.
pca = PCA(n_components=2, random_state=0)
components = pca.fit_transform(r_df_scaled)

fig, ax = plt.subplots(figsize=(10, 8))
# One point per row of r_df_scaled, coloured by its KMeans cluster label.
points = ax.scatter(components[:, 0], components[:, 1], c=labels, cmap='rainbow')
ax.set_title('Clusters of Customers')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(points, ax=ax).set_label('Cluster Label')
plt.show()

## Interpretation of Clusters
Now that we have visualized the clusters, what do they actually mean? Let's take a look at the average values of the features for each cluster.

In [None]:
# Average every feature column within each cluster label
per_cluster = r_df.groupby('Cluster_Labels')
cluster_summary = per_cluster.mean()
cluster_summary

## Conclusion and Recommendations
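The recommendations below are based on the average values of the features for each cluster. Treat them as hypotheses rather than findings: the clustering features are weekly sales values, so any statement about recency, frequency, or monetary value should be confirmed against `cluster_summary` before acting on it.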

- **Cluster 0**: This cluster might represent customers who are moderate in terms of frequency and monetary value but have been recent customers. Marketing strategies like new product recommendations or loyalty programs can be effective.

- **Cluster 1**: This cluster could represent new customers with low frequency and monetary value. Strategies to increase engagement and upsell can be applied here.

- **Cluster 2**: This cluster might represent 'churned' or 'at-risk' customers who haven't purchased in a while but used to visit frequently. Re-engagement campaigns could be effective for this group.

- **Cluster 3**: This cluster could represent the 'premium' customers who shop frequently and contribute a significant amount to the revenue. Personalized services and premium membership plans could be offered to this group.

