In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [16]:
# Read the Online Retail workbook (first sheet, pandas default) into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
file_path = "Online Retail.xlsx"
df = pd.read_excel(io=file_path)

In [17]:
# Data cleaning: keep only rows that are complete in every column.
# NOTE(review): this discards a row when ANY column is missing, including
# columns that the clustering below never reads (it uses only 'Quantity' and
# 'UnitPrice') -- confirm that is intended.
# NOTE(review): `df` is rebound here, so the unfiltered frame is only
# recoverable by re-running the load cell.
df = df.dropna(axis=0, how="any")

In [18]:
# Feature selection: cluster on the two numeric columns 'Quantity' and
# 'UnitPrice' only.
data = df.loc[:, ['Quantity', 'UnitPrice']]

# Standardize both features (zero mean, unit variance) so that neither one
# dominates the Euclidean distances used by K-Means and DBSCAN.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(X=data)

In [19]:
# Apply K-Means clustering with 3 clusters on the standardized features.
# `n_init` is pinned explicitly: its default differs between scikit-learn
# releases (10 historically, 'auto' in newer ones) and some releases emit a
# FutureWarning when it is omitted. Fixing it, together with `random_state`,
# keeps the labels reproducible regardless of the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_labels = kmeans.fit_predict(data_scaled)

In [None]:
# Apply DBSCAN (density-based clustering) to the standardized features.
# eps is the neighbourhood radius in scaled units; min_samples is the number of
# neighbours a point needs to count as a core point.
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(data_scaled)
# One label per row; the counting cell below treats label -1 as noise.
dbscan_labels = dbscan.labels_

In [None]:
# Count the distinct groups each algorithm produced.
unique_kmeans_clusters = len(set(kmeans_labels))

# For DBSCAN, label -1 is treated as noise rather than as a cluster: remove it
# from the set of labels before counting, and tally the noise rows separately.
dbscan_label_set = set(dbscan_labels)
dbscan_label_set.discard(-1)
unique_dbscan_clusters = len(dbscan_label_set)
noise_points = sum(1 for label in dbscan_labels if label == -1)

In [None]:
# Report how many clusters each method found, plus DBSCAN's noise count.
kmeans_summary = f"K-Means formed {unique_kmeans_clusters} clusters."
dbscan_summary = f"DBSCAN formed {unique_dbscan_clusters} clusters and detected {noise_points} noise points."
print(kmeans_summary, dbscan_summary, sep="\n")

In [None]:
# Project the standardized features onto two principal components for plotting.
# The input already has exactly two columns ('Quantity', 'UnitPrice'), so this
# re-expresses the points on the principal axes rather than dropping dimensions.
pca = PCA(n_components=2)
data_pca = pca.fit_transform(X=data_scaled)

In [None]:
# Visualizing K-Means and DBSCAN clusters side by side on the PCA projection.
#
# Both panels are drawn in ONE cell on an explicit figure. With Jupyter's
# inline backend a figure is rendered and closed when its cell finishes, so a
# `plt.subplot(1, 2, 2)` issued from a later cell would be drawn on a new,
# default-sized figure instead of next to the first panel.
fig, (ax_kmeans, ax_dbscan) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: points coloured by K-Means label.
sns.scatterplot(
    x=data_pca[:, 0],
    y=data_pca[:, 1],
    hue=kmeans_labels,
    palette='viridis',
    legend='full',
    ax=ax_kmeans,
)
ax_kmeans.set(title='K-Means Clustering', xlabel='PC 1', ylabel='PC 2')

# Right panel: points coloured by DBSCAN label (the counting cell treats -1 as noise).
sns.scatterplot(
    x=data_pca[:, 0],
    y=data_pca[:, 1],
    hue=dbscan_labels,
    palette='Set1',
    legend='full',
    ax=ax_dbscan,
)
ax_dbscan.set(title='DBSCAN Clustering', xlabel='PC 1', ylabel='PC 2')

fig.tight_layout()
plt.show()