In [1]:
# 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# 2. Load Data
# Read the shoppers CSV (path is relative to the notebook's working directory)
# into `df`, then report its dimensions and preview the first rows.
print(" Loading Online Shoppers Data")
df = pd.read_csv('online_shoppers_intention.csv')
print("Data Shape: {}".format(df.shape))
display(df.head(5))

In [None]:
# Feature preparation: pick four features from `df`, log-compress the skewed
# ones, standardise, and project onto two principal components (`X_pca`).

# feature selection
features = ['PageValues', 'ExitRates', 'ProductRelated_Duration', 'BounceRates']
X = df[features].copy()  # copy so the transforms below never write back into df

# log transform for skewed columns
# np.log1p(v) is -inf at v == -1 and NaN below it, and StandardScaler rejects
# infinite input. Clipping at 0 first is a no-op for valid (non-negative)
# values and neutralises any negative sentinel entries.
# NOTE(review): some copies of this CSV reportedly contain -1 in the duration
# columns -- confirm against the file actually loaded. NaNs pass through clip
# unchanged and are NOT handled here.
for col in ['PageValues', 'ProductRelated_Duration']:
    X[col] = np.log1p(X[col].clip(lower=0))

# scale features to zero mean / unit variance so no single feature dominates PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# PCA for dimensionality reduction
# random_state is pinned because, depending on the scikit-learn version,
# svd_solver='auto' may select the randomized solver; with a deterministic
# solver the argument is simply ignored.
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_scaled)

print(f"Data ready. Shape: {X_pca.shape}")


In [None]:
# K-Means: partition the 2-D PCA projection into two clusters and report how
# well separated they are via the silhouette score.
print("Running K-Means (k=2)...")

# Fixed seed and 10 restarts, as configured below, keep the labelling repeatable.
kmeans = KMeans(
    n_clusters=2,
    init='k-means++',
    n_init=10,
    random_state=42,
)
labels_km = kmeans.fit(X_pca).labels_

score_km = silhouette_score(X_pca, labels_km)
print("Silhouette Score (K-Means): {:.4f}".format(score_km))


In [None]:
# Scatter plot of the PCA projection, coloured by K-Means cluster label.
plt.figure(figsize=(10, 8))

sns.scatterplot(
    x=X_pca[:, 0],
    y=X_pca[:, 1],
    hue=labels_km,
    palette='viridis',
    s=60
)

# The dash is written as an escape (\u2013, en dash) so the title cannot be
# corrupted by a file re-encoding; the previous literal was mojibake.
plt.title(f'K-Means Clusters (k=2) \u2013 Score: {score_km:.2f}')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.legend(title='Cluster')
plt.grid(alpha=0.3)
plt.show()
