In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the sales data (comma-separated file, decoded as Latin-1).
df = pd.read_csv('sales.csv', sep=",", encoding='Latin-1')

# DataFrame.dropna() returns a new frame instead of modifying df in place, so
# the result has to be assigned for any row to actually be removed. Only the
# columns used as clustering features later in the notebook are checked, so
# rows are not thrown away because of gaps in unrelated columns.
df = df.dropna(subset=['QUANTITYORDERED', 'PRICEEACH', 'SALES'])

# info() prints its own summary; head() is kept as the last expression of the
# cell because a bare expression in the middle of a cell is never rendered.
df.info()
df.head()

In [None]:
# Select the feature variables that the clustering will be based on
features = df.loc[:, ['QUANTITYORDERED', 'PRICEEACH', 'SALES']]

In [None]:
# Report how many missing values each selected feature column contains
print(features.isna().sum(axis=0))

In [None]:
# Standardize the features: learn the scaling parameters from the data first,
# then apply them to the very same data.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

In [None]:
# Determine the optimal number of clusters using the elbow method:
# fit K-Means once per candidate k and record the resulting inertia.
# The candidate range is defined once so the fitting loop and the x-axis of
# the plot cannot drift apart.
k_values = range(1, 15)  # candidate cluster counts 1..14
inertia = []
for k in k_values:
    # n_init is pinned explicitly: scikit-learn changed its default between
    # releases (10 -> 'auto'), and older releases warn when it is omitted.
    # Fixing it keeps the curve comparable across library versions.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)

# Plot inertia against k; the "elbow" of the curve suggests a suitable k.
plt.figure(figsize=(8, 6))
plt.plot(k_values, inertia, marker='o')
plt.title('Elbow Method for Optimal k')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.show()

In [None]:
# Apply K-Means using the number of clusters read off the elbow plot
optimal_k = 5
# n_init is pinned explicitly: scikit-learn changed its default between
# releases (10 -> 'auto'), and older releases warn when it is omitted.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
# fit_predict fits the model and returns the cluster label of every training
# row in one step, so no separate predict() pass over the same data is needed.
# The model stored in `kmeans` is fitted afterwards, exactly as with fit().
df['Cluster'] = kmeans.fit_predict(scaled_features)
print(df[['QUANTITYORDERED', 'PRICEEACH', 'SALES', 'Cluster']].head())

In [None]:
# Visualize the clusters in the plane of two standardized features.
# scaled_features keeps the column order of the feature selection:
#   0 = QUANTITYORDERED, 1 = PRICEEACH, 2 = SALES
# so columns 0 and 1 are the ones that match the axis labels below
# (columns 1 and 2 would plot PRICEEACH against SALES instead).
plt.figure(figsize=(10, 6))
plt.scatter(scaled_features[:, 0], scaled_features[:, 1], c=df['Cluster'], cmap='viridis', marker='o')
plt.title('K-Means Clustering')
plt.xlabel('Standardized QUANTITYORDERED')
plt.ylabel('Standardized PRICEEACH')
plt.colorbar(label='Cluster')
plt.show()