# Market Segmentation using UK Retail Data
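This notebook performs a basic customer/market segmentation analysis using publicly available retail sales data from the UK government. The data is pivoted into one column per retail sector, standardised, and clustered with K-Means; a two-component PCA projection is used to visualise the resulting clusters.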

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

## Load and preview the data

In [None]:
# Fetch the retail sales CSV from its raw GitHub URL and preview the first rows.
# NOTE(review): this dataset has been described as coming from the ONS; the URL
# itself only shows a GitHub-hosted copy -- confirm provenance.
url = 'https://raw.githubusercontent.com/datasets/retail-sales/main/data/retail-sales-all.csv'
df = pd.read_csv(filepath_or_buffer=url)
df.head(n=5)

## Data preprocessing

In [None]:
# Parse dates, then reshape to one row per date and one column per retail sector.
# NOTE(review): assumes the CSV provides 'Date', 'Retail Sector' and
# 'All Retailing' columns -- confirm against the loaded data.
df['Date'] = pd.to_datetime(df['Date'])
# pivot_table combines duplicate (Date, Retail Sector) pairs with its default
# aggregation (mean).
pivot_df = df.pivot_table(index='Date', columns='Retail Sector', values='All Retailing')
# Forward-fill gaps with the last observed value. DataFrame.ffill() replaces
# fillna(method='ffill'), whose `method` keyword is deprecated since pandas 2.1.
# Gaps before a column's first observation are not filled by a forward fill.
pivot_df = pivot_df.ffill()
pivot_df.tail()

## Feature scaling and dimensionality reduction

In [None]:
# Standardise each pivoted column so that no single column dominates the
# distance computations purely because of its scale.
scaler = StandardScaler()
scaler.fit(pivot_df)
X_scaled = scaler.transform(pivot_df)

# Project the standardised data onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

## K-Means clustering

In [None]:
# Compare candidate cluster counts (k = 2..5) by silhouette score; a higher
# score indicates better-separated clusters.
scores = {}
for k in range(2, 6):
    # n_init is set explicitly: the scikit-learn default changed from 10 to
    # 'auto' in version 1.4 (with a FutureWarning in 1.2/1.3), so leaving it
    # implicit makes the scores depend on the installed version.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    labels = kmeans.fit_predict(X_scaled)
    scores[k] = silhouette_score(X_scaled, labels)

# Plot score against k so the best-scoring cluster count can be read off.
plt.figure(figsize=(6, 4))
plt.plot(list(scores), list(scores.values()), marker='o')
plt.title('Silhouette Score for K')
plt.xlabel('Number of Clusters')
plt.ylabel('Silhouette Score')
plt.show()

## Fit and visualize final clusters

In [None]:
# Fit K-Means with the chosen number of clusters (3 here, as an example).
# n_init is set explicitly: the scikit-learn default changed from 10 to 'auto'
# in version 1.4, so an implicit value makes the result version-dependent.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
labels = kmeans.fit_predict(X_scaled)

# Visualize clusters: the points are drawn in the 2-D PCA projection, while
# the colours come from labels fitted on the full standardised data.
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='viridis')
plt.title('Cluster Visualization using PCA')
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
plt.colorbar(label='Cluster')
plt.show()