# Customer Segmentation Experiments
Exploratory data analysis (EDA) and clustering experiments using KMeans, DBSCAN, and Gaussian mixture models (GMM).

In [None]:
import pandas as pd
import numpy as np
from src.preprocessing import preprocess_fit_transform
from src.segmenter import Segmenter
from src.metrics import clustering_metrics
from src.visualization import reduce_pca, reduce_umap, scatter_2d, plot_segment_profiles
import matplotlib.pyplot as plt
from pathlib import Path

# End-to-end KMeans experiment: load the customer table, preprocess it,
# cluster, print quality metrics, plot a 2-D PCA projection, and save the
# per-segment profile figure under ../reports/.

# --- Load ------------------------------------------------------------------
df = pd.read_csv('../data/customers.csv')
# Only pass 'customer_id' as an id column when the CSV actually has it, so the
# notebook also runs on exports that lack that column.
id_cols = ['customer_id'] if 'customer_id' in df.columns else []

# --- Preprocess ------------------------------------------------------------
# `pipe` is the second value returned by preprocess_fit_transform; it is kept
# in the notebook namespace but not used further in this cell.
X, pipe = preprocess_fit_transform(df, id_cols=id_cols, winsorize=True)

# --- Cluster and evaluate --------------------------------------------------
seg = Segmenter(method='kmeans', n_clusters=4)
labels = seg.fit_predict(X)
metrics = clustering_metrics(X, labels)
print(metrics)

# --- Visualise in 2-D ------------------------------------------------------
# reduce_pca returns a pair; only the first element is needed for plotting.
X2, _ = reduce_pca(X, n_components=2)
scatter_2d(X2, labels, title='PCA (2D) - KMeans')
plt.show()

# --- Segment profiles ------------------------------------------------------
# Profiles are drawn from the raw dataframe minus the id columns;
# errors='ignore' keeps the drop safe if a listed column is missing.
fig = plot_segment_profiles(df.drop(columns=id_cols, errors='ignore'), labels)

# savefig does not create missing parent directories, so make sure
# ../reports/ exists first instead of failing after all the work above.
report_path = '../reports/segment_profiles.png'
Path(report_path).parent.mkdir(parents=True, exist_ok=True)
fig.savefig(report_path, dpi=150)
