In [None]:
import dautil as dl
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import silhouette_samples
from IPython.display import HTML

In [None]:
# Context object shared by the dautil notebook helpers used in later cells
# (presumably a per-notebook settings store keyed by this name -- confirm in dautil).
context = dl.nb.Context('evaluating_clusters')
lr = dl.nb.LatexRenderer(context=context, chapter=10, start=8)
# Silhouette coefficient of sample i: a(i) and b(i) are the two distance terms
# being compared, normalised by the larger of the two.
lr.render(
    r' s(i) = \frac{b(i) - a(i)}{\max\{a(i),b(i)\}}'
)

In [None]:
def plot_samples(ax, years, labels, i, avg):
    """Plot the per-sample silhouette values of one clustering on ``ax``.

    The silhouette values are computed against the notebook-level feature
    matrix ``X`` (a global, not a parameter), so ``X`` must be defined
    before this function is called.

    Parameters
    ----------
    ax : axes object to draw on; also receives the title and axis labels.
    years : values handed to ``dl.plotting.plot_text`` together with the
        silhouette values (presumably the x positions, one per sample --
        confirm against dautil).
    labels : cluster label per sample; used both for the silhouette
        computation and as the labels passed to ``plot_text``.
    i : number of clusters, shown in the title.
    avg : average silhouette score, shown in the title with two decimals.
    """
    per_sample_scores = silhouette_samples(X, labels)
    dl.plotting.plot_text(
        ax, years, per_sample_scores, labels, add_scatter=True
    )

    title = 'KMeans k={0} Silhouette avg={1:.2f}'.format(i, avg)
    ax.set_xlabel('Year')
    ax.set_ylabel('Silhouette score')
    ax.set_title(title)

In [None]:
# Downsample the weather data to one row per year ('A' = annual frequency)
# and drop the rows that still contain missing values.
#
# The aggregation is spelled out with .mean(): since pandas 0.18, resample()
# returns a deferred Resampler instead of an already-averaged frame, so
# calling .dropna() directly on it no longer works. mean() was the implicit
# default of the old API, so the resulting values are the same.
# NOTE(review): newer pandas releases deprecate the 'A' alias in favour of
# 'YE' -- switch when the minimum supported pandas version allows it.
df = dl.data.Weather.load().resample('A').mean().dropna()

# One year per remaining row, aligned with the rows of X.
years = [d.year for d in df.index]

# Feature matrix handed to KMeans and the silhouette functions.
X = df.values

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply dautil's seaborn-like plot styling (going by the helper's name;
# see dautil for the exact settings it changes).
dl.options.mimic_seaborn()
# Widgets bound to the shared context. Presumably RcWidget tweaks matplotlib
# rc settings and LabelWidget edits the labels of a 2x2 subplot grid --
# TODO confirm against dautil.
dl.nb.RcWidget(context)
dl.nb.LabelWidget(2, 2, context)

In [None]:
# Four panels (2x2): per-sample silhouette plots for k = 2, 3 and 4, plus one
# panel with the average silhouette score for every k that was tried.
sp = dl.plotting.Subplotter(2, 2, context)
rng = range(2, 9)
avgs = []

for i in rng:
    # Fixed random_state so the cluster assignments are reproducible.
    kmeans = KMeans(n_clusters=i, random_state=37)
    labels = kmeans.fit_predict(X)
    avg = silhouette_score(X, labels)
    avgs.append(avg)

    # Only the three smallest k get a per-sample panel; larger k contribute
    # to the average-score curve only.
    if i >= 5:
        continue

    # k = 2 draws on the axes the subplotter currently exposes; every later
    # k has to advance to the next panel first.
    if i > 2:
        sp.next_ax()

    plot_samples(sp.ax, years, labels, i, avg)

# Next panel after the per-sample plots: average silhouette score against k.
sp.next_ax().plot(rng, avgs)
sp.label()
HTML(sp.exit())