K-Means Clustering function

In [2]:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

def find_optimal_k_and_cluster(forecasted_volatilities, min_k=3, max_k=5):
    """Pick the cluster count in ``[min_k, max_k]`` with the best silhouette score.

    For every candidate k, a ``KMeans`` model (``random_state=42``) is fitted
    to ``forecasted_volatilities`` and the mean silhouette score of the
    resulting labels is recorded.

    Note: despite the function name, only the chosen k and the scores are
    returned; the cluster labels themselves are not.

    Args:
        forecasted_volatilities: Samples to cluster. Passed unchanged to
            ``KMeans.fit_predict`` and ``silhouette_score`` (presumably a 2-D
            array-like of shape (n_samples, n_features) -- confirm against
            the caller).
        min_k: Smallest number of clusters to try (inclusive). Must be >= 2.
        max_k: Largest number of clusters to try (inclusive). Must be
            >= ``min_k``.

    Returns:
        A tuple ``(optimal_k, silhouette_scores)`` where ``silhouette_scores``
        lists one score per k in ascending k order and ``optimal_k`` is the k
        whose score is highest (the smallest such k on ties, since
        ``np.argmax`` returns the first maximum).

    Raises:
        ValueError: If ``min_k`` is below 2 or ``max_k`` is below ``min_k``.
    """
    # Fail fast with a clear message. Without these checks the same inputs
    # still end in a ValueError, but only after a wasted KMeans fit (min_k < 2)
    # or as an opaque "argmax of an empty sequence" error (max_k < min_k).
    if min_k < 2:
        raise ValueError(
            f"min_k must be at least 2 to compute a silhouette score, got {min_k}"
        )
    if max_k < min_k:
        raise ValueError(
            f"max_k ({max_k}) must be greater than or equal to min_k ({min_k})"
        )

    silhouette_scores = []

    # Compute the silhouette score for each candidate k.
    for k in range(min_k, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42)
        cluster_labels = kmeans.fit_predict(forecasted_volatilities)
        silhouette_scores.append(
            silhouette_score(forecasted_volatilities, cluster_labels)
        )

    # Position i in the score list corresponds to k = min_k + i. Convert the
    # NumPy index to a plain int so callers do not receive a NumPy scalar.
    optimal_k = min_k + int(np.argmax(silhouette_scores))
    return optimal_k, silhouette_scores

def plot_silhouette_scores(silhouette_scores, min_k=3, max_k=5):
    """Draw a line chart of silhouette score against the number of clusters.

    The x-axis covers every integer k from ``min_k`` to ``max_k`` inclusive,
    so ``silhouette_scores`` is expected to hold one score per k in that
    order. The figure is displayed via ``plt.show()``; nothing is returned.
    """
    cluster_counts = range(min_k, max_k + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(cluster_counts, silhouette_scores, marker='o', linestyle='-')

    # Title, axis labels and grid for the current axes.
    plt.title('Silhouette Score vs Number of Clusters')
    plt.xlabel('Number of Clusters (k)')
    plt.ylabel('Silhouette Score')
    plt.grid(True)

    plt.show()


Silhouette Score plot