In [30]:
import numpy as np
from sklearn.datasets import load_iris, load_wine
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from scipy.stats import mode

# Irisデータセットの読み込み
iris = load_iris()
wine = load_wine()
X = wine.data  ####選択
y = wine.target  ###選択

# 特徴量の標準化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


def initialize_gmm_with_kmeans(X, n_clusters=3, n_init=10, random_state=None):
    best_gmm = None
    best_log_likelihood = -np.inf
    for j in range(n_init):
        kmeans = KMeans(n_clusters=n_clusters, n_init=1, random_state=j).fit(X)
        gmm = GaussianMixture(
            n_components=n_clusters,
            covariance_type="full",
            means_init=kmeans.cluster_centers_,
            random_state=random_state,
        )
        gmm.fit(X)
        log_likelihood = gmm.score(X)
        if log_likelihood > best_log_likelihood:
            best_log_likelihood = log_likelihood
            best_gmm = gmm
    return best_gmm


def kmeans_with_restarts(X, n_clusters=3, n_init=10, random_state=None):
    best_kmeans = None
    best_score = -np.inf
    for j in range(n_init):
        kmeans = KMeans(n_clusters=n_clusters, n_init=1, random_state=j)
        kmeans.fit(X)
        score = kmeans.score(X)  # 各モデルのスコア（対数尤度）
        if score > best_score:
            best_score = score
            best_kmeans = kmeans
    return best_kmeans


kf = KFold(n_splits=10, shuffle=True, random_state=42)
errors_kmeans = []
errors_gmm = []

for train_index, test_index in kf.split(X_scaled):
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # K-meansクラスタリング
    kmeans = kmeans_with_restarts(X_train, n_clusters=3, n_init=10, random_state=42)
    y_pred_kmeans = kmeans.predict(X_test)
    labels_kmeans = np.zeros_like(y_pred_kmeans)
    for j in range(kmeans.n_clusters):
        mask = y_pred_kmeans == j
        labels_kmeans[mask] = mode(y_test[mask])[0]
    error_kmeans = 1 - accuracy_score(y_test, labels_kmeans)
    errors_kmeans.append(error_kmeans)

    # GMMクラスタリング
    gmm = initialize_gmm_with_kmeans(X_train, n_clusters=3, n_init=10, random_state=42)
    y_pred_gmm = gmm.predict(X_test)
    labels_gmm = np.zeros_like(y_pred_gmm)
    for j in range(gmm.n_components):
        mask = y_pred_gmm == j
        labels_gmm[mask] = mode(y_test[mask])[0]
    error_gmm = 1 - accuracy_score(y_test, labels_gmm)
    errors_gmm.append(error_gmm)

mean_error_kmeans = np.mean(errors_kmeans)
std_dev_kmeans = np.std(errors_kmeans)
std_error_kmeans = std_dev_kmeans / np.sqrt(len(errors_kmeans))

mean_error_gmm = np.mean(errors_gmm)
std_dev_gmm = np.std(errors_gmm)
std_error_gmm = std_dev_gmm / np.sqrt(len(errors_gmm))

print(
    f"K-means CV error: {mean_error_kmeans:.3f} +/- {std_error_kmeans:.3f} (standard error)"
)
print(f"GMM CV error: {mean_error_gmm:.3f} +/- {std_error_gmm:.3f} (standard error)")

  labels_gmm[mask] = mode(y_test[mask])[0]
  labels_gmm[mask] = mode(y_test[mask])[0]
  labels_gmm[mask] = mode(y_test[mask])[0]
  labels_gmm[mask] = mode(y_test[mask])[0]


K-means CV error: 0.034 +/- 0.012 (standard error)
GMM CV error: 0.090 +/- 0.041 (standard error)
