In [None]:
!pip install scikit-learn-extra
!pip install kneed
!pip install opencv-python
!pip install --upgrade joblib

import os
os.environ["LOKY_MAX_CPU_COUNT"] = "8"
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, davies_bouldin_score
from sklearn_extra.cluster import KMedoids
from kneed import KneeLocator
import cv2
import pandas as pd

# Load the dataset and discard every row that contains a missing value.
data = pd.read_csv("train_and_test2.csv").dropna()
# Only numeric columns take part in the clustering.
X = data.select_dtypes(include="number")

# Fit the scaler first, then standardise the features with it.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Sweep k = 2..10 and record, for every k, the KMeans inertia and the mean
# silhouette score of the resulting labelling.
k_range = range(2, 11)
inertia, silhouette = [], []

for k in k_range:
    # Fixed random_state keeps the sweep reproducible between runs.
    kmeans = KMeans(random_state=42, n_clusters=k).fit(X_scaled)
    inertia.append(kmeans.inertia_)
    silhouette.append(silhouette_score(X_scaled, kmeans.labels_))


# Locate the elbow of the inertia curve; the curve is treated as convex and
# decreasing in k.
knee = KneeLocator(k_range, inertia, curve="convex", direction="decreasing")

plt.figure(figsize=(12, 5))

# Left panel: elbow method (inertia vs. k).
plt.subplot(1, 2, 1)
plt.plot(k_range, inertia, marker='o')
# KneeLocator leaves `knee` as None when no elbow is detected, and axvline
# cannot draw at None, so the marker is added only when an elbow exists.
if knee.knee is not None:
    plt.axvline(knee.knee, color='r', linestyle='--')
plt.title("Метод ліктя (KMeans)")

# Right panel: mean silhouette coefficient vs. k.
plt.subplot(1, 2, 2)
plt.plot(k_range, silhouette, marker='o', color='orange')
plt.title("Середній коефіцієнт силуету")
plt.tight_layout()
plt.show()

# Use the detected elbow as the cluster count; fall back to 3 when the
# locator produced no (truthy) knee value.
optimal_k = knee.knee if knee.knee else 3
print(f"Оптимальна кількість кластерів (k): {optimal_k}")

# Fit both algorithms with the same k and seed so their labellings can be
# compared side by side.
kmeans = KMeans(n_clusters=optimal_k, random_state=42).fit(X_scaled)
labels_kmeans = kmeans.labels_

kmedoids = KMedoids(n_clusters=optimal_k, random_state=42).fit(X_scaled)
labels_kmedoids = kmedoids.labels_

from sklearn.decomposition import PCA

# Project the scaled features onto two principal components so the
# clusterings can be drawn in 2-D.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# One panel per algorithm: (labels, seaborn palette, panel title).
panels = (
    (labels_kmeans, "Set1", "KMeans Clustering"),
    (labels_kmedoids, "Set2", "KMedoids Clustering"),
)

plt.figure(figsize=(12, 5))
for position, (cluster_labels, palette_name, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    sns.scatterplot(
        x=X_pca[:, 0],
        y=X_pca[:, 1],
        hue=cluster_labels,
        palette=palette_name,
    )
    plt.title(panel_title)
plt.tight_layout()
plt.show()

# Print the same two quality metrics for each algorithm, KMeans first.
reports = (
    ("Оцінка кластеризації (KMeans):", labels_kmeans),
    ("Оцінка кластеризації (KMedoids):", labels_kmedoids),
)

for header, cluster_labels in reports:
    print(header)
    print(f"- Silhouette Score: {silhouette_score(X_scaled, cluster_labels):.4f}")
    print(f"- Davies-Bouldin Index: {davies_bouldin_score(X_scaled, cluster_labels):.4f}")
    # A blank line separates the two reports, as in the original layout.
    if cluster_labels is labels_kmeans:
        pass

# Colour-quantise an image: cluster its RGB pixels with KMeans and repaint
# every pixel with the centre of the cluster it was assigned to.
image_path = "tutanik.jpg"
image = cv2.imread(image_path)
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising; fail here with a clear message instead of an opaque OpenCV
# error in cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")
# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Downscale to 100x100 to keep the pixel count passed to KMeans small.
img_small = cv2.resize(image, (100, 100))
# One row per pixel, one column per colour channel.
pixel_values = img_small.reshape((-1, 3)).astype(float)

kmeans_img = KMeans(n_clusters=5, random_state=42).fit(pixel_values)
# Replace each pixel by its cluster centre. Round to the nearest integer
# before the uint8 cast, which would otherwise truncate the fractional part.
quantised_pixels = kmeans_img.cluster_centers_[kmeans_img.labels_]
segmented_img = np.rint(quantised_pixels).reshape(img_small.shape).astype(np.uint8)

# Show the downscaled original next to its colour-quantised version.
image_panels = (
    (img_small, "Оригінальне зображення"),
    (segmented_img, "Сегментоване (KMeans)"),
)

plt.figure(figsize=(10, 5))
for position, (picture, panel_title) in enumerate(image_panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(picture)
    plt.title(panel_title)
plt.tight_layout()
plt.show()
