In [28]:
import cv2
import os
import joblib
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

In [2]:
def get_relative_file_paths(folder_path):
    """
    Recursively collect the path of every file found under a folder.

    Each returned path is the walked directory joined with the file name, so
    it is relative exactly when ``folder_path`` itself is relative.

    Args:
        folder_path (str): The folder to walk.

    Returns:
        list: File paths, in ``os.walk`` traversal order.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
    ]


In [25]:
# Load every cropped image and describe it by a 3-channel color histogram
# (8 bins per channel, flattened to a 512-element feature vector).
image_paths = sorted(get_relative_file_paths("./output/cropped_cells"))
images = [cv2.imread(image_path) for image_path in image_paths]

# cv2.imread returns None instead of raising when a file cannot be decoded.
# Fail loudly here (rather than dropping entries) so `features` stays aligned
# index-for-index with `image_paths`.
unreadable_paths = [path for path, img in zip(image_paths, images) if img is None]
if unreadable_paths:
    raise ValueError(f"Could not read these files as images: {unreadable_paths}")

# calcHist takes a *list* of images as its first argument, hence [img].
features = [
    cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()
    for img in images
]

# img = cv2.imread(image_paths[0])#, cv2.IMREAD_GRAYSCALE)
# plt.subplot(1, 2, 1)
# plt.imshow(img)#, cmap="gray")


# hist = cv2.calcHist([img],[0], None, [256], [0,256])
# hist = cv2.calcHist([img], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()
# plt.subplot(1, 2, 2)
# plt.plot(hist)


In [60]:
# Stack the per-image feature vectors into a 2D array (one row per image)
features_array = np.array(features)

# Two clusters are requested below; map each cluster index to a class name.
# NOTE(review): K-means cluster indices are arbitrary, so which index means
# 'noise' and which means 'cell' must be confirmed by inspecting the clusters.
class_mapping = {0: 'noise', 1: 'cell'}

# Apply K-means clustering. random_state and n_init are pinned so that
# re-running the notebook yields the same cluster assignment (and therefore
# the same index -> class mapping).
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
kmeans.fit(features_array)
labels = kmeans.labels_

# Make sure the output folder exists before persisting the fitted model
os.makedirs('models', exist_ok=True)
joblib.dump(kmeans, 'models/kmeans_model.pkl')

['models/kmeans_model.pkl']

In [64]:
# Load a previously saved K-means model and classify a single image with it.
# NOTE(review): this loads 'models/kmeans_model_cell1.pkl', while the
# clustering cell saves 'models/kmeans_model.pkl' — confirm which is intended.
# joblib.load unpickles the file, so only load model files from a trusted source.
loaded_kmeans = joblib.load('models/kmeans_model_cell1.pkl')

sample_path = image_paths[1]
img_predict = cv2.imread(sample_path)
# cv2.imread returns None instead of raising when the file cannot be decoded
if img_predict is None:
    raise ValueError(f"Could not read image: {sample_path}")

# Same histogram settings as used for the training features
hist_predict = cv2.calcHist([img_predict], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]).flatten()
# predict() expects a 2D array: one row per sample
hist_predict = np.array(hist_predict).reshape(1, -1)

# Use a dedicated name so the per-image `labels` array produced by the
# clustering cell is left intact for later cells.
predicted_labels = loaded_kmeans.predict(hist_predict)
print(sample_path)
predicted_labels[0]


./output/cropped_cells/331_1.png


1

In [None]:
# Assign class labels based on cluster assignments.
# Read the per-image assignments straight from the fitted model so this cell
# does not depend on whatever `labels` currently holds in the kernel.
cluster_ids = kmeans.labels_

# Translate each cluster index into its class name via the predefined mapping
class_labels = [class_mapping[int(cluster_id)] for cluster_id in cluster_ids]

# Features were computed in `image_paths` order, so the two sequences line up
for image, class_label in zip(image_paths, class_labels):
    print(f"{image} is {class_label}")