In [1]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler


In [2]:
def load_images_from_folder(folder, image_size=(64, 64), label=None):
    """Load every decodable image in ``folder`` as a resized, flattened array.

    Parameters
    ----------
    folder : str
        Directory holding the images of a single class.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. Defaults to ``(64, 64)``.
    label : int, optional
        Label attached to every image of the folder. When omitted, it is
        inferred from the folder's own name (the last path component):
        a name containing ``'cat'`` gives 0, one containing ``'dog'`` gives 1.

    Returns
    -------
    images : list
        One flattened array per file for which ``cv2.imread`` returned a result.
    labels : list
        The folder's label repeated once per loaded image, so both lists
        always have the same length.

    Raises
    ------
    ValueError
        If ``label`` is omitted and the folder name contains neither
        ``'cat'`` nor ``'dog'``.
    """
    if label is None:
        # Only the last path component names the class. Matching against the
        # whole path would mislabel e.g. ".../cats_vs_dogs/dog" as a cat folder.
        class_name = os.path.basename(os.path.normpath(folder)).lower()
        if 'cat' in class_name:
            label = 0  # Label for cat
        elif 'dog' in class_name:
            label = 1  # Label for dog
        else:
            # Previously such a folder produced images without labels,
            # leaving the two returned lists misaligned.
            raise ValueError(
                "Cannot infer a label from folder %r: expected its name to "
                "contain 'cat' or 'dog', or pass label= explicitly" % (folder,)
            )

    images = []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # cv2.imread gave nothing back for this entry; skip it.
            continue
        img = cv2.resize(img, image_size)  # Resize to reduce computation
        images.append(img.flatten())  # Flatten the image

    labels = [label] * len(images)
    return images, labels

In [3]:
# Training split: one folder per class, loaded separately.
# NOTE: these are absolute paths on one specific machine; adjust them before running elsewhere.
train_cat_folder = "D:/UIT/nam3/hk2/khaithacdulieu/project/final_project/dataset/train/cat"
train_dog_folder = "D:/UIT/nam3/hk2/khaithacdulieu/project/final_project/dataset/train/dog"
train_images_cat, train_labels_cat = load_images_from_folder(folder=train_cat_folder)
train_images_dog, train_labels_dog = load_images_from_folder(folder=train_dog_folder)

In [7]:
# Merge the per-class lists into a single training set (cats first, then dogs)
train_images = [*train_images_cat, *train_images_dog]
train_labels = [*train_labels_cat, *train_labels_dog]

In [8]:
# Turn the training lists into numpy arrays
train_images, train_labels = map(np.array, (train_images, train_labels))

In [9]:
# Standardize the features; the scaler is fitted on the training images only
# and is kept in `scaler` so the same transformation can be reused later.
scaler = StandardScaler()
train_images = scaler.fit_transform(X=train_images)

In [10]:
# Apply K-means clustering
# Two clusters, one per expected class (cat / dog).
# n_init is set explicitly: scikit-learn 1.4 changed its default from 10 to
# 'auto', so omitting it makes the clustering depend on the installed version
# (and triggers a FutureWarning on 1.2/1.3).
kmeans = KMeans(n_clusters=2, random_state=0, n_init=10)
kmeans.fit(train_images)
# Cluster index assigned to each training image, in the same order as train_labels.
train_clusters = kmeans.labels_

In [11]:
# Test split: one folder per class, loaded separately.
# NOTE: these are absolute paths on one specific machine; adjust them before running elsewhere.
test_cat_folder = "D:/UIT/nam3/hk2/khaithacdulieu/project/final_project/dataset/test/cat"
test_dog_folder = "D:/UIT/nam3/hk2/khaithacdulieu/project/final_project/dataset/test/dog"
test_images_cat, test_labels_cat = load_images_from_folder(folder=test_cat_folder)
test_images_dog, test_labels_dog = load_images_from_folder(folder=test_dog_folder)

In [12]:
# Merge the per-class lists into a single test set (cats first, then dogs)
test_images = [*test_images_cat, *test_images_dog]
test_labels = [*test_labels_cat, *test_labels_dog]

In [13]:
# Turn the test lists into numpy arrays
test_images, test_labels = map(np.array, (test_images, test_labels))

In [14]:
# Scale the test images with the already-fitted scaler (transform only, no refit).
# Note: test_images is overwritten, so running this cell twice scales the data twice.
test_images = scaler.transform(X=test_images)

In [15]:

# Assign every test image to one of the clusters of the fitted K-means model
test_clusters = kmeans.predict(X=test_images)

In [16]:
# Evaluate the result
# K-means cluster ids are arbitrary, so they have to be mapped onto the class
# labels (0 = cat, 1 = dog) before scoring. The mapping is decided on the
# TRAINING split only: choosing it by comparing against the test labels leaks
# the ground truth into the predictions and forces the reported accuracy to
# be at least 0.5.
train_matches = np.sum(train_clusters == train_labels)
flip_clusters = train_matches < len(train_labels) / 2

# Work on a copy so test_clusters keeps the raw cluster ids.
adjusted_test_clusters = np.copy(test_clusters)
if flip_clusters:
    # Cluster 0 mostly holds dogs on the training data: swap 0 <-> 1.
    adjusted_test_clusters = 1 - adjusted_test_clusters

print("Confusion Matrix:")
print(confusion_matrix(test_labels, adjusted_test_clusters))
print("Accuracy Score:", accuracy_score(test_labels, adjusted_test_clusters))

Confusion Matrix:
[[1099  901]
 [1025  975]]
Accuracy Score: 0.5185
