In [1]:
from genetic_utils import *

In [8]:
# Load the "fashion-TP" dataset as a feature array plus a label vector.
# csv_data_loader is not defined in this notebook; presumably it arrives via
# the `from genetic_utils import *` star import (confirm).
# NOTE: `lables` is a misspelling of "labels", but later cells read it under
# this exact name, so the spelling is kept here.
data, lables = csv_data_loader("fashion-TP")

In [9]:
# Display the dimensions of the loaded array as a sanity check.
data.shape

(7100, 784)

In [10]:
import numpy as np
import matplotlib.pyplot as plt

def _assign_to_nearest(data, centroids):
    """Return, for every row of ``data``, the index of its closest centroid."""
    # One row of squared Euclidean distances per centroid. Looping over the
    # centroids keeps the intermediate at (k, n_samples) instead of building
    # a full (n_samples, k, n_features) difference tensor.
    distances = np.array(
        [np.sum((data - centroid) ** 2, axis=1) for centroid in centroids]
    )
    return np.argmin(distances, axis=0)


def k_means_clustering(data, k, max_iterations=100, tolerance=1e-4):
    """Cluster the rows of ``data`` into ``k`` groups with Lloyd's K-means.

    Centroids are initialised to ``k`` distinct rows drawn with NumPy's global
    random generator, so call ``np.random.seed`` beforehand for repeatable
    results.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_features)
        Samples to cluster, one per row.
    k : int
        Number of clusters; must satisfy ``1 <= k <= n_samples``.
    max_iterations : int, default 100
        Upper bound on the number of centroid updates. With ``0`` the samples
        are simply assigned to the randomly chosen initial centroids.
    tolerance : float, default 1e-4
        Iteration stops once the Frobenius norm of the centroid shift between
        two consecutive updates drops below this value.

    Returns
    -------
    labels : numpy.ndarray, shape (n_samples,)
        Index of the centroid each sample is assigned to.
    centroids : numpy.ndarray, shape (k, n_features)
        Final centroid coordinates (floating point).

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional or ``k`` is outside
        ``1..n_samples``.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (n_samples, n_features)")
    if not 1 <= k <= len(data):
        raise ValueError(
            f"k must be between 1 and the number of samples ({len(data)}), got {k}"
        )

    # Initialise centroids from k distinct samples. Casting to float keeps the
    # first distance computation from wrapping around on unsigned integer data.
    centroids = data[np.random.choice(len(data), k, replace=False)].astype(float)
    labels = _assign_to_nearest(data, centroids)

    for _ in range(max_iterations):
        # Start from the current centroids so that a cluster that lost all of
        # its members keeps its previous position instead of turning into NaN
        # (the mean of an empty slice), which would poison every later step.
        new_centroids = centroids.copy()
        for i in range(k):
            members = data[labels == i]
            if len(members) > 0:
                new_centroids[i] = members.mean(axis=0)

        # Converged: the centroids barely moved, so the current assignment
        # already matches the centroids being returned.
        if np.linalg.norm(new_centroids - centroids) < tolerance:
            break

        centroids = new_centroids
        # Re-assign right after moving the centroids so the returned labels
        # always correspond to the returned centroids, even when the loop
        # stops because the iteration cap was reached.
        labels = _assign_to_nearest(data, centroids)

    return labels, centroids




In [11]:
# Demo: cluster the previously loaded `data` array into two groups.
if __name__ == "__main__":
    # Number of clusters to search for.
    k = 2

    # Seed NumPy's global RNG so the random centroid initialisation inside
    # k_means_clustering is repeatable from run to run.
    np.random.seed(42)

    # Run K-means on the loaded samples.
    labels, centroids = k_means_clustering(data, k=k)

    # Report the per-sample cluster ids and the dimensions of the centroid array.
    print("Cluster labels:", labels)
    print("Centroids shape:", centroids.shape)


Cluster labels: [1 0 1 ... 0 1 0]
Centroids shape: (2, 784)


In [12]:
from sklearn.metrics import confusion_matrix
import numpy as np
# Compare the K-means assignment against the loaded labels.
# The reference vector is `1 - lables` (presumably flipping 0/1 labels);
# the predictions are the cluster ids produced by k_means_clustering.
# NOTE(review): cluster ids are arbitrary because they depend on the random
# centroid initialisation; confirm the flip maps clusters to classes as intended.
predicted_labels = labels
true_labels = 1 - lables

# Raw counts of (true label, predicted label) pairs.
cm = confusion_matrix(true_labels, predicted_labels)

# Divide every row by its own total so each row sums to 1
# (values are fractions in [0, 1], not multiplied by 100).
cm_percentage = cm / cm.sum(axis=1, keepdims=True)

print("Confusion Matrix (Percentage):")
print(cm_percentage)

Confusion Matrix (Percentage):
[[0.39885714 0.60114286]
 [0.45       0.55      ]]


In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Score the K-means assignment against the reference labels.
# The reference vector is `1 - lables` and the predictions are the cluster ids,
# redefined here so this cell does not rely on the confusion-matrix cell.
predicted_labels = labels
true_labels = 1 - lables

# Evaluate precision, recall and F1 in that order with the same two arguments.
precision, recall, f1 = (
    metric(true_labels, predicted_labels)
    for metric in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Precision: 0.012901712409101571
Recall: 0.55
F1 Score: 0.025212010084804035
