# In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

# Limit OpenMP to a single thread (workaround for a memory-leak issue on
# Windows).
# NOTE(review): this assignment runs after numpy/scipy have been imported;
# threading runtimes commonly read this variable when they are first loaded,
# so confirm the setting actually takes effect at this position.
os.environ.update({"OMP_NUM_THREADS": "1"})

def recompute_centroids(points, assignments, previous):
    """Return the per-cluster mean of *points*, one row per centroid.

    Args:
        points: 2-D array with one observation per row.
        assignments: 1-D integer array giving the cluster index of each row
            of *points*.
        previous: 2-D array of the current centroids; its length determines
            how many clusters are updated.

    Returns:
        A new float array shaped like *previous*. A cluster that received no
        points keeps its previous centroid instead of becoming NaN (which is
        what the mean of an empty slice would produce).
    """
    updated = np.array(previous, dtype=float)  # copy; never mutate the input
    for k in range(len(updated)):
        members = points[assignments == k]
        if len(members):
            updated[k] = members.mean(axis=0)
    return updated


# Step 1: Define the dataset (seven 2-D observations, one per row)
data = np.array([
    [1.0, 1.0],
    [1.5, 2.0],
    [3.0, 4.0],
    [5.0, 7.0],
    [3.5, 5.0],
    [4.5, 5.0],
    [3.5, 4.5],
])

# Step 2: Initialize centroids manually (the first and fourth data points)
centroids = np.array([[1.0, 1.0], [5.0, 7.0]])

# Step 3: Compute distances and assign clusters.
# distances[i, k] is the Euclidean distance from point i to centroid k;
# argmin resolves ties in favour of the lowest centroid index.
distances = cdist(data, centroids, 'euclidean')
labels = np.argmin(distances, axis=1)

# Step 4: Recompute centroids (cluster count follows len(centroids))
new_centroids = recompute_centroids(data, labels, centroids)

# Step 5: Print results
print("Cluster Assignments:", labels)
print("New Centroids:", new_centroids)

# Step 6: Plot the results
# Data points, coloured by their assigned cluster label.
plt.scatter(
    *data.T,
    c=labels,
    cmap='viridis',
    marker='o',
    edgecolors='k',
)
# Updated centroids, drawn as large red crosses on top of the points.
plt.scatter(
    *new_centroids.T,
    c='red',
    marker='x',
    s=200,
    label='Centroids',
)
plt.title('Manual K-Means Clustering - 1st Iteration')
plt.xlabel('Variable 1')
plt.ylabel('Variable 2')
plt.legend()
plt.show()