# K-Means Clustering on CSV Data
This notebook demonstrates K-means clustering on data loaded from a CSV file, visualizes the clusters, and allows user input to classify new items.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load Dataset
We load the dataset from a CSV file.

In [None]:
# CSV file that holds the items to cluster
input_file = 'synthetic_dataset.csv'
data = pd.read_csv(input_file)

# Feature matrix: one row per item, columns are Weight and Volume (in that order)
X = data.loc[:, ['Weight', 'Volume']].to_numpy()

## Initialize K-Means Parameters
Set the number of clusters, the maximum number of iterations, and the convergence tolerance, then pick `k` distinct data points at random as the initial centroids.

In [None]:
k = 2                  # number of clusters
max_iterations = 100   # upper bound on assignment/update rounds
tolerance = 1e-4       # stop once the centroids move less than this (Frobenius norm)

# Fixed seed so that Restart & Run All reproduces the same initial centroids
# (and therefore the same cluster numbering) for a given dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Use k distinct rows of X as the starting centroids; replace=False prevents
# the same row from being picked twice.
centroids = X[rng.choice(X.shape[0], size=k, replace=False)]

## Define K-Means Functions
Helper functions to calculate distances and perform the K-means clustering algorithm.

In [None]:
def calculate_distances(points, centroids):
    """Return the Euclidean distance from every point to every centroid.

    Parameters
    ----------
    points : np.ndarray
        2-D array with one point per row.
    centroids : np.ndarray
        2-D array with one centroid per row (same number of columns as
        ``points``).

    Returns
    -------
    np.ndarray
        Float array of shape ``(len(points), len(centroids))`` where entry
        ``[p, c]`` is the distance between point ``p`` and centroid ``c``.
    """
    n_points, n_centroids = points.shape[0], centroids.shape[0]
    # Every column is written below, so the buffer does not need zero-filling.
    dist_matrix = np.empty((n_points, n_centroids))
    for col in range(n_centroids):
        offsets = points - centroids[col]
        dist_matrix[:, col] = np.linalg.norm(offsets, axis=1)
    return dist_matrix

def k_means_clustering(data, centroids, max_iterations, tolerance):
    """Run Lloyd-style K-means starting from the given centroids.

    Each round assigns every point to its nearest centroid and then moves each
    centroid to the mean of the points assigned to it. The loop stops early
    once the centroids move by less than ``tolerance``.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one sample per row.
    centroids : np.ndarray
        2-D array of initial centroids, one per row. The number of clusters is
        taken from this array, not from any notebook-level variable.
    max_iterations : int
        Maximum number of assignment/update rounds.
    tolerance : float
        Convergence threshold on the Frobenius norm of the centroid shift.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The final centroids (float array) and, for every sample, the index of
        the nearest of those centroids.
    """
    # Work in float so that mean values can be stored without truncation even
    # when the initial centroids come from an integer-typed array.
    centroids = np.asarray(centroids, dtype=float)
    n_clusters = centroids.shape[0]

    for iteration in range(max_iterations):
        # Calculate distances and assign clusters
        distances = calculate_distances(data, centroids)
        cluster_assignments = np.argmin(distances, axis=1)

        # Update centroids. A cluster that received no points keeps its
        # previous centroid; averaging an empty slice would produce NaN.
        new_centroids = centroids.copy()
        for i in range(n_clusters):
            members = data[cluster_assignments == i]
            if members.shape[0] > 0:
                new_centroids[i] = members.mean(axis=0)

        # Check for convergence
        if np.linalg.norm(new_centroids - centroids) < tolerance:
            print(f"Converged after {iteration + 1} iterations")
            break

        centroids = new_centroids
    else:
        # No convergence within the budget (or the budget was zero): labels
        # from the last round, if any, refer to the previous centroids, so
        # recompute them against the centroids that are actually returned.
        cluster_assignments = np.argmin(calculate_distances(data, centroids), axis=1)

    return centroids, cluster_assignments

## Run K-Means Clustering
Perform the clustering on the dataset.

In [None]:
# Cluster the feature matrix, starting from the randomly chosen centroids
final_centroids, cluster_assignments = k_means_clustering(
    data=X,
    centroids=centroids,
    max_iterations=max_iterations,
    tolerance=tolerance,
)

## Visualize Clusters
Plot the clusters and centroids.

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# One scatter series per cluster so each gets its own colour and legend entry
for cluster_idx in range(k):
    members = X[cluster_assignments == cluster_idx]
    ax.scatter(members[:, 0], members[:, 1], label=f'Cluster {cluster_idx + 1}')

# Overlay the final centroids as black crosses
ax.scatter(
    final_centroids[:, 0],
    final_centroids[:, 1],
    color='black',
    marker='x',
    s=100,
    label='Centroids',
)

ax.set_title('K-Means Clustering on Weight vs Volume Data')
ax.set_xlabel('Weight')
ax.set_ylabel('Volume')
ax.legend()
ax.grid()
plt.show()

## Classify a New Item
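Prompt the user for a weight and a volume, assign the item to the nearest centroid, and report it as defective (cluster 0) or non-defective (any other cluster). Note that K-means cluster indices are arbitrary, so check which cluster actually corresponds to defective items before relying on this label.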

In [None]:
def _read_finite_float(prompt):
    """Ask repeatedly until the user types a finite number; return it as a float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again.")
            continue
        # float() accepts 'nan' and 'inf'; those would make every distance
        # NaN/inf and argmin would then silently pick cluster 0.
        if np.isfinite(value):
            return value
        print("Please enter a finite number.")


print("Enter the weight and volume of the new item to classify:")
new_weight = _read_finite_float("Weight: ")
new_volume = _read_finite_float("Volume: ")
new_item = np.array([[new_weight, new_volume]])  # shape (1, 2): a single sample

# Predict the cluster for the new item: the index of the nearest centroid
new_item_distances = calculate_distances(new_item, final_centroids)
new_item_cluster = int(np.argmin(new_item_distances))

# Output the classification result
# NOTE(review): cluster indices come from the initial centroids and carry no
# inherent meaning; treating cluster 0 as "Defective" is an assumption that
# should be confirmed against the data.
if new_item_cluster == 0:
    print("The new item is classified as: Defective")
else:
    print("The new item is classified as: Non-defective")