# K-Means Clustering: Fill-in Version
This notebook guides you through implementing K-means clustering step-by-step. Follow the prompts and hints to complete the missing parts.

## Step 1: Import Libraries
We need libraries for numerical operations, data manipulation, and plotting.

In [None]:
# TODO: Import the required libraries
# HINT: Use numpy for numerical operations, pandas for data handling, and matplotlib for visualization.
import ____ as np
import ____ as pd
import ____ as plt

## Step 2: Load Dataset
Load the dataset from a CSV file and extract the features for clustering.

In [None]:
# TODO: Load the dataset and extract features
# HINT: Use pandas to read the file and select relevant columns.
# Path of the CSV file to read; it is relative, so it is resolved against the
# notebook's working directory.
input_file = 'synthetic_dataset.csv'
# Fill in the pandas reader function that loads a CSV file into a DataFrame.
data = pd.____(input_file)

# Extract features
# Select two columns by name and convert them to a NumPy array with one row
# per sample and one column per selected feature.
# NOTE(review): the plot in Step 6 labels its axes 'Weight' and 'Volume', so the
# two placeholders presumably name those columns — confirm against the CSV header.
X = data[['____', '____']].values

## Step 3: Initialize K-Means Parameters
Define the number of clusters, the maximum number of iterations, and the convergence tolerance, then pick the initial centroids at random from the data points.

In [None]:
# TODO: Set K-Means parameters
# HINT: Set k to the number of clusters (e.g., 2), and define reasonable values for max_iterations and tolerance.
# Number of clusters; also the number of initial centroids drawn below.
k = ____
# Upper bound on the number of assignment/update rounds.
max_iterations = ____
# Convergence threshold — presumably the value compared with the centroid
# shift inside k_means_clustering; confirm when filling in Step 5.
tolerance = ____

# Randomly initialize centroids
# Draw k distinct row indices of X (replace=False forbids duplicates) and use
# those rows as the starting centroids. No random seed is set in this cell, so
# the starting centroids can differ from run to run.
centroids = X[np.random.choice(X.shape[0], k, replace=False)]

## Step 4: Define K-Means Functions
Write helper functions to calculate distances and implement the K-means clustering algorithm.

In [None]:
# TODO: Define a function to calculate distances
# HINT: Calculate the Euclidean distance between points and centroids.
def calculate_distances(points, centroids):
    """Build the matrix of distances between every point and every centroid.

    The ``____`` placeholder is an exercise blank: complete it with the
    ``numpy.linalg`` function that reduces each row of ``points - centroid``
    (``axis=1``) to a single length.

    Args:
        points: 2-D array with one row per point.
        centroids: 2-D array with one row per centroid; it is iterated row
            by row, and each row is subtracted from all of ``points``.

    Returns:
        Array of shape ``(points.shape[0], centroids.shape[0])`` in which
        column ``i`` holds the distance of every point to centroid ``i``.
    """
    # Pre-allocate one column per centroid.
    distances = np.zeros((points.shape[0], centroids.shape[0]))
    for i, centroid in enumerate(centroids):
        # Broadcasting subtracts this centroid from every row of points.
        distances[:, i] = np.linalg.____(points - centroid, axis=1)
    return distances

# TODO: Define the K-means clustering function
# HINT: Use the calculate_distances function and iteratively update centroids.
def k_means_clustering(data, centroids, max_iterations, tolerance):
    """Iteratively refine centroids with the K-means assign/update loop.

    The ``____`` placeholder in the update step is an exercise blank: fill in
    the array method that reduces the rows of one cluster (``axis=0``) to a
    single centroid.

    Args:
        data: 2-D array with one row per sample.
        centroids: 2-D array of starting centroids, one row per cluster. The
            number of clusters is taken from its first dimension.
        max_iterations: Maximum number of assign/update rounds; must be >= 1.
        tolerance: The loop stops once the norm of the difference between the
            new and the current centroids drops below this value.

    Returns:
        Tuple ``(centroids, cluster_assignments)``. ``cluster_assignments``
        holds, for every row of ``data``, the index of its nearest centroid as
        computed in the last round. On convergence the returned centroids are
        the ones those assignments were computed against.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1, because no
            assignment would ever be computed.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    # Derive the cluster count from the centroids that were passed in rather
    # than from the notebook-level ``k``, so the function is self-contained.
    n_clusters = centroids.shape[0]

    for iteration in range(max_iterations):
        # Calculate distances and assign clusters
        distances = calculate_distances(data, centroids)
        cluster_assignments = np.argmin(distances, axis=1)

        # Update centroids
        # A cluster that received no points keeps its previous centroid;
        # reducing over zero rows would otherwise produce NaN coordinates.
        new_centroids = np.array([
            data[cluster_assignments == i].____(axis=0)
            if np.any(cluster_assignments == i)
            else centroids[i]
            for i in range(n_clusters)
        ])

        # Check for convergence
        if np.linalg.norm(new_centroids - centroids) < tolerance:
            print(f"Converged after {iteration + 1} iterations")
            break

        centroids = new_centroids

    return centroids, cluster_assignments

## Step 5: Run K-Means Clustering
Use the functions to perform clustering on the dataset.

In [None]:
# TODO: Run the K-means algorithm
# HINT: Call k_means_clustering with appropriate arguments.
# The two blanks are the remaining positional parameters of k_means_clustering,
# in signature order. The returned tuple is unpacked into the final centroids
# and the per-sample cluster index array used by the following cells.
final_centroids, cluster_assignments = k_means_clustering(X, centroids, ____, ____)

## Step 6: Visualize Clusters
Plot the clusters and centroids.

In [None]:
# TODO: Visualize the clusters
# HINT: Use matplotlib to scatter plot the data and centroids.
plt.figure(figsize=(8, 6))

# Draw one scatter series per cluster index so each gets its own legend entry.
for i in range(k):
    cluster_data = X[cluster_assignments == i]
    x_coords, y_coords = cluster_data[:, 0], cluster_data[:, 1]
    plt.scatter(x_coords, y_coords, label=f'Cluster {i + 1}')

# Overlay the final centroids as black crosses on top of the samples.
centroid_style = {'color': 'black', 'marker': 'x', 's': 100, 'label': 'Centroids'}
plt.scatter(final_centroids[:, 0], final_centroids[:, 1], **centroid_style)

# Titles, axis labels, legend and grid, then render the figure.
plt.title('K-Means Clustering')
plt.xlabel('Weight')
plt.ylabel('Volume')
plt.legend()
plt.grid()
plt.show()

## Step 7: Classify a New Item
Prompt the user for the new item's weight and volume, then classify it as defective or non-defective according to its nearest centroid.

In [None]:
# TODO: Prompt the user for a new item's features and classify it
# HINT: Use calculate_distances to find the nearest cluster.
print("Enter the weight and volume of the new item to classify:")
# Read both features in order (weight first); float() raises ValueError on
# non-numeric input.
new_weight, new_volume = float(input("Weight: ")), float(input("Volume: "))
# Shape (1, 2): a single sample laid out like one row of the feature matrix.
new_item = np.array([new_weight, new_volume]).reshape(1, -1)

# Predict the cluster for the new item: the index of the closest final centroid.
new_item_distances = calculate_distances(new_item, final_centroids)
new_item_cluster = new_item_distances.argmin()

# Output the classification result
# NOTE(review): the initial centroids are drawn at random, so which cluster
# ends up with index 0 can change between runs — confirm that index 0 really
# corresponds to the defective group before relying on this mapping.
if new_item_cluster != 0:
    print("The new item is classified as: Non-defective")
else:
    print("The new item is classified as: Defective")