# In [None]:  (notebook cell marker left over from export)


# importing the libraries

import sys
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics

def find_best_kmeans(data, min_k, max_k):
    """Pick the cluster count with the highest Silhouette Coefficient.

    Only the ``float64``/``int64`` columns of ``data`` are used, and any row
    containing a missing value in those columns is discarded before fitting.

    Args:
        data: DataFrame holding the observations to cluster.
        min_k: Smallest number of clusters to try (inclusive).
        max_k: Largest number of clusters to try (inclusive).

    Returns:
        The value of k in ``[min_k, max_k]`` whose K-Means labelling scored
        the highest silhouette; ties keep the smallest such k.  ``min_k`` is
        returned when no candidate scores above -1 (including when the range
        is empty).
    """
    features = data.select_dtypes(include=['float64', 'int64']).dropna()

    def _silhouette_for(n_clusters):
        # Fixed seed so repeated runs give the same clustering.
        model = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
        assignments = model.fit_predict(features)
        return metrics.silhouette_score(features, assignments)

    winner, top_silhouette = min_k, -1
    for candidate in range(min_k, max_k + 1):
        current = _silhouette_for(candidate)
        # Strict comparison: an equal score never displaces an earlier k.
        if current > top_silhouette:
            winner, top_silhouette = candidate, current

    return winner




if __name__ == '__main__':
    # Command-line entry point: takes the minimum and maximum number of
    # clusters as two integer arguments, evaluates K-Means on ./housing.csv
    # and prints the k with the best Silhouette Coefficient.

    # Both arguments are required.  Checking "< 3" also covers the
    # no-argument case, which would otherwise hit an IndexError below.
    if len(sys.argv) < 3:
        print("Usage: python assignment.py <number> <number>")
        sys.exit(1)

    input_data_one = sys.argv[1].strip()
    input_data_two = sys.argv[2].strip()

    # isdecimal() only accepts characters that int() can parse; isdigit()
    # would also accept e.g. superscript digits and make int() raise.
    if input_data_one.isdecimal() and input_data_two.isdecimal():

        min_k = int(input_data_one)
        max_k = int(input_data_two)

        # Debug: print the values of min_k and max_k
        print(f"min_k: {min_k}, max_k: {max_k}")

        # At least two clusters are required and the range must be
        # non-degenerate (max_k strictly greater than min_k).
        if min_k >= 2 and max_k > min_k:
            data = pd.read_csv("./housing.csv")
            print(find_best_kmeans(data, min_k, max_k))
        else:
            print("Invalid input")
    else:
        print("Invalid input")


