In [None]:
# Environment setup: replace the preinstalled numpy with a pinned release,
# then install/upgrade PyCaret.
!pip uninstall -y numpy
!pip install numpy==1.23.5  # Use a stable version that works with PyCaret
# NOTE(review): PyCaret itself is not pinned here, and `--upgrade` may pull in
# a different numpy than the one pinned above -- confirm the resolved versions.
!pip install --upgrade pycaret

In [None]:
from pycaret.clustering import *
import pandas as pd

# Load the dataset from a fixed absolute path.
myDataSet = pd.read_csv("/content/forestfires.csv")

# Cluster counts tried for every preprocessing variant.
num_clusters = [3, 4, 5]

# Accumulator table: one row per (preprocessing variant, cluster count) run.
results = pd.DataFrame(
    columns=[
        "Preprocessing",
        "Number of Clusters",
        "Silhouette",
        "Calinski-Harabasz",
        "Davies-Bouldin",
    ]
)

def get_parameters(num_clusters, preprocessing, model_name='kmeans'):
    """Train one clustering model per cluster count and record its metrics.

    For every value in ``num_clusters`` a model is created with
    ``create_model(model_name, num_clusters=value)``, the metrics grid is
    fetched with ``pull()``, and one row is appended to the module-level
    ``results`` DataFrame.

    Args:
        num_clusters: Iterable of cluster counts to evaluate.
        preprocessing: Label stored in the "Preprocessing" column of each row.
        model_name: Model id passed to ``create_model``. Defaults to
            ``'kmeans'``, which is what this function always used before.

    Returns:
        None. The module-level ``results`` DataFrame is extended in place.
    """
    for num in num_clusters:
        create_model(model_name, num_clusters=num)
        # Fetch the metrics grid right after training so nothing else can run
        # in between; pull() is expected to return the grid of the latest call.
        performance = pull()

        # Use positional access: the grid holds a single row, and .iloc[0]
        # works whatever index label that row carries.
        results.loc[len(results)] = [
            preprocessing,
            num,
            performance['Silhouette'].iloc[0],
            performance['Calinski-Harabasz'].iloc[0],
            performance['Davies-Bouldin'].iloc[0],
        ]

def run_all(session_id=42):
    """Evaluate the clustering model under every preprocessing variant.

    For each variant this prints a banner, calls ``setup`` on the module-level
    ``myDataSet`` with that variant's options, and then calls
    ``get_parameters(num_clusters, label)`` so the metrics are recorded.

    Args:
        session_id: Seed forwarded to every ``setup`` call so repeated runs
            give the same metrics. Pass ``None`` to leave the experiment
            unseeded, as it was before this parameter existed.

    Returns:
        None.
    """
    normalize = {'normalize': True, 'normalize_method': 'zscore'}
    transform = {'transformation': True, 'transformation_method': 'yeo-johnson'}
    pca = {'pca': True, 'pca_method': 'linear'}

    # (banner printed to the user, label stored in the results table, setup options)
    variants = (
        ("For no data processing", "No Data Processing", {}),
        ("For using normalization", "Normalization", normalize),
        ("For using transformation", "Transformation", transform),
        ("For using PCA", "PCA", pca),
        ("For using N+T", "N+T", {**normalize, **transform}),
        ("For using N+T+PCA", "N+T+PCA", {**normalize, **transform, **pca}),
    )

    for banner, label, options in variants:
        print(banner)
        setup(data=myDataSet, session_id=session_id, verbose=False, **options)
        get_parameters(num_clusters, label)

# Run every preprocessing variant, then show the collected metrics table.
run_all()

print("\nClustering Performance Metrics:", results, sep="\n")


In [None]:
from pycaret.clustering import *
import pandas as pd

# Load the dataset from a fixed absolute path.
myDataSet = pd.read_csv("/content/forestfires.csv")

# Cluster counts tried for every preprocessing variant.
num_clusters = [3, 4, 5]

# Fresh accumulator table for this cell: one row per (variant, cluster count).
results = pd.DataFrame(
    columns=[
        "Preprocessing",
        "Number of Clusters",
        "Silhouette",
        "Calinski-Harabasz",
        "Davies-Bouldin",
    ]
)

def get_parameters(num_clusters, preprocessing, model_name='hclust'):
    """Train one clustering model per cluster count and record its metrics.

    For every value in ``num_clusters`` a model is created with
    ``create_model(model_name, num_clusters=value)``, the metrics grid is
    fetched with ``pull()``, and one row is appended to the module-level
    ``results`` DataFrame.

    Args:
        num_clusters: Iterable of cluster counts to evaluate.
        preprocessing: Label stored in the "Preprocessing" column of each row.
        model_name: Model id passed to ``create_model``. Defaults to
            ``'hclust'``, which is what this function always used before.

    Returns:
        None. The module-level ``results`` DataFrame is extended in place.
    """
    for num in num_clusters:
        create_model(model_name, num_clusters=num)
        # Fetch the metrics grid right after training so nothing else can run
        # in between; pull() is expected to return the grid of the latest call.
        performance = pull()

        # Use positional access: the grid holds a single row, and .iloc[0]
        # works whatever index label that row carries.
        results.loc[len(results)] = [
            preprocessing,
            num,
            performance['Silhouette'].iloc[0],
            performance['Calinski-Harabasz'].iloc[0],
            performance['Davies-Bouldin'].iloc[0],
        ]

# Each entry: (banner printed to the user, label stored in the results table,
# preprocessing options forwarded to setup). The variants run in this order.
for banner, label, setup_options in (
    ("For no data processing", "No Data Processing", {}),
    ("For using normalization", "Normalization",
     dict(normalize=True, normalize_method='zscore')),
    ("For using transformation", "Transformation",
     dict(transformation=True, transformation_method='yeo-johnson')),
    ("For using PCA", "PCA",
     dict(pca=True, pca_method='linear')),
    ("For using N+T", "N+T",
     dict(normalize=True, normalize_method='zscore',
          transformation=True, transformation_method='yeo-johnson')),
    ("For using N+T+PCA", "N+T+PCA",
     dict(normalize=True, normalize_method='zscore',
          transformation=True, transformation_method='yeo-johnson',
          pca=True, pca_method='linear')),
):
    print(banner)
    setup(data=myDataSet, verbose=False, **setup_options)
    get_parameters(num_clusters, label)

# Show the collected metrics table.
print("\nClustering Performance Metrics:", results, sep="\n")



In [None]:
from pycaret.clustering import *
import pandas as pd

# Load the dataset from a fixed absolute path.
myDataSet = pd.read_csv("/content/forestfires.csv")

# Cluster counts tried for every preprocessing variant.
num_clusters = [3, 4, 5]

# Fresh accumulator table for this cell: one row per (variant, cluster count).
results = pd.DataFrame(
    columns=[
        "Preprocessing",
        "Number of Clusters",
        "Silhouette",
        "Calinski-Harabasz",
        "Davies-Bouldin",
    ]
)

def get_parameters(num_clusters, preprocessing, model_name='meanshift'):
    """Train one clustering model per cluster count and record its metrics.

    For every value in ``num_clusters`` a model is created with
    ``create_model(model_name, num_clusters=value)``, the metrics grid is
    fetched with ``pull()``, and one row is appended to the module-level
    ``results`` DataFrame.

    Args:
        num_clusters: Iterable of cluster counts to evaluate.
        preprocessing: Label stored in the "Preprocessing" column of each row.
        model_name: Model id passed to ``create_model``. Defaults to
            ``'meanshift'``, which is what this function always used before.

    Returns:
        None. The module-level ``results`` DataFrame is extended in place.
    """
    # NOTE(review): mean shift normally derives the number of clusters itself.
    # Confirm whether num_clusters has any effect for 'meanshift'; if not, the
    # rows recorded for different values will hold identical metrics and the
    # "Number of Clusters" column will not reflect the clusters actually found.
    for num in num_clusters:
        create_model(model_name, num_clusters=num)
        # Fetch the metrics grid right after training so nothing else can run
        # in between; pull() is expected to return the grid of the latest call.
        performance = pull()

        # Use positional access: the grid holds a single row, and .iloc[0]
        # works whatever index label that row carries.
        results.loc[len(results)] = [
            preprocessing,
            num,
            performance['Silhouette'].iloc[0],
            performance['Calinski-Harabasz'].iloc[0],
            performance['Davies-Bouldin'].iloc[0],
        ]

def _run_variant(banner, label, **setup_options):
    """Print the banner, set up the experiment, record metrics, return the setup result."""
    print(banner)
    experiment = setup(data=myDataSet, verbose=False, **setup_options)
    get_parameters(num_clusters, label)
    return experiment


# One call per preprocessing variant; each setup result keeps its original
# module-level name.
kMeanClusteringParameters = _run_variant(
    "For no data processing", "No Data Processing",
)
normparam = _run_variant(
    "For using normalization", "Normalization",
    normalize=True, normalize_method='zscore',
)
transformparam = _run_variant(
    "For using transformation", "Transformation",
    transformation=True, transformation_method='yeo-johnson',
)
pcaparam = _run_variant(
    "For using PCA", "PCA",
    pca=True, pca_method='linear',
)
ntparam = _run_variant(
    "For using N+T", "N+T",
    transformation=True, transformation_method='yeo-johnson',
    normalize=True, normalize_method='zscore',
)
ntpcaparam = _run_variant(
    "For using N+T+PCA", "N+T+PCA",
    transformation=True, transformation_method='yeo-johnson',
    normalize=True, normalize_method='zscore',
    pca=True, pca_method='linear',
)

# Show the collected metrics table.
print("\nClustering Performance Metrics:", results, sep="\n")


In [None]:

# Set up an experiment with the Yeo-Johnson transformation enabled, fit a
# 4-cluster k-means model, then open the evaluation view and the cluster plot.
t_kmeans = setup(
    data=myDataSet,
    transformation=True,
    transformation_method='yeo-johnson',
    verbose=False,
)
KMeanModel = create_model('kmeans', num_clusters=4)
evaluate_model(KMeanModel)
plot_model(KMeanModel, plot='cluster')

In [None]:
# Set up an experiment with transformation and linear PCA enabled, fit a
# 4-cluster hierarchical model, then open the evaluation view and the cluster
# plot. No transformation_method is given, so the library default applies.
hier = setup(
    data=myDataSet,
    transformation=True,
    pca=True,
    pca_method='linear',
    verbose=False,
)
hclustModel = create_model('hclust', num_clusters=4)
evaluate_model(hclustModel)
plot_model(hclustModel, plot='cluster')

In [None]:
# Set up an experiment with no extra preprocessing options, fit a mean shift
# model, then open the evaluation view and the cluster plot.
meanshift = setup(
    data=myDataSet,
    verbose=False,
)
# NOTE(review): confirm whether num_clusters has any effect for 'meanshift';
# mean shift normally derives the number of clusters itself.
meanshiftModel = create_model('meanshift', num_clusters=3)
evaluate_model(meanshiftModel)
plot_model(meanshiftModel, plot='cluster')