In [57]:
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering, Birch
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

In [28]:
# Load the crop recommendation dataset into `data`. The path is relative, so
# the CSV is looked up in the kernel's current working directory.
CROP_CSV_PATH = 'Crop_recommendation.csv'
data = pd.read_csv(CROP_CSV_PATH)

In [69]:
# Preview the first rows of `data`, the frame read from the CSV.
# (`data` is the only DataFrame this notebook defines; there is no `df`.)
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [43]:
# Standardize the clustering inputs so every column contributes on the same
# scale to the distance computations used by the clustering models.
# NOTE(review): `features` is not assigned in any visible cell of this
# notebook -- confirm which columns of `data` it is meant to hold; as written
# this cell fails with a NameError after Restart & Run All.
scaler = StandardScaler()
scaler.fit(features)
scaled_features = scaler.transform(features)

In [44]:
# K-Means clustering: partition the scaled features into 3 clusters and store
# each row's cluster id in data['Cluster_KMeans'].
# n_init is passed explicitly: leaving it unset makes scikit-learn emit a
# FutureWarning because the default is changing from 10 to 'auto'. 10 is the
# value this notebook has been running with, so results are unchanged.
# random_state pins the centroid initialisation for reproducibility.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster_KMeans'] = kmeans.fit_predict(scaled_features)

  super()._check_params_vs_input(X, default_n_init=10)


In [45]:
# DBSCAN clustering: fit the density-based model on the scaled features and
# store the resulting label of every row in data['Cluster_DBSCAN'].
# eps is the neighbourhood radius (in scaled units) and min_samples the number
# of neighbours needed for a core point. Unlike the other models in this
# notebook, the number of clusters is not set here; DBSCAN determines it.
dbscan = DBSCAN(eps=0.5, min_samples=3)
dbscan.fit(scaled_features)
data['Cluster_DBSCAN'] = dbscan.labels_

In [46]:
# Hierarchical (agglomerative) clustering: build 3 clusters on the scaled
# features and store each row's cluster id in data['Cluster_Hierarchical'].
hierarchical = AgglomerativeClustering(n_clusters=3).fit(scaled_features)
data['Cluster_Hierarchical'] = hierarchical.labels_

In [59]:
# Birch clustering: cluster the scaled features and store each row's cluster
# id in data['Cluster_Birch'].
# n_clusters=3 is Birch's default; it is spelled out so the cluster count
# visibly matches the K-Means and hierarchical models it is compared against.
birch = Birch(n_clusters=3)
data['Cluster_Birch'] = birch.fit_predict(scaled_features)

In [60]:
# Evaluate each clustering with the Silhouette score
# (range -1 to 1, higher means better-separated clusters).
# NOTE(review): DBSCAN marks noise points with the label -1 and that label is
# scored here as if it were an ordinary cluster -- confirm this is intended
# before comparing DBSCAN's score with the other methods.
def _silhouette_for(label_column):
    """Return the Silhouette score of the labels stored in data[label_column]."""
    return silhouette_score(scaled_features, data[label_column])


silhouette_kmeans = _silhouette_for('Cluster_KMeans')
silhouette_dbscan = _silhouette_for('Cluster_DBSCAN')
silhouette_hierarchical = _silhouette_for('Cluster_Hierarchical')
silhouette_birch = _silhouette_for('Cluster_Birch')

In [61]:
# Evaluate each clustering with the Calinski-Harabasz score
# (unbounded, higher means denser and better-separated clusters).
# NOTE(review): DBSCAN marks noise points with the label -1 and that label is
# scored here as if it were an ordinary cluster -- confirm this is intended
# before comparing DBSCAN's score with the other methods.
def _calinski_for(label_column):
    """Return the Calinski-Harabasz score of the labels stored in data[label_column]."""
    return calinski_harabasz_score(scaled_features, data[label_column])


calinski_kmeans = _calinski_for('Cluster_KMeans')
calinski_dbscan = _calinski_for('Cluster_DBSCAN')
calinski_hierarchical = _calinski_for('Cluster_Hierarchical')
calinski_birch = _calinski_for('Cluster_Birch')

In [63]:
# Evaluate each clustering with the Davies-Bouldin index
# (minimum 0, LOWER means better-separated clusters -- the opposite direction
# from the Silhouette and Calinski-Harabasz scores).
# NOTE(review): DBSCAN marks noise points with the label -1 and that label is
# scored here as if it were an ordinary cluster -- confirm this is intended
# before comparing DBSCAN's score with the other methods.
def _davies_for(label_column):
    """Return the Davies-Bouldin index of the labels stored in data[label_column]."""
    return davies_bouldin_score(scaled_features, data[label_column])


davies_kmeans = _davies_for('Cluster_KMeans')
davies_dbscan = _davies_for('Cluster_DBSCAN')
davies_hierarchical = _davies_for('Cluster_Hierarchical')
davies_birch = _davies_for('Cluster_Birch')

In [66]:
# Outputs: print a plain-text report with every evaluation metric for every
# clustering method.
print("Clustering methods used: K-Means, DBSCAN, Hierarchical, Birch")
print("Evaluation methods used: Silhouette score, Calinski Harabasz score, Davies Bouldin Indices")

# Row labels, in the same order as the score lists below.
method_names = ["K-Means", "DBSCAN", "Hierarchical", "Birch"]

# One (heading, scores) entry per metric. Higher is better for the Silhouette
# and Calinski-Harabasz scores; lower is better for the Davies-Bouldin index.
metric_reports = [
    ("Silhouette Score",
     [silhouette_kmeans, silhouette_dbscan, silhouette_hierarchical, silhouette_birch]),
    ("Calinski Harabasz Score",
     [calinski_kmeans, calinski_dbscan, calinski_hierarchical, calinski_birch]),
    ("Davies Bouldin Indices",
     [davies_kmeans, davies_dbscan, davies_hierarchical, davies_birch]),
]

for heading, scores in metric_reports:
    print(" ")  # separator line between sections
    print(f"Evaluated using {heading}")
    for method, score in zip(method_names, scores):
        # Pad the method name to 12 characters so the colons line up.
        print(f"{method:<12}: {score}")


Evaluated using Silhouette Score
K-Means     : 0.5055757478653409
DBSCAN      : 0.6615760814079631
Hierarchical: 0.5037047648171559
Birch       : 0.5025765207051098
 
Evaluated using Calinski Harabasz Score
K-Means     : 3701.4661587231917
DBSCAN      : 2342.8181842732743
Hierarchical: 3680.352374459917
Birch       : 3643.3217023868933
 
Evaluated using Davies Bouldin Indices
K-Means     : 0.6638236421863241
DBSCAN      : 0.3633671426345738
Hierarchical: 0.6659892913950876
Birch       : 0.6614646108627372
