In [2]:
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering, MeanShift
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, calinski_harabasz_score

In [3]:
# Read the tsunami CSV hosted on GitHub into a DataFrame (requires network access)
TSUNAMI_CSV_URL = 'https://raw.githubusercontent.com/FelAmore/Data-Science/main/tsunami.csv'
data = pd.read_csv(TSUNAMI_CSV_URL)

In [4]:
# Select the columns that serve as clustering inputs
feature_columns = ['Longitude', 'Latitude', 'Magnitude']
features = data[feature_columns]

In [5]:
# Standardize the features so every column contributes on a comparable scale.
# The fitted scaler is kept in `scaler`; the transformed matrix feeds every
# clustering step below.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

In [6]:
# Apply K-Means clustering (3 clusters, fixed seed for reproducibility).
# n_init is pinned explicitly: its default changed across scikit-learn releases
# (10 -> 'auto'), so leaving it implicit makes the resulting labels depend on
# the installed version and triggers a FutureWarning on some releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster_KMeans'] = kmeans.fit_predict(scaled_features)



In [7]:
# Density-based clustering with DBSCAN (eps=0.5, min_samples=3 on the scaled features).
# Per the scikit-learn docs, samples DBSCAN considers noise receive the label -1.
dbscan = DBSCAN(min_samples=3, eps=0.5)
dbscan.fit(scaled_features)
data['Cluster_DBSCAN'] = dbscan.labels_

In [15]:
# Agglomerative (hierarchical) clustering into 3 clusters; the fitted labels
# are stored alongside the original rows.
hierarchical = AgglomerativeClustering(n_clusters=3)
data['Cluster_Hierarchical'] = hierarchical.fit(scaled_features).labels_

In [9]:
# Mean Shift clustering with library defaults (no bandwidth or cluster count is
# passed explicitly).
meanshift = MeanShift()
meanshift_labels = meanshift.fit_predict(scaled_features)
data['Cluster_MeanShift'] = meanshift_labels

In [17]:
# Silhouette score for each clustering, computed on the standardized features.
# The targets are unpacked in the same order as the label columns listed below.
# NOTE(review): DBSCAN's noise label (-1) is scored here as if it were an
# ordinary cluster -- confirm this is the intended comparison.
(
    silhouette_kmeans,
    silhouette_dbscan,
    silhouette_hierarchical,
    silhouette_meanshift,
) = (
    silhouette_score(scaled_features, data[label_column])
    for label_column in (
        'Cluster_KMeans',
        'Cluster_DBSCAN',
        'Cluster_Hierarchical',
        'Cluster_MeanShift',
    )
)

In [18]:
# Calinski-Harabasz score for each clustering, computed on the standardized
# features. The targets are unpacked in the same order as the label columns.
# NOTE(review): DBSCAN's noise label (-1) is scored here as if it were an
# ordinary cluster -- confirm this is the intended comparison.
(
    calinski_kmeans,
    calinski_dbscan,
    calinski_hierarchical,
    calinski_meanshift,
) = (
    calinski_harabasz_score(scaled_features, data[label_column])
    for label_column in (
        'Cluster_KMeans',
        'Cluster_DBSCAN',
        'Cluster_Hierarchical',
        'Cluster_MeanShift',
    )
)

In [19]:
# Print a summary of the clustering methods and both evaluation metrics.
# The second metric computed in this notebook is the Calinski-Harabasz score
# (no Jaccard score is calculated), so the summary line names it accordingly.
print("Clustering methods used: K-Means, DBSCAN, Hierarchical, MeanShift")
print("Evaluation methods used: Silhouette score, Calinski-Harabasz score")

# Silhouette results, one line per clustering method
print(" ")
print("Evaluated using Silhouette Score")
print(f"K-Means     : {silhouette_kmeans}")
print(f"DBSCAN      : {silhouette_dbscan}")
print(f"Hierarchical: {silhouette_hierarchical}")
print(f"Mean Shift  : {silhouette_meanshift}")

# Calinski-Harabasz results, one line per clustering method
print(" ")
print("Evaluated using Calinski Harabasz Score")
print(f"K-Means     : {calinski_kmeans}")
print(f"DBSCAN      : {calinski_dbscan}")
print(f"Hierarchical: {calinski_hierarchical}")
print(f"Mean Shift  : {calinski_meanshift}")


Clustering methods used: K-Means, DBSCAN, Hierarchical, MeanShift
Evaluation methods used: Silhouette score, Jaccard score
 
Evaluated using Silhouette Score
K-Means     : 0.5058499355375675
DBSCAN      : 0.5117403319104924
Hierarchical: 0.49352168288924625
Mean Shift  : 0.5892701947157417
 
Evaluated using Calinski Harabasz Score
K-Means     : 24.69038169431235
DBSCAN      : 9.52082519960284
Hierarchical: 23.509005747983036
Mean Shift  : 23.15006142249963
