In [None]:
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.metrics import silhouette_score

class CLARANS:
    """k-medoids clustering by randomized local search (CLARANS-style).

    Each of ``num_local`` restarts begins from a random set of medoids and
    repeatedly tries random single-medoid swaps. A swap that lowers the total
    distance of every sample to its nearest medoid is accepted and resets the
    failure counter; a restart ends after ``max_neighbors`` consecutive
    non-improving swaps. The cheapest medoid set over all restarts is kept.

    Parameters
    ----------
    n_clusters : int, default=3
        Number of medoids (clusters) to select.
    max_neighbors : int, default=5
        Consecutive non-improving swaps tolerated before a restart stops.
    num_local : int, default=5
        Number of independent random restarts.
    random_state : None, int or numpy.random.RandomState, default=None
        ``None`` draws from NumPy's global random state. An int (including
        ``0``) seeds a private ``RandomState`` so runs are reproducible without
        touching the global state. A ``RandomState`` instance is used as is.

    Attributes
    ----------
    medoids : ndarray of int or None
        Row indices into the fitted ``X`` of the selected medoids; ``None``
        until ``fit`` has been called.
    labels_ : ndarray of int or None
        For every sample, the position (within ``medoids``) of its nearest
        medoid; ``None`` until ``fit`` has been called.
    """

    def __init__(self, n_clusters=3, max_neighbors=5, num_local=5, random_state=None):
        self.n_clusters = n_clusters
        self.max_neighbors = max_neighbors
        self.num_local = num_local
        self.random_state = random_state
        self.medoids = None
        self.labels_ = None

    def fit(self, X):
        """Search for medoids on ``X`` and assign every sample to a cluster.

        Parameters
        ----------
        X : array-like, 2-D
            Samples in rows. Converted with ``np.asarray`` so that integer
            array indexing works for DataFrames and nested lists too.

        Returns
        -------
        CLARANS
            ``self``, with ``medoids`` and ``labels_`` populated.

        Raises
        ------
        ValueError
            If ``n_clusters`` is not in ``[1, len(X)]`` or ``num_local < 1``.
        """
        X = np.asarray(X)
        n_samples = len(X)
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters must be between 1 and the number of samples "
                f"({n_samples}); got {self.n_clusters}."
            )
        if self.num_local < 1:
            raise ValueError(f"num_local must be at least 1; got {self.num_local}.")

        rng = self._make_rng()
        # When every sample is already a medoid there is no candidate left to
        # swap in; the rejection step below would otherwise spin forever.
        can_swap = self.n_clusters < n_samples

        best_medoids = None
        best_cost = np.inf
        for _ in range(self.num_local):
            # Randomly initialize medoids
            medoids = rng.choice(n_samples, self.n_clusters, replace=False)
            current_cost = self._compute_cost(X, medoids)
            neighbors_examined = 0

            while can_swap and neighbors_examined < self.max_neighbors:
                i = rng.choice(self.n_clusters)
                j = rng.choice(n_samples)
                if j in medoids:
                    # Not a real neighbor: redraw without counting the attempt.
                    continue
                new_medoids = medoids.copy()
                new_medoids[i] = j
                new_cost = self._compute_cost(X, new_medoids)
                if new_cost < current_cost:
                    # Accept the swap and start counting failures afresh.
                    medoids = new_medoids
                    current_cost = new_cost
                    neighbors_examined = 0
                else:
                    neighbors_examined += 1

            if current_cost < best_cost:
                best_cost = current_cost
                best_medoids = medoids

        self.medoids = best_medoids
        self.labels_ = self._assign_labels(X, self.medoids)
        return self

    def _make_rng(self):
        """Return the object whose ``choice`` method supplies random draws."""
        if self.random_state is None:
            # Module-level functions share NumPy's global random state.
            return np.random
        if isinstance(self.random_state, np.random.RandomState):
            return self.random_state
        # A seeded legacy RandomState yields the same stream that
        # np.random.seed(seed) would, without reseeding the global state.
        return np.random.RandomState(self.random_state)

    def _compute_cost(self, X, medoids):
        """Return the sum over samples of the distance to the nearest medoid."""
        distances = pairwise_distances(X, X[medoids])
        min_distances = np.min(distances, axis=1)
        return np.sum(min_distances)

    def _assign_labels(self, X, medoids):
        """Return, per sample, the column index of its nearest medoid."""
        distances = pairwise_distances(X, X[medoids])
        return np.argmin(distances, axis=1)



In [None]:
import pandas as pd 

# Read the engineered dataset from its hard-coded CSV location.
df_engineered = pd.read_csv("/content/content/fire_data_engineered.csv")

# Names of the columns whose dtype is still object or category.
non_numeric_cols = df_engineered.select_dtypes(include=['object', 'category']).columns

# Heading on the first line, the column Index on the next.
print("Non-numeric columns still present:", non_numeric_cols, sep="\n")

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Columns that must not enter the feature matrix.
exclude_cols = ['class']

# One pipeline: drop the excluded columns (ignored when absent), keep only
# numeric dtypes, turn +/-inf into NaN, then drop every row containing a NaN.
X_df = (
    df_engineered
    .drop(columns=exclude_cols, errors='ignore')
    .select_dtypes(include=[np.number])
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Plain ndarray view of the cleaned features.
X = X_df.to_numpy()

# Standardize each column, then store the result as float32.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X).astype(np.float32)


In [None]:
import time 
# Fit the from-scratch CLARANS model and measure how long fitting takes.
clarans = CLARANS(n_clusters=3, max_neighbors=10, num_local=5, random_state=42)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
clarans.fit(X_scaled)
t_clarans = time.perf_counter() - start

t_clarans  # elapsed seconds, displayed as the cell output

In [None]:
from sklearn_extra.cluster import CLARA
import numpy as np
import time 

# Fit the CLARA baseline and measure how long fitting takes.
clara = CLARA(
    n_clusters=3,
    random_state=42
)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
clara.fit(X_scaled)
# Kept under its own name: storing this in `t_clarans` would overwrite the
# CLARANS timing and make the two runtimes impossible to compare.
t_clara = time.perf_counter() - start

t_clara  # elapsed seconds, displayed as the cell output

In [None]:
from sklearn.metrics import silhouette_score, davies_bouldin_score

def evaluate_clustering(X, labels, name="Model"):
    """Print a short quality report for one clustering of ``X``.

    The report consists of the ``name`` heading, the silhouette score, the
    Davies-Bouldin score (each computed from ``X`` and ``labels``) and a
    30-character dashed rule. Nothing is returned.
    """
    print(f"{name}")
    # Each metric is computed right before its line is printed, in this order.
    report_rows = (
        ("Silhouette Score :", silhouette_score),
        ("Davies-Bouldin   :", davies_bouldin_score),
    )
    for caption, scorer in report_rows:
        print(caption, scorer(X, labels))
    print("-" * 30)


In [None]:
# Report both models on the same standardized features, CLARANS first.
evaluate_clustering(
    X=X_scaled,
    labels=clarans.labels_,
    name="CLARANS (From Scratch)",
)
evaluate_clustering(
    X=X_scaled,
    labels=clara.labels_,
    name="CLARA (sklearn-extra)",
)