In [None]:
# KMeans clustering class

import numpy as np
class KMeans:
    """K-means clustering (Lloyd's algorithm) with random-point initialisation.

    Attributes:
        n_clusters: Number of clusters / centroids.
        max_iter: Upper bound on the number of assign/update iterations.
        tol: Per-coordinate threshold; when every centroid coordinate moves by
            less than this between two iterations, fitting stops.
        centroids: ``(n_clusters, n_features)`` array of centroids, or ``None``
            before ``fit`` has been called.
        labels: List with the cluster index of every training row, or ``None``
            before ``fit`` has been called.
        random_seed: Seed passed to ``np.random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_clusters=3, max_iter=300, tol=1e-4, random_seed=None):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.tol = tol  # centroid moves smaller than this count as "unchanged"
        self.centroids = None
        self.labels = None
        self.random_seed = random_seed

    def _nearest_centroid(self, X):
        """Return, for each row of ``X``, the index of the closest centroid."""
        # Broadcasting: (n, 1, d) - (k, d) -> (n, k, d); reduce over features.
        distances = np.sqrt(
            np.sum((X[:, np.newaxis, :] - self.centroids) ** 2, axis=2)
        )
        return np.argmin(distances, axis=1)

    def fit(self, X):
        """Cluster the rows of ``X`` and store ``centroids`` and ``labels``.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.

        Raises:
            ValueError: From ``np.random.choice`` when ``n_clusters`` exceeds
                the number of rows in ``X``.
        """
        # Convert first so that list input can be fancy-indexed below, and use
        # floats so centroids can hold cluster means.
        X = np.asarray(X, dtype=float)

        # Initialise the centroids with n_clusters distinct rows of the data.
        # NOTE: this reseeds NumPy's global RNG (kept for reproducibility).
        np.random.seed(self.random_seed)
        random_indices = np.random.choice(
            X.shape[0], self.n_clusters, replace=False
        )
        self.centroids = X[random_indices]

        for _ in range(self.max_iter):
            # Assignment step: each point goes to its nearest centroid.
            assignments = self._nearest_centroid(X)
            self.labels = assignments.tolist()

            # Update step: each centroid moves to the mean of its members.
            # A cluster that received no points keeps its previous centroid
            # instead of becoming NaN (mean of an empty set).
            new_centroids = self.centroids.copy()
            for k in range(self.n_clusters):
                members = X[assignments == k]
                if len(members) > 0:
                    new_centroids[k] = members.mean(axis=0)

            # Converged: no coordinate moved by tol or more.
            if np.all(np.abs(new_centroids - self.centroids) < self.tol):
                break
            self.centroids = new_centroids

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.

        Returns:
            Integer NumPy array of cluster labels in ``0 .. n_clusters - 1``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.centroids is None:
            raise RuntimeError("KMeans.predict() called before fit()")
        return self._nearest_centroid(np.asarray(X, dtype=float))




# DBSCAN clustering class
class DBSCAN:
    """Density-based clustering (DBSCAN) using Euclidean distance.

    A point is a *core point* when at least ``min_samples`` points (the point
    itself included) lie within distance ``eps`` of it. Clusters are grown
    from core points; points that are reachable from no core point are
    labelled ``-1`` (noise).

    Attributes:
        eps: Neighbourhood radius.
        min_samples: Minimum neighbourhood size (self included) for a core point.
        labels: Integer array of cluster ids per training row (``-1`` = noise),
            or ``None`` before ``fit`` has been called.
    """

    def __init__(self, eps=0.5, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples
        self.labels = None

    def _region_query(self, X, index):
        """Return indices of all rows of ``X`` within ``eps`` of row ``index``.

        The row itself is always included (its distance is zero).
        """
        distances = np.sqrt(np.sum((X - X[index]) ** 2, axis=1))
        return np.flatnonzero(distances <= self.eps)

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result in ``self.labels``.

        Cluster ids are consecutive integers starting at 0, numbered in the
        order the clusters are discovered; noise points get ``-1``.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        noise = -1
        # Explicit int dtype: labels must not inherit the dtype of the data.
        self.labels = np.full(n_samples, noise, dtype=int)
        cluster_id = 0

        for i in range(n_samples):
            # Already claimed by an earlier cluster.
            if self.labels[i] != noise:
                continue

            neighbors = self._region_query(X, i)
            if len(neighbors) < self.min_samples:
                # Not a core point: stays noise unless a later cluster
                # reaches it as a border point.
                continue

            # i is a core point: start a new cluster and grow it.
            self.labels[i] = cluster_id
            frontier = list(neighbors)
            while frontier:
                n = frontier.pop()
                if self.labels[n] != noise:
                    continue  # already in a cluster (this one or an earlier one)
                self.labels[n] = cluster_id

                # Only core points propagate the cluster; border points are
                # labelled but their neighbourhoods are not explored.
                reachable = self._region_query(X, n)
                if len(reachable) >= self.min_samples:
                    frontier.extend(reachable[self.labels[reachable] == noise])

            # Advance the id only after a cluster was actually created, so
            # ids stay contiguous regardless of how many noise points exist.
            cluster_id += 1

    def predict(self, X):
        """Return the labels computed by the last ``fit`` call.

        ``X`` is accepted for interface symmetry but is not used: the stored
        training labels are returned as-is.

        Returns:
            Integer array of cluster ids (``-1`` marks noise), or ``None`` if
            ``fit`` has not been called.
        """
        return self.labels
