# Tubes 1
### Menulis kode clustering untuk kmeans, kmedoids, agglomerative, dbscan
___Alvin Sullivan 13515048___

___Albertus Djauhari Djohan 13515054___

___Kevin 13515138___

### Read data
Fungsi untuk membaca data dari file eksternal

In [1]:
def read_data(filename):
    file = open(filename,'r')

    data_file = [[float(val) for val in line.split()] for line in file if len(line.strip()) > 0]

    file.close()
    return data_file

### Matrix data structure
Struktur data untuk menyimpan jarak

In [2]:
import numpy
from enum import IntEnum

class type_matrix(IntEnum):
    
    EUCLIDEAN = 0
    MANHATTAN = 2

class distance_matrix:
    def __init__(self, matrix_type, **kwargs):
        self.__type = matrix_type
        self.__args = kwargs
        self.__func = self.__args.get('func', None)
        self.__calculator = self.__create_calculator_distance()

    def __call__(self, object1, object2):
        return self.__calculator(object1, object2)
    def __create_calculator_distance(self):

        if self.__type == type_matrix.EUCLIDEAN:
                return euclidean_distance
        elif self.__type == type_matrix.MANHATTAN:
                return manhattan_distance_numpy
    def get_distance_type(self):
        return (self.__type)

def euclidean_distance(object1, object2):
    return numpy.sqrt(numpy.sum(numpy.square(object1 - object2), axis=1).T)
def manhattan_distance_numpy(object1, object2):
    return numpy.sum(numpy.absolute(object1 - object2), axis=1).T

### Matrix with linkage data structure
Struktur data untuk menyimpan jarak sesuai dengan tipe linkage

In [3]:
import numpy
import math

class LinkageDistanceMatrix:
    def __init__(self, data, clusters, linkage = "single", affinity = "euclidean"):
        self.data = data
        self.clusters = clusters
        self.linkage = linkage
        self.affinity = affinity
        self.matrix = self.create_distance_matrix()
    
    def create_distance_matrix(self):
        if (self.linkage == "complete"):
            return self.create_complete_distance_matrix()
        elif (self.linkage == "average"):
            return self.create_average_distance_matrix()
        elif (self.linkage == "average_group"):
            return self.create_average_group_distance_matrix()
        else:
            return self.create_single_distance_matrix()
    
    def create_single_distance_matrix(self):
        matrix = numpy.zeros((len(self.clusters), len(self.clusters)))
        for matrix1 in range(len(self.clusters)):
            for matrix2 in range(matrix1, len(self.clusters)):
                distance = float('inf')
                for index1 in self.clusters[matrix1]:
                    for index2 in self.clusters[matrix2]:
                        pairdistance = 0
                        if (self.affinity == "manhattan"):
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + abs(self.data[index2, feature] - self.data[index1, feature])
                        else:
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + (self.data[index2, feature] - self.data[index1, feature]) ** 2
                            pairdistance = math.sqrt(pairdistance)
                        distance = min(distance, pairdistance)
                matrix[matrix1, matrix2] = distance
                matrix[matrix2, matrix1] = distance
        return matrix
    
    def create_complete_distance_matrix(self):
        matrix = numpy.zeros((len(self.clusters), len(self.clusters)))
        for matrix1 in range(len(self.clusters)):
            for matrix2 in range(matrix1, len(self.clusters)):
                distance = -1
                for index1 in self.clusters[matrix1]:
                    for index2 in self.clusters[matrix2]:
                        pairdistance = 0
                        if (self.affinity == "manhattan"):
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + abs(self.data[index2, feature] - self.data[index1, feature])
                        else:
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + (self.data[index2, feature] - self.data[index1, feature]) ** 2
                            pairdistance = math.sqrt(pairdistance)
                        distance = max(distance, pairdistance)
                matrix[matrix1, matrix2] = distance
                matrix[matrix2, matrix1] = distance
        return matrix

    def create_average_distance_matrix(self):
        matrix = numpy.zeros((len(self.clusters), len(self.clusters)))
        for matrix1 in range(len(self.clusters)):
            for matrix2 in range(matrix1, len(self.clusters)):
                distance = 0
                for index1 in self.clusters[matrix1]:
                    for index2 in self.clusters[matrix2]:
                        pairdistance = 0
                        if (self.affinity == "manhattan"):
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + abs(self.data[index2, feature] - self.data[index1, feature])
                        else:
                            for feature in range(self.data[0].size):
                                pairdistance = pairdistance + (self.data[index2, feature] - self.data[index1, feature]) ** 2
                            pairdistance = math.sqrt(pairdistance)
                        distance += pairdistance
                distance /= (len(self.clusters[matrix1]) * len(self.clusters[matrix2]))
                matrix[matrix1, matrix2] = distance
                matrix[matrix2, matrix1] = distance
        return matrix

    def create_average_group_distance_matrix(self):
        matrix = numpy.zeros((len(self.clusters), len(self.clusters)))
        for matrix1 in range(len(self.clusters)):
            for matrix2 in range(matrix1, len(self.clusters)):
                center1 = numpy.zeros((1, self.data[0].size))
                center2 = numpy.zeros((1, self.data[0].size))
                for index1 in self.clusters[matrix1]:
                    center1 += self.data[index1]
                center1 /= len(self.clusters[matrix1])
                for index2 in self.clusters[matrix2]:
                    center2 += self.data[index2]
                center2 /= len(self.clusters[matrix2])
                distance = 0
                if (self.affinity == "manhattan"):
                    for feature in range(self.data[0].size):
                        distance = distance + abs(center2[0, feature] - center1[0, feature])
                else:
                    for feature in range(self.data[0].size):
                        distance = distance + (center2[0, feature] - center1[0, feature]) ** 2
                    distance = math.sqrt(distance)
                matrix[matrix1, matrix2] = distance
                matrix[matrix2, matrix1] = distance
        return matrix

### KMeans
Model pembelajaran KMeans

In [4]:
import numpy
import matrix
from matrix import distance_matrix, type_matrix


class kmeans:
    def __init__(self,data,initial_centroids, tolerance, **kwargs):
        self.__data = numpy.matrix(data)
        self.__clusters = []
        self.__centroids = numpy.matrix(initial_centroids)
        self.__tolerance = tolerance
        self.__matrix = kwargs.get('matrix', distance_matrix(type_matrix.EUCLIDEAN))
        
    def process(self):
        if (len(self.__data[0])) != len(self.__centroids[0]):
            raise NameError('Dimension of the input data and dimension of the initial cluster centers must be equal.')

        maximum_change = float('inf')
        stop_condition = self.__tolerance
        while maximum_change > stop_condition:
            self.__clusters = self.__update_clusters()
            update_centroids = self.__update_centroids()
            if len(self.__centroids) != len(update_centroids):
                maximum_change = float('inf')
            else:
                changes = self.__matrix(self.__centroids, update_centroids)
                maximum_change = numpy.max(changes)
            self.__centroids = update_centroids.tolist()
    def get_clusters(self):
        return self.__clusters
    def get_centroids(self):
        if isinstance(self.__centroids, list):
            return self.__centroids 
        return self.__centroids.tolist()
    def __update_clusters(self):
        clusters = [[] for _ in range(len(self.__centroids))]
        dataset_diff = numpy.zeros((len(clusters), len(self.__data)))
        for index_centroid in range(len(self.__centroids)):
            dataset_diff[index_centroid] = self.__matrix(self.__data, self.__centroids[index_centroid])

        optimum_indexes = numpy.argmin(dataset_diff, axis=0)
        for index_point in range(len(optimum_indexes)):
            index_cluster = optimum_indexes[index_point]
            clusters[index_cluster].append(index_point)
        clusters = [cluster for cluster in clusters if len(cluster)>0]
        return clusters
    def __update_centroids(self):
        dimension = self.__data.shape[1]
        centroids = numpy.zeros((len(self.__clusters),dimension))

        for index in range(len(self.__clusters)):
            cluster_points = self.__data[self.__clusters[index], :]
            centroids[index] = cluster_points.mean(axis = 0)
        return numpy.matrix(centroids)

### KMedoids
Model pembelajaran KMedoids

In [5]:
import matrix
import random
import numpy
from matrix import distance_matrix, type_matrix

class kmedoids:

    def __init__(self, data, initial_medoids_index, tolerance, **kwargs):
        self.__data = data
        self.__clusters = []
        self.index_medoids = initial_medoids_index
        self.__tolerance = tolerance
        self.__matrix = kwargs.get('matrix', distance_matrix(type_matrix.EUCLIDEAN))
        # self.distance_calculator = self.__create_distance_calculator()
        self.medoids_matrix_checker = numpy.zeros((len(initial_medoids_index), len(data)))
        for i in range(len(initial_medoids_index)):
            self.medoids_matrix_checker[i][initial_medoids_index[i]] = 1
    def process(self):

        diff = float('inf')
        stop_conditon = self.__tolerance
        counter = 0
        while (True):
            # Calculate Old Absolute Error
            self.__clusters = self.__update_clusters()
            old_absolute_error = self.calculate_absolute_error(self.__clusters, self.index_medoids)
            # Calculate New Absolute Error
            # temp_index_medoids = self.index_medoids
            temp_index_medoids = self.__update_medoids()
            new_temporary_clusters = self.__update_clusters()
            new_absolute_error = self.calculate_absolute_error(new_temporary_clusters, temp_index_medoids)
            # Calculate Difference
            diff_error =  new_absolute_error - old_absolute_error
            if diff_error < 0:
                self.index_medoids = temp_index_medoids
            else:
                break
    def get_clusters(self):

        return self.__clusters

    def get_medoids(self):
        
        return self.index_medoids

    def __update_clusters(self):
        clusters = [[self.index_medoids[i]] for i in range(len(self.index_medoids))]
        for index_point in range(len(self.__data)):
            if index_point in self.index_medoids:
                continue
            
            index_optim = -1
            dist_optim = float('Inf')

            for index in range(len(self.index_medoids)):
                dist = self.__matrix(numpy.matrix(self.__data[index_point]), numpy.matrix(self.__data[self.index_medoids[index]]))
                if dist < dist_optim:
                    index_optim = index
                    dist_optim = dist
            clusters[index_optim].append(index_point)
        return clusters

    def __update_medoids(self):
        medoid_indexes = self.index_medoids
        random_index_to_change = random.randint(0,len(self.index_medoids)-1)
        random_value = random.randint(0,len(self.__data)-1)
        while self.medoids_matrix_checker[random_index_to_change][random_value] == 1:
            random_index_to_change = random.randint(0,len(self.index_medoids)-1)
            random_value = random.randint(0,len(self.__data)-1)
        medoid_indexes[random_index_to_change] = random_value
        self.medoids_matrix_checker[random_index_to_change][random_value] = 1
        return medoid_indexes
    def calculate_absolute_error(self, clusters, index_medoids):
        medoids = []
        for i in index_medoids:
            medoids.append(self.__data[i])
        sum = 0
        for i in range(len(index_medoids)):
            for j in range(len(clusters[i])):
                data = self.__data[clusters[i][j]]
                sum += numpy.sum(numpy.absolute(numpy.array(data) - numpy.array(medoids[i])),axis=0)
        return (sum)

### DBSCAN
Model pembelajaran DBSCAN

In [6]:
import math
import collections

class dbscan:
	def __init__(self, data, epsilon, min_pts, distance_type=1):
		self.data = data
		self.clusters = []
		self.clusters_id = []
		self.epsilon = epsilon
		self.min_pts = min_pts
		self.neighborhood_list = [[] for _ in self.data]
		self.distance_type = distance_type
		self.core_points = []
		self.outlier = []

	def compare(self, cluster1, cluster2):
		first_element = cluster1[0]

		if first_element in cluster2:
			return True
		else:
			return False

	def process(self):
		for i in range(0, len(self.data)):
			for j in range(0, len(self.data)):
				if i!=j and self.calculate_distance(self.data[i], self.data[j]) <= self.epsilon:
					self.neighborhood_list[i].append(j)

		for i in range(0, len(self.neighborhood_list)):
			if len(self.neighborhood_list[i]) + 1 >= self.min_pts:
				self.core_points.append(i)
			else:
				self.outlier.append(i)

		for i in range(0, len(self.core_points)):
			found = False
			current_cluster = self.neighborhood_list[i]
			current_cluster.append(i)

			for cluster in self.clusters:
				if self.compare(cluster, current_cluster):
					found = True
					break

			if not found:
				self.clusters.append(current_cluster)

	def calculate_distance(self, instance1, instance2):
		EUCLIDEAN = 1
		EUCLIDEAN_SQUARE = 2
		MANHATTAN = 3

		diff = 0

		if self.distance_type == EUCLIDEAN :
			for val1, val2 in zip(instance1, instance2):
				diff += (val1 - val2)**2
			diff = math.sqrt(diff)
		elif self.distance_type == EUCLIDEAN_SQUARE :
			for val1, val2 in zip(instance1, instance2):
				diff += (val1 - val2)**2
		else:
			for val1, val2 in zip(instance1, instance2):
				diff += math.fabs(val1 - val2)
		
		return diff

	def get_clusters(self):
		return self.clusters

	def get_outliers(self):
		return self.outlier

### Agglomerative

Agglomerative merupakan hierarchical clustering untuk mengelompokkan data dari 1 instance dalam 1 cluster diiterasi hingga seluruh instance dalam 1 cluster. Algoritma menyatukan 2 cluster di setiap iterasinya dengan jarak terpendek antar cluster tersebut. Jarak antar cluster dapat dihitung dengan 4 linkage, yaitu single linkage, complete linkage, average linkage, atau average group linkage. Jarak antar titik dapat dihitung dengan 2 cara, yaitu euclidean atau manhattan.

### Pseudo-code Agglomerative

```python
def fit(data):
    initialize_clusters
    while current_n_clusters > desired_n_clusters:
    for each cluster in current_clusters:
        for each cluster in current_clusters:
            calculate_cluster_distance_to_distance_matrix
    initialize_index
    initialize_minimum_distance
    for each cluster in distance_matrix:
        for each cluster in distance_matrix:
            if distance_of_cluster_pair < minimum_distance:
                replace_index_with_cluster_pair
                replace_minimum_distance_with_distance
    initialize_new_clusters
    for cluster in current_clusters:
        if current_cluster = first_minimum_cluster:
            append_new_clusters_with_appended_minimum_cluster_pair
        else:
            append_new_clusters_with_current_cluster
    return new_clusters
```

### Penjelasan Kode Implementasi Agglomerative

Agglomerative diimplementasi dengan sebuah kelas Agglomerative yang dapat menerima parameter n_clusters, linkage, dan affinity, di mana n_clusters menyatakan banyak cluster yang akan terbentuk, linkage menyatakan jenis linkage antar cluster yang digunakan pada perhitungan distance matrix, dan affinity menyatakan cara perhitungan jarak antar titik. Kelas memanggil fit untuk melakukan pembelajaran terhadap data masukan sesuai dengan penjelasan algoritma sebelumnya. Setiap iterasi menghasilkan daftar setiap cluster yang terbentuk dan berisi indeks instance yang berada pada setiap cluster beserta dengan distance matrix-nya.

In [7]:
import numpy
from linkagematrix import LinkageDistanceMatrix

class Agglomerative:
    def __init__(self, n_clusters = 2, linkage = "single", affinity = "euclidean"):
        self.data = numpy.array([])
        self.clusters = []
        self.n_clusters = n_clusters
        self.linkage = linkage
        self.affinity = affinity
        self.distance_matrix = numpy.array([])
        
    def fit(self, data):
        self.data = numpy.matrix(data)
        for index in range(len(data)):
            self.clusters.append([index])
        while (len(self.clusters) > self.n_clusters):
            self.distance_matrix = LinkageDistanceMatrix(self.data, self.clusters, self.linkage, self.affinity)
            self.clusters = self.update_clusters()

    def get_clusters(self):
        return self.clusters

    def update_clusters(self):
        clusters = []
        minindexi = 0
        minindexj = 0
        mindistance = float('inf')
        for indexi in range(len(self.clusters)):
            for indexj in range(indexi, len(self.clusters)):
                if (indexi != indexj and self.distance_matrix.matrix[indexi, indexj] < mindistance):
                    minindexi = indexi
                    minindexj = indexj
                    mindistance = self.distance_matrix.matrix[indexi, indexj]
        iterator = -1
        for cluster in self.clusters:
            iterator += 1
            if (iterator == minindexi):
                clusters.append(self.clusters[minindexi] + self.clusters[minindexj])
            elif (iterator != minindexj):
                clusters.append(self.clusters[iterator])
        return clusters

### Evaluasi
Evaluasi model pembelajaran yang diimplementasi dibandingkan dengan label data iris sesungguhnya.

___Fungsi untuk evaluasi___

In [29]:
true_clusters = [[i for i in range(0, 50)], [i for i in range(50, 100)], [i for i in range(100, 150)]]
print(true_clusters)

def evaluate(prediction_clusters, true_clusters):
    accuracy = []
    
    for prediction_cluster in prediction_clusters:
        max_true = 0
        
        for true_cluster in true_clusters:
            counter = 0
            
            for element in prediction_cluster:
                if element in true_cluster:
                    counter+=1
                    
            if max_true < counter:
                max_true = counter
            
        accuracy.append(max_true/len(prediction_cluster) * 100)
        
    return sum(accuracy)/len(accuracy)     

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]]


In [20]:
def kmeans_clustering(filename, tolerance, start_centroids):
    sample = read_data(filename)
    kmeans_instance = kmeans(sample, start_centroids, tolerance=0.25)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    centroids = kmeans_instance.get_centroids()
    print ("Cluster Result: \n", clusters)
    print ("Centroids Result: \n",centroids)
    print ("Akurasi: ", evaluate(clusters, true_clusters))

In [21]:
def kmedoids_clustering(filename, tolerance, start_medoids):
    sample = read_data(filename)
    kmedoids_instance = kmedoids(sample,start_medoids,tolerance=0)
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()
    medoids = kmedoids_instance.get_medoids()
    print ("Cluster Result: \n", clusters)
    print ("Centroids Result: \n",medoids)
    print ("Akurasi: ", evaluate(clusters, true_clusters))

In [26]:
def dbscan_clustering(filename, epsilon, min_pts):
    sample = read_data(filename)
    dbscan_instance = dbscan(sample, epsilon, min_pts)
    dbscan_instance.process()
    clusters = dbscan_instance.get_clusters()
    outliers = dbscan_instance.get_outliers()
    print("Clusters :\n", clusters)
    print("Outliers :\n", outliers)
    print("Akurasi: ", evaluate(clusters, true_clusters))

In [27]:
def agglomerative_clustering(filename, n_clusters, linkage, affinity):
    sample = read_data(filename)
    agglomerative_instance = Agglomerative(n_clusters, linkage, affinity)
    agglomerative_instance.fit(sample)
    clusters = agglomerative_instance.get_clusters()
    print(linkage, affinity)
    print("Cluster Result: \n", clusters)
    print ("Akurasi: ", evaluate(clusters, true_clusters))

In [13]:
start_centroids = [[5.1,3.5,1.4,0.2],[4.9,3.0,1.4,0.2],[4.7,3.2,1.3,0.2]]
start_medoids = [0,2,7]
filename = "./dataset/iris_without_label.data"

### Evaluasi KMeans

KMeans yang diimplementasi terhadap dataset iris menghasilkan 3 cluster dengan centroidnya masing-masing. Akurasi yang dihasilkan jika dibandingkan dengan label dataset iris sesungguhnya yaitu 90.03%.

In [30]:
kmeans_clustering(filename, 1e-3, start_centroids)

Cluster Result: 
 [[50, 51, 52, 54, 56, 58, 63, 65, 70, 72, 73, 75, 76, 77, 83, 85, 86, 91, 100, 101, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149], [53, 55, 57, 59, 60, 61, 62, 64, 66, 67, 68, 69, 71, 74, 78, 79, 80, 81, 82, 84, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, 106], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]
Centroids Result: 
 [[6.570149253731341, 2.988059701492538, 5.338805970149254, 1.885074626865671], [5.636363636363636, 2.6363636363636362, 4.027272727272727, 1.2515151515151515], [5.005999999999999, 3.4180000000000006, 1.464, 0.2439999999999999]]
Akurasi:  90.03467510930197


### Evaluasi KMedoids

KMedoids yang diimplementasi terhadap dataset iris menghasilkan 3 cluster dengan centroidnya masing-masing. Akurasi yang dihasilkan jika dibandingkan dengan label dataset iris sesungguhnya yaitu 82.24%.

In [33]:
kmedoids_clustering(filename,0,start_medoids)

Cluster Result: 
 [[6, 1, 2, 3, 8, 11, 12, 13, 22, 29, 30, 38, 41, 42, 45, 47], [0, 4, 5, 7, 9, 10, 14, 15, 16, 17, 18, 19, 20, 21, 27, 28, 32, 33, 34, 35, 36, 37, 39, 40, 46, 48, 49], [23, 24, 25, 26, 31, 43, 44, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149]]
Centroids Result: 
 [14, 0, 23]
Akurasi:  82.2429906542056


### Evaluasi DBSCAN

DBSCAN yang diimplementasi terhadap dataset iris menghasilkan 3 cluster tanpa ada outlier. Akurasi yang dihasilkan jika dibandingkan dengan label dataset iris sesungguhnya yaitu 75.07%.

In [37]:
dbscan_clustering(filename, 2.7, 50)

Clusters :
 [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 57, 64, 79, 93, 98, 0], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 50], [50, 52, 56, 58, 76, 77, 83, 86, 100, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 114, 115, 116, 118, 120, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 143, 144, 145, 146, 147, 148, 149, 117]]
Outliers :
 []
Akurasi:  75.07427213309566


### Evaluasi Agglomerative

Agglomerative yang diimplementasi terhadap dataset iris menghasilkan 3 cluster. Akurasi yang dihasilkan jika dibandingkan dengan label dataset iris sesungguhnya yaitu 83.67%.

In [38]:
agglomerative_clustering(filename, 3, "single", "euclidean")

single euclidean
Cluster Result: 
 [[0, 17, 40, 4, 7, 39, 49, 27, 28, 35, 10, 48, 23, 26, 43, 1, 45, 12, 9, 34, 37, 29, 30, 2, 3, 47, 25, 8, 38, 42, 11, 6, 19, 21, 46, 13, 24, 36, 20, 31, 5, 18, 16, 32, 33, 44, 15, 14, 22, 41], [50, 52, 86, 51, 56, 54, 58, 65, 75, 74, 97, 77, 76, 71, 53, 89, 69, 80, 81, 67, 82, 92, 88, 94, 95, 96, 99, 90, 61, 55, 66, 84, 63, 91, 78, 73, 79, 85, 59, 70, 127, 138, 123, 126, 146, 149, 101, 142, 113, 121, 72, 83, 133, 103, 116, 137, 104, 128, 132, 110, 147, 111, 141, 145, 112, 139, 120, 143, 140, 144, 124, 115, 136, 148, 102, 125, 129, 64, 100, 119, 107, 130, 114, 62, 68, 87, 105, 122, 118, 135, 134, 108, 109, 57, 93, 60, 98, 106], [117, 131]]
Akurasi:  83.6734693877551


### Pembagian Tugas
1. Alvin Sullivan - 13515048 - Agglomerative
2. Albertus Djauhari - 13515054 - DBSCAN
3. Kevin - 13515138 - KMeans & KMedoids