### Partitioning Around Medoids (PAM) - Compactness
Exercise for the course "Data Mining: Basic concepts" at the University of Konstanz (GER)
                  
#### Authors: Gunda Ehmke, Judith Kunz

In [None]:
import pandas as pd 
import numpy as np 
from pyclustering.cluster.kmedoids import kmedoids
from scipy.spatial.distance import pdist #, cdist, squareform
from random import randrange

In [None]:
def compactnessPAM(data, instance):
    '''
    Compute the compactness (total deviation, TD) of a PAM clustering.

    The compactness is the sum, over all clusters, of the Euclidean distances
    between every member of the cluster and the medoid of that cluster.

    Input:
    data: two dimensional array-like (e.g. numpy array) with one row per point
    instance: object providing get_clusters() (list of lists of row indices)
              and get_medoids() (list of row indices, one per cluster, in the
              same order as the clusters), e.g. a processed kmedoids instance
              from pyclustering.cluster.kmedoids

    Output:
    TD: compactness of the PAM clustering as sum of distances (float)
    '''
    # Convert once so rows can be fancy-indexed and subtracted in bulk.
    points = np.asarray(data, dtype=float)

    clusters = instance.get_clusters()
    medoids = instance.get_medoids()

    total = 0.0
    # The i-th medoid belongs to the i-th cluster, so walk both in lockstep.
    for members, medoid in zip(clusters, medoids):
        if len(members) == 0:
            # An empty cluster contributes nothing to the total deviation.
            continue
        offsets = points[np.asarray(members, dtype=int)] - points[medoid]
        # Row-wise Euclidean norm = distance of each member to the medoid.
        total += float(np.linalg.norm(offsets, axis=1).sum())

    return total

In [None]:
def completePAM(df, k):
    '''
    Run a PAM (k-medoids) clustering on a pandas dataframe and return its
    compactness.

    k distinct rows are chosen at random as initial medoids, the kmedoids
    algorithm from pyclustering is run on the data, and the compactness of
    the resulting clustering is computed with compactnessPAM.

    Input:
    df: pandas dataframe, one row per data point
    k: integer, number of clusters (1 <= k <= number of rows)

    Output:
    TD: compactness as sum of distances between points and their medoid

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of rows
    '''
    # Local import: the file only imports randrange from random.
    from random import sample

    no_rows = len(df.index)
    if k < 1 or k > no_rows:
        raise ValueError(
            "k must be between 1 and the number of rows (%d), got %r"
            % (no_rows, k)
        )

    data = df.to_numpy()

    # Sample WITHOUT replacement: drawing indices independently could pick
    # the same row twice, giving the algorithm duplicate initial medoids.
    startpoints = sample(range(no_rows), k)

    kmedoids_instance = kmedoids(data, startpoints)  # set up the algorithm
    kmedoids_instance.process()  # run the clustering

    return compactnessPAM(data, kmedoids_instance)