## Alter distance metrics

In [1]:
import pandas as pd
import openensembles as oe
import numpy as np
import re
import sklearn as sk
import sklearn.cluster as skc


## Generate Half Rings points.
# Rejection sampling: each coordinate is drawn from a normal distribution
# (mean 0, standard deviation 20) and a draw is kept only when its squared
# radius x**2 + y**2 falls in one of two bands, [0.5, 1] or [10, 11].
# Sampling stops once 400 points have been accepted.
# NOTE(review): no restriction is placed on the angle, so each band is a full
# ring rather than a half ring -- confirm this is what is wanted.
halfRings = []
while len(halfRings) < 400:
    x = np.random.normal(0, 20)
    y = np.random.normal(0, 20)
    r2 = x ** 2 + y ** 2  # squared distance from the origin, computed once
    if 0.5 <= r2 <= 1 or 10 <= r2 <= 11:
        halfRings.append([x, y])

halfRings = np.asarray(halfRings)
df = pd.DataFrame(halfRings)
# create an openensembles data object from the dataframe
dataObj = oe.data(df, [1, 2])

In [2]:
# Build a cluster object on top of the data object created above.
c = oe.cluster(dataObj)

In [3]:
# Show the clustering algorithms the cluster object reports as available
# (the notebook displays the returned mapping as the cell output).
c.algorithms_available()

{'AffinityPropagation': '',
 'DBSCAN': '',
 'agglomerative': '',
 'kmeans': '',
 'spectral': ''}

In [4]:
# Split the available algorithm names into two groups: those that are run
# with a distance argument further down, and those that are not.
algorithms_wDist = ['agglomerative', 'DBSCAN', 'AffinityPropagation']
algorithms_noDist = ['kmeans', 'spectral']

In [5]:
# Create an ensemble of those that can take distance, for all distances
# distance_metrics() hands back scikit-learn's own module-level mapping, so a
# filtered copy is built instead of deleting 'precomputed' from it. This leaves
# the library's state untouched and lets the cell be re-run without a KeyError.
dDict = {metric: func
         for metric, func in sk.metrics.pairwise.distance_metrics().items()
         if metric != 'precomputed'}
dDict

{'cityblock': <function sklearn.metrics.pairwise.manhattan_distances>,
 'cosine': <function sklearn.metrics.pairwise.cosine_distances>,
 'euclidean': <function sklearn.metrics.pairwise.euclidean_distances>,
 'l1': <function sklearn.metrics.pairwise.manhattan_distances>,
 'l2': <function sklearn.metrics.pairwise.euclidean_distances>,
 'manhattan': <function sklearn.metrics.pairwise.manhattan_distances>}

In [6]:
# Perform ensemble clustering: one solution for every combination of
# distance metric, distance-aware algorithm, K and data transform.
c = oe.cluster(dataObj)

K = [2]
name_template = "%s_%d_%s_%s"

for distance in dDict:
    for algorithm in algorithms_wDist:
        # ward, the default linkage, can't be used with a distance calculation,
        # so agglomerative runs are switched to complete linkage
        if algorithm == 'agglomerative':
            extra_args = {'linkage': 'complete'}
        else:
            extra_args = {}
        for k in K:
            for transform in list(dataObj.D):
                # solution name encodes algorithm, K, transform and distance
                name = name_template % (algorithm, k, transform, distance)
                c.cluster(transform, algorithm, name, K=k, distance=distance, **extra_args)



In [36]:
# test argument handling
# Unpacking listName with * passes the literal string 'K=k' as a fourth
# positional argument; it is not interpreted as the keyword argument K=k.
# The recorded result of this cell is the TypeError shown below (a str being
# compared with an int).
# NOTE(review): to forward keyword arguments from a container, a dict with **
# unpacking (e.g. **{'K': k}) would be needed -- confirm the intended usage.
listName = ['K=k']
c.cluster(transform, algorithm, name, *listName)

TypeError: unorderable types: str() <= int()

## What happens when you pass a distance metric to a clustering algorithm that does not accept one

In [4]:
# If you call a clustering algorithm with parameters that include 'distance', but the algorithm
# does not accept a distance metric, your clustering will complete, but a warning will be given.
params = {'distance': 'euclidean'}
c_noDist = oe.cluster(dataObj)
# Forward the parameter dict itself so the 'distance' entry defined above is
# what actually reaches the call (kmeans is run here with K=2).
c_noDist.cluster('parent', 'kmeans', 'kmeans', K=2, **params)



