In [None]:
import random

import pandas as pd
import numpy as np

from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.optics import optics, ordering_analyser, ordering_visualizer

from pyclustering.utils import read_sample, timedcall

from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES

In [None]:
def template_clustering(path_sample, radius, neighbors, amount_clusters=None, visualize=True, ccore=False):
    """Run OPTICS on a sample file and return the clusters it allocates.

    Args:
        path_sample: Path handed to ``read_sample`` to load the points.
        radius: Forwarded to ``optics`` as ``eps``.
        neighbors: Forwarded to ``optics`` as ``minpts``.
        amount_clusters: Forwarded to ``optics`` as ``amount_clusters``; also
            passed to the ordering diagram when visualizing.
        visualize: When truthy, show the cluster plot (noise drawn with 'x'
            markers) and the ordering diagram.
        ccore: Forwarded to ``optics`` as ``ccore``.

    Returns:
        The value of ``optics_instance.get_clusters()``.
    """
    sample = read_sample(path_sample)

    optics_instance = optics(sample, eps=radius, minpts=neighbors, amount_clusters=amount_clusters, ccore=ccore)
    (ticks, _) = timedcall(optics_instance.process)

    print("\t\tExecution time: ", ticks, "\n")

    # Fetch the clusters unconditionally: the function returns them even when
    # nothing is plotted (previously this raised UnboundLocalError for
    # visualize=False).
    clusters = optics_instance.get_clusters()

    if visualize:
        noise = optics_instance.get_noise()

        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.append_cluster(noise, sample, marker='x')
        visualizer.show()

        ordering = optics_instance.get_ordering()
        analyser = ordering_analyser(ordering)

        ordering_visualizer.show_ordering_diagram(analyser, amount_clusters)

    return clusters
    

In [None]:
def cluster_sample1():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE1 with radius 0.5 and 2 neighbors."""
    return template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE1, 0.5, 2)
    
def cluster_sample2():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE2 with radius 3.0 and 3 neighbors."""
    return template_clustering(SIMPLE_SAMPLES.SAMPLE_SIMPLE2, 3.0, 3)
    
def cluster_sample3():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE3 with radius 0.7 and 3 neighbors."""
    return template_clustering(
        path_sample=SIMPLE_SAMPLES.SAMPLE_SIMPLE3,
        radius=0.7,
        neighbors=3,
    )
    
def cluster_sample4():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE4 with radius 0.7 and 3 neighbors."""
    return template_clustering(
        path_sample=SIMPLE_SAMPLES.SAMPLE_SIMPLE4,
        radius=0.7,
        neighbors=3,
    )

def cluster_sample5():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE5 (radius 0.7, 3 neighbors) asking for 4 clusters."""
    return template_clustering(
        path_sample=SIMPLE_SAMPLES.SAMPLE_SIMPLE5,
        radius=0.7,
        neighbors=3,
        amount_clusters=4,
    )
    
def cluster_sample6():
    """Cluster SIMPLE_SAMPLES.SAMPLE_SIMPLE6 with radius 1.0 and 3 neighbors."""
    return template_clustering(
        path_sample=SIMPLE_SAMPLES.SAMPLE_SIMPLE6,
        radius=1.0,
        neighbors=3,
    )

def cluster_elongate():
    """Cluster SIMPLE_SAMPLES.SAMPLE_ELONGATE with radius 0.5 and 3 neighbors."""
    return template_clustering(
        path_sample=SIMPLE_SAMPLES.SAMPLE_ELONGATE,
        radius=0.5,
        neighbors=3,
    )

def cluster_lsun():
    """Cluster FCPS_SAMPLES.SAMPLE_LSUN with radius 0.5 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_LSUN,
        radius=0.5,
        neighbors=3,
    )

def cluster_lsun_radius_calculation():
    """Cluster FCPS_SAMPLES.SAMPLE_LSUN (radius 1.0, 3 neighbors) asking for 3 clusters."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_LSUN,
        radius=1.0,
        neighbors=3,
        amount_clusters=3,
    )

def cluster_target():
    """Cluster FCPS_SAMPLES.SAMPLE_TARGET with radius 0.5 and 2 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_TARGET,
        radius=0.5,
        neighbors=2,
    )

def cluster_target_radius_calculation():
    """Cluster FCPS_SAMPLES.SAMPLE_TARGET (radius 10.0, 2 neighbors) asking for 6 clusters."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_TARGET,
        radius=10.0,
        neighbors=2,
        amount_clusters=6,
    )

def cluster_two_diamonds():
    """Cluster FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS with radius 0.15 and 7 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        radius=0.15,
        neighbors=7,
    )

def cluster_two_diamonds_radius_calculation():
    """Cluster FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS (radius 1.0, 7 neighbors) asking for 2 clusters."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS,
        radius=1.0,
        neighbors=7,
        amount_clusters=2,
    )

def cluster_wing_nut():
    """Cluster FCPS_SAMPLES.SAMPLE_WING_NUT with radius 0.25 and 2 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_WING_NUT,
        radius=0.25,
        neighbors=2,
    )

def cluster_wing_nut_radius_calculation():
    """Cluster FCPS_SAMPLES.SAMPLE_WING_NUT (radius 1.0, 2 neighbors) asking for 2 clusters."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_WING_NUT,
        radius=1.0,
        neighbors=2,
        amount_clusters=2,
    )

def cluster_chainlink():
    """Cluster FCPS_SAMPLES.SAMPLE_CHAINLINK with radius 0.15 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_CHAINLINK,
        radius=0.15,
        neighbors=3,
    )
    
def cluster_hepta():
    """Cluster FCPS_SAMPLES.SAMPLE_HEPTA with radius 1 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_HEPTA,
        radius=1,
        neighbors=3,
    )
    
def cluster_golf_ball():
    """Cluster FCPS_SAMPLES.SAMPLE_GOLF_BALL with radius 0.5 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_GOLF_BALL,
        radius=0.5,
        neighbors=3,
    )
    
def cluster_atom():
    """Cluster FCPS_SAMPLES.SAMPLE_ATOM with radius 15 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_ATOM,
        radius=15,
        neighbors=3,
    )

def cluster_tetra():
    """Cluster FCPS_SAMPLES.SAMPLE_TETRA with radius 0.4 and 3 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_TETRA,
        radius=0.4,
        neighbors=3,
    )
     
def cluster_engy_time():
    """Cluster FCPS_SAMPLES.SAMPLE_ENGY_TIME with radius 0.2 and 20 neighbors."""
    return template_clustering(
        path_sample=FCPS_SAMPLES.SAMPLE_ENGY_TIME,
        radius=0.2,
        neighbors=20,
    )

# Simple samples: run each one and print the clusters it returns.
for run_example in (
    cluster_sample1,
    cluster_sample2,
    cluster_sample3,
    cluster_sample4,
    cluster_sample5,
    cluster_sample6,
):
    print(run_example())

# These runs are executed for their plots only; the returned clusters are discarded.
for run_example in (cluster_elongate, cluster_lsun, cluster_lsun_radius_calculation):
    _ = run_example()

# Disabled runs, kept for reference:
# cluster_target()
# cluster_target_radius_calculation()

# cluster_two_diamonds()
# cluster_two_diamonds_radius_calculation()

# cluster_wing_nut()
# cluster_wing_nut_radius_calculation()

print(cluster_chainlink())

# Return values are ignored for the next four runs.
for run_example in (cluster_hepta, cluster_golf_ball, cluster_atom, cluster_tetra):
    run_example()

print(cluster_engy_time())

In [None]:
def experiment_execution_time(ccore):
    """Time OPTICS on a fixed list of FCPS samples without visualization.

    Each entry is run through ``template_clustering`` with ``visualize=False``
    and the given ``ccore`` flag; the returned clusters are ignored.
    """
    # (sample path, radius, neighbors, amount_clusters)
    benchmark_cases = (
        (FCPS_SAMPLES.SAMPLE_LSUN, 1.0, 3, 3),
        (FCPS_SAMPLES.SAMPLE_TARGET, 10.0, 2, 6),
        (FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS, 1.0, 7, 2),
        (FCPS_SAMPLES.SAMPLE_CHAINLINK, 2.0, 3, 2),
        (FCPS_SAMPLES.SAMPLE_WING_NUT, 1.0, 2, 2),
        (FCPS_SAMPLES.SAMPLE_HEPTA, 1, 3, None),
        (FCPS_SAMPLES.SAMPLE_TETRA, 1.0, 3, 4),
        (FCPS_SAMPLES.SAMPLE_ATOM, 30, 3, 2),
    )

    for sample_path, eps_radius, min_neighbors, cluster_count in benchmark_cases:
        template_clustering(sample_path, eps_radius, min_neighbors, cluster_count, False, ccore)


def clustering_random_points(amount, ccore):
    """Time OPTICS (eps 0.05, minpts 20) on ``amount`` random 2D points and print the result.

    Points are drawn with ``random.random()`` for each coordinate.
    """
    points = []
    for _ in range(amount):
        points.append([random.random(), random.random()])

    instance = optics(points, eps=0.05, minpts=20, amount_clusters=None, ccore=ccore)
    elapsed, _ = timedcall(instance.process)

    print("Execution time ("+ str(amount) +" 2D-points):", elapsed)


def performance_measure_random_points(ccore):
    """Run ``clustering_random_points`` for a series of increasing point counts."""
    point_counts = (1000, 2000, 3000, 4000, 5000, 10000, 20000)
    for point_count in point_counts:
        clustering_random_points(point_count, ccore)
    

# Sample-file timings, first without and then with the ccore flag.
experiment_execution_time(ccore=False)
experiment_execution_time(ccore=True)

# Random-point timings for both settings. The second call previously used the
# misspelled keyword `ccor`, which raises TypeError.
performance_measure_random_points(ccore=False)
performance_measure_random_points(ccore=True)

In [None]:
path_sample = SIMPLE_SAMPLES.SAMPLE_SIMPLE1

# Load the points in this cell so the inspection prints below do not depend on
# a `sample` variable left over from some other cell.
sample = read_sample(path_sample)
print(sample)
print(type(sample))

# The template_clustering defined earlier in the notebook loads the file
# itself, and its parameters are named radius/neighbors (not eps/minpts).
test = template_clustering(path_sample=path_sample, radius=1, neighbors=2)

In [None]:
def template_clustering(path_sample, radius, neighbors, amount_clusters=None, visualize=True, ccore=False):
    """Run OPTICS on in-memory points (or a sample file) and return the clusters.

    Args:
        path_sample: The points to cluster. A ``str`` is treated as a file
            path and loaded with ``read_sample``; anything else is passed to
            ``optics`` as the sample itself.
        radius: Forwarded to ``optics`` as ``eps``.
        neighbors: Forwarded to ``optics`` as ``minpts``.
        amount_clusters: Forwarded to ``optics`` as ``amount_clusters``; also
            passed to the ordering diagram when visualizing.
        visualize: When truthy, show the cluster plot and the ordering diagram.
            Noise points are not drawn in this variant.
        ccore: Forwarded to ``optics`` as ``ccore``.

    Returns:
        The value of ``optics_instance.get_clusters()``.
    """
    # Use the argument rather than whatever global `sample` happens to exist;
    # the previous body silently ignored `path_sample`.
    if isinstance(path_sample, str):
        sample = read_sample(path_sample)
    else:
        sample = path_sample

    optics_instance = optics(sample, eps=radius, minpts=neighbors, amount_clusters=amount_clusters, ccore=ccore)
    (ticks, _) = timedcall(optics_instance.process)

    print("\t\tExecution time: ", ticks, "\n")

    # Fetch the clusters unconditionally so the return below also works when
    # visualize is false (previously an UnboundLocalError).
    clusters = optics_instance.get_clusters()

    if visualize:
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        visualizer.show()

        ordering = optics_instance.get_ordering()
        analyser = ordering_analyser(ordering)

        ordering_visualizer.show_ordering_diagram(analyser, amount_clusters)

    return clusters
    

In [None]:
# Load the contact coordinates stored under key 'F' of the HDF file.
df = pd.read_hdf("./results/H2O/H2O_CF_vdw.5/H2O_CF_coordinates_contact.hdf", 'F')

# One [x, y, z] triple per DataFrame row.
sample = [[point.x, point.y, point.z] for _, point in df.iterrows()]
print(type(sample))
print(sample[:4])

# Try three (radius, neighbors) settings and report how many clusters each yields.
for run_radius, run_neighbors in ((0.25, 6), (0.4, 8), (0.3, 6)):
    test = template_clustering(path_sample=sample, radius=run_radius, neighbors=run_neighbors)
    print(len(test))

In [None]:
# Same data, now asking OPTICS for three clusters.
test = template_clustering(
    path_sample=sample,
    radius=0.3,
    neighbors=12,
    amount_clusters=3,
)
print(len(test))

In [None]:
# Load the contact coordinates stored under key 'F' of the HDF file.
df = pd.read_hdf("./results/H2O/H2O_CF_vdw.5/H2O_CF_coordinates_contact.hdf", 'F')

# One [x, y, z] triple per DataFrame row.
sample = [[row.x, row.y, row.z] for i, row in df.iterrows()]
print(type(sample))
print(sample[:4])

# template_clustering takes radius/neighbors; the former eps=/minpts= keywords
# are not parameters of it and raised TypeError.
test = template_clustering(path_sample=sample, radius=1, neighbors=2, amount_clusters=3, visualize=True)


In [None]:
# Number of clusters from the previous run, followed by the clusters themselves.
cluster_total = len(test)
print(cluster_total)

print(test)


In [None]:
# Load the contact coordinates stored under key 'F' of the HDF file.
df = pd.read_hdf("./results/H2O/H2O_CF_vdw.5/H2O_CF_coordinates_contact.hdf", 'F')

# One [x, y, z] triple per DataFrame row.
sample = [[row.x, row.y, row.z] for i, row in df.iterrows()]
print(type(sample))
print(sample[:4])

# template_clustering takes radius/neighbors; the former eps=/minpts= keywords
# are not parameters of it and raised TypeError.
test2 = template_clustering(path_sample=sample, radius=1, neighbors=2, amount_clusters=4, visualize=True)

print(test2)
print(len(test2))

In [None]:
# Read sample for clustering from some file
# Build the point list from the DataFrame loaded in an earlier cell.
sample = [[point.x, point.y, point.z] for _, point in df.iterrows()]

# OPTICS settings for this run; no target cluster count is requested.
radius, neighbors, amount_of_clusters = 0.25, 8, None
optics_instance = optics(
    sample,
    eps=radius,
    minpts=neighbors,
    amount_clusters=amount_of_clusters,
)

# Run the analysis.
optics_instance.process()

# Collect the results.
ordering = optics_instance.get_ordering()
noise = optics_instance.get_noise()
clusters = optics_instance.get_clusters()

print(len(clusters))

# Plot the allocated clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, sample)
visualizer.show()

print("End")

In [None]:
# Read sample for clustering from some file
# Build the point list from the DataFrame loaded in an earlier cell.
sample = [[point.x, point.y, point.z] for _, point in df.iterrows()]

# OPTICS settings for this run; no target cluster count is requested.
radius, neighbors, amount_of_clusters = 0.4, 15, None
optics_instance = optics(
    sample,
    eps=radius,
    minpts=neighbors,
    amount_clusters=amount_of_clusters,
)

# Run the analysis.
optics_instance.process()

# Collect the results.
ordering = optics_instance.get_ordering()
noise = optics_instance.get_noise()
clusters = optics_instance.get_clusters()

print(len(clusters))

# Plot the allocated clusters.
visualizer = cluster_visualizer()
visualizer.append_clusters(clusters, sample)
visualizer.show()

print("End")

# TODO: try on binned data