In [None]:
import sys
from typing import Dict, Tuple, Any
# Libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, load_wine, load_breast_cancer, make_blobs
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
%matplotlib widget
import matplotlib.pyplot as plt
# Theme for matplotlib (provided by jupyterthemes)
from jupyterthemes import jtplot
jtplot.style()
# NiaPy
from NiaPy.algorithms.basic import ParticleSwarmOptimization, ComprehensiveLearningParticleSwarmOptimizer, OppositionVelocityClampingParticleSwarmOptimization, DifferentialEvolution, BatAlgorithm, BlackHole
from NiaPy.algorithms.modified import SelfAdaptiveDifferentialEvolution
from NiaPy.benchmarks import Clustering, ClusteringMin, ClusteringMinPenalty, ClusteringClassification
from NiaPy.util import StoppingTask, groupdatabylabel, classifie, clusters2labels
# Output options
# NumPy: never abbreviate arrays with "..." and render floats with three decimals.
np.set_printoptions(threshold=sys.maxsize, formatter={'float': '{: 0.3f}'.format})
# pandas: show up to 100 rows and 100 columns before truncating a frame.
for option_name in ('display.max_columns', 'display.max_rows'):
   pd.set_option(option_name, 100)
del option_name  # keep the notebook namespace identical to the two explicit calls

In [None]:
def grupdata(data: np.ndarray, C: np.ndarray) -> np.ndarray:
   """Group samples by their nearest centre.

   Args:
      data: Samples, one per row.
      C: Centres, one per row, with as many columns as a row of ``data``.

   Returns:
      An array with one entry per centre; entry ``i`` holds the samples whose
      closest centre (Euclidean distance, ties go to the lowest index) is ``C[i]``.
      If all groups have the same shape they are stacked into one regular array.
      Otherwise a 1-D object array of per-centre arrays is returned, because
      groups of different sizes cannot be stacked.
   """
   C = np.asarray(C)
   G = [[] for _ in range(C.shape[0])]
   for e in data:
      # Broadcasting yields the distance from e to every centre in one step.
      G[np.argmin(np.sqrt(np.sum((C - e) ** 2, axis=1)))].append(e)
   groups = [np.asarray(g) for g in G]
   if len({g.shape for g in groups}) <= 1:
      return np.asarray(groups)
   # Ragged result: build the object array explicitly instead of relying on
   # np.asarray, which rejects inhomogeneous shapes on current NumPy.
   out = np.empty(len(groups), dtype=object)
   for i, g in enumerate(groups):
      out[i] = g
   return out

def labeltransform(l):
   """Return a LabelEncoder fitted on the labels ``l``."""
   encoder = LabelEncoder()
   return encoder.fit(l)

# Algorithms

## KMeans

In [None]:
def kmeans(data, C, benchmark, nFES):
   """Cluster ``data`` with scikit-learn KMeans and store the centres in ``C['km']``.

   Args:
      data: Samples to cluster, passed to ``KMeans.fit``.
      C: Dictionary of results; the fitted centres are written under key ``'km'``.
      benchmark: Object whose ``function()`` returns the objective; it is called on
         the flattened centres to print the reached value.
      nFES: Used as ``max_iter`` of KMeans.

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   print ('KMeans')
   # NOTE(review): algorithm='full' is the older scikit-learn spelling; newer
   # releases may require 'lloyd' -- confirm against the installed version.
   model = KMeans(n_clusters=noc, init='random', n_init=100, max_iter=nFES, random_state=None, algorithm='full')
   model.fit(data)
   centers = model.cluster_centers_
   C['km'] = centers
   objective = benchmark.function()
   print('Min val: %f' % objective(centers.flatten()))
   print (C['km'])

## Differential evolution clustering

In [None]:
def de(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with DifferentialEvolution; store centres in ``C['de']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'de'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = DifferentialEvolution()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['de'] = result[0].reshape((noc, n_features))
   print (C['de'])

## Self Adaptive Differential Evolution clustering

In [None]:
def jde(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with SelfAdaptiveDifferentialEvolution; store centres in ``C['jde']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'jde'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = SelfAdaptiveDifferentialEvolution()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['jde'] = result[0].reshape((noc, n_features))
   print (C['jde'])

## Particle Swarm Optimization clustering

In [None]:
def pso(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with ParticleSwarmOptimization; store centres in ``C['pso']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'pso'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = ParticleSwarmOptimization()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['pso'] = result[0].reshape((noc, n_features))
   print (C['pso'])

## Comprehensive Learning Particle Swarm Optimizer clustering

In [None]:
def clpso(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with ComprehensiveLearningParticleSwarmOptimizer; store centres in ``C['clpso']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'clpso'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = ComprehensiveLearningParticleSwarmOptimizer()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['clpso'] = result[0].reshape((noc, n_features))
   print (C['clpso'])

## Opposition Velocity Clamping Particle Swarm Optimization clustering

In [None]:
def ovcpso(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with OppositionVelocityClampingParticleSwarmOptimization; store centres in ``C['ovcpso']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'ovcpso'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = OppositionVelocityClampingParticleSwarmOptimization()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['ovcpso'] = result[0].reshape((noc, n_features))
   print (C['ovcpso'])

## Bat Algorithm clustering

In [None]:
def ba(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with BatAlgorithm; store centres in ``C['ba']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'ba'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = BatAlgorithm()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['ba'] = result[0].reshape((noc, n_features))
   print (C['ba'])

## Black Hole clustering

In [None]:
def bh(data, C, benchmark, nFES):
   """Optimise the clustering benchmark with BlackHole; store centres in ``C['bh']``.

   Args:
      data: Samples; only ``len(data[0])`` (values per sample) is used here.
      C: Dictionary of results; the centres are written under key ``'bh'``.
      benchmark: Benchmark handed to ``StoppingTask``.
      nFES: Passed to ``StoppingTask`` as ``nFES`` (presumably the evaluation budget).

   Reads the notebook-level global ``noc`` for the number of clusters.
   """
   optimizer = BlackHole()
   print (optimizer.Name[0])
   n_features = len(data[0])
   # One coordinate per (cluster, feature) pair.
   task = StoppingTask(D=noc * n_features, nFES=nFES, benchmark=benchmark)
   result = optimizer.runTask(task)
   # result[1] is printed as the reached value, result[0] is the flat solution.
   print('Min val: %f' % result[1])
   C['bh'] = result[0].reshape((noc, n_features))
   print (C['bh'])

# Data preparation

## Generated data

In [None]:
# Synthetic dataset: 500 samples drawn around `noc` blob centres in `nof` dimensions.
# The fixed random_state makes the generated samples repeatable.
nof = 9
noc = 4
data, labels = make_blobs(
   n_samples=500,
   n_features=nof,
   centers=noc,
   random_state=1,
)

## Iris

In [None]:
# Load Iris as (features, targets). return_X_y is passed by keyword because
# recent scikit-learn releases no longer accept it positionally.
# NOTE: overwrites data/labels/noc set by any previously executed dataset cell.
data, labels = load_iris(return_X_y=True)
# One cluster per distinct class label.
noc = len(np.unique(labels))
# Stored cluster centres for this dataset, keyed by the method that produced them.
# Each value has one row per cluster and one column per feature, and is scored by
# the evaluation cell that iterates over C_best.
# 'report_bh' presumably holds centres taken from a published Black Hole report -- TODO confirm.
C_best = {
   'report_bh': np.asarray([[6.73305, 3.06805, 5.62938, 2.10908], [5.01186, 3.40303, 1.47143, 0.23532], [5.93229, 2.79775, 4.41857, 1.41608]]),
   'bh': np.asarray([[5.650, 2.878, 5.605, 1.298], [5.048, 3.426, 1.347, 0.397], [6.082, 2.698, 4.174, 0.834]]),
   'clpso': np.asarray([[5.793, 3.636, 1.792, 0.813], [5.146, 2.828, 4.051, 0.539], [4.603, 2.588, 5.491, 1.561]])
}

## The Cancer Wisconsin Dataset

In [None]:
# Load the Wisconsin breast cancer data as (features, targets). return_X_y is passed
# by keyword because recent scikit-learn releases no longer accept it positionally.
# NOTE: overwrites data/labels/noc set by any previously executed dataset cell.
data, labels = load_breast_cancer(return_X_y=True)
# One cluster per distinct class label.
noc = len(np.unique(labels))
# Stored cluster centres for this dataset, keyed by the method that produced them.
# Each value has one row per cluster and one column per feature, and is scored by
# the evaluation cell that iterates over C_best.
C_best = {
   'jde': np.asarray([[17.654, 9.710, 172.041, 1157.277, 0.053, 0.303, 0.000, 0.184, 0.177, 0.075, 1.617, 1.369, 0.847, 44.476, 0.010, 0.042, 0.318, 0.000, 0.021, 0.001, 14.003, 15.286, 92.127, 1787.341, 0.201, 0.265, 0.159, 0.191, 0.289, 0.206], [6.981, 11.369, 43.790, 1430.514, 0.130, 0.168, 0.315, 0.201, 0.254, 0.091, 1.317, 3.862, 9.089, 138.556, 0.010, 0.129, 0.053, 0.021, 0.062, 0.025, 7.930, 49.540, 190.337, 185.200, 0.160, 0.653, 0.625, 0.000, 0.247, 0.146]]),
   'bh': np.asarray([[14.366, 23.217, 71.421, 1229.990, 0.123, 0.237, 0.140, 0.109, 0.242, 0.076, 0.890, 2.616, 6.428, 107.904, 0.021, 0.063, 0.266, 0.017, 0.049, 0.009, 19.898, 30.594, 176.826, 1727.838, 0.117, 0.596, 0.755, 0.101, 0.538, 0.144], [13.153, 25.890, 153.234, 1569.438, 0.109, 0.115, 0.140, 0.084, 0.166, 0.081, 2.177, 3.526, 7.352, 338.804, 0.014, 0.054, 0.280, 0.041, 0.058, 0.015, 10.863, 41.038, 138.735, 1286.425, 0.113, 0.304, 0.565, 0.175, 0.297, 0.099]])
}

## Wine

In [None]:
# Load Wine as (features, targets). return_X_y is passed by keyword because
# recent scikit-learn releases no longer accept it positionally.
# NOTE: overwrites data/labels/noc set by any previously executed dataset cell.
data, labels = load_wine(return_X_y=True)
# One cluster per distinct class label.
noc = len(np.unique(labels))
# Stored cluster centres for this dataset, keyed by the method that produced them.
# Each value has one row per cluster and one column per feature, and is scored by
# the evaluation cell that iterates over C_best.
# 'report_bh' presumably holds centres taken from a published Black Hole report -- TODO confirm.
C_best = {
   'report_bh': np.asarray([[12.87096, 2.11606, 2.39431, 19.46178, 98.84497, 2.03580, 1.44765, 0.43320, 1.49193, 5.36444, 0.88652, 2.12046, 686.93205], [12.63469, 2.44139, 2.37083, 21.26462, 92.39332, 2.12789, 1.58430, 0.40206, 1.13521, 4.83774, 0.81497, 2.71348, 463.69590], [13.31401, 2.26752, 2.56857, 17.34232, 105.03031, 2.82361, 3.24277, 0.28947, 2.67352, 5.20622, 1.03286, 3.38781, 1137.44167]]),
   'jde': np.asarray([[14.472, 1.100, 2.389, 21.122, 121.500, 2.752, 3.342, 0.130, 2.343, 5.531, 1.104, 1.970, 1066.214], [14.546, 4.664, 2.626, 14.452, 156.808, 2.860, 4.975, 0.288, 3.475, 3.524, 1.185, 2.614, 313.572], [14.450, 5.324, 2.324, 10.798, 115.543, 1.063, 1.835, 0.236, 2.812, 8.268, 1.657, 3.978, 731.918]])
}

## Glass

In [None]:
# Glass dataset from a local CSV: the last column is the class label,
# every other column is a feature.
# NOTE: overwrites df/data/labels/noc set by any previously executed dataset cell.
df = pd.read_csv('glass.csv')
data = df.iloc[:, :-1].values
labels = df.iloc[:, -1].values
# One cluster per distinct class label.
noc = len(np.unique(labels))
# Stored cluster centres for this dataset, keyed by the method that produced them.
# Each value has one row per cluster and one column per feature, and is scored by
# the evaluation cell that iterates over C_best.
# 'report_bh' presumably holds centres taken from a published Black Hole report -- TODO confirm.
C_best = {
   'report_bh': np.asarray([[1.51474, 14.59500, 0.06789, 2.25305, 73.29150, 0.00937, 8.71261, 1.01385, -0.01161], [1.52117, 13.79589, 3.55131, 0.95428, 71.84335, 0.19175, 9.54099, 0.08156, 0.00710], [1.51745, 13.31326, 3.59522, 1.42358, 72.67659, 0.57686, 8.20015, -0.00741, 0.03106], [1.51326, 13.01074, -0.00358, 3.02527, 70.66960, 6.22227, 6.94351, -0.00710, -0.00041], [1.51743, 12.85016, 3.45851, 1.30894, 73.02754, 0.60704, 8.58511, 0.02745, 0.05789], [1.52095, 13.02689, 0.26652, 1.51925, 72.75985, 0.35290, 11.95589, -0.04668, 0.03072]]),
   'bh': np.asarray([[1.523, 13.305, 3.020, 2.195, 72.693, 2.201, 9.734, 0.939, 0.276], [1.518, 14.215, 3.066, 2.046, 71.789, 1.707, 10.576, 0.970, 0.231], [1.521, 12.349, 2.250, 1.126, 73.202, 1.175, 10.913, 1.403, 0.297], [1.519, 14.941, 1.919, 2.003, 72.113, 2.191, 8.563, 1.805, 0.300], [1.526, 13.190, 3.412, 1.522, 72.512, 1.194, 8.402, 0.587, 0.135], [1.529, 13.265, 2.718, 1.620, 70.936, 2.665, 11.614, 1.322, 0.219]]),
   'jde': np.asarray([[1.529, 16.186, 1.588, 2.817, 72.258, 0.000, 8.043, 0.824, 0.374], [1.532, 10.730, 0.495, 0.768, 71.836, 3.082, 8.435, 1.831, 0.504], [1.533, 16.842, 4.488, 3.237, 71.848, 5.223, 10.575, 0.154, 0.416], [1.516, 14.662, 4.253, 0.507, 72.073, 0.000, 7.095, 0.072, 0.114], [1.524, 11.501, 2.029, 1.710, 75.310, 3.203, 15.281, 1.354, 0.353], [1.512, 11.318, 3.625, 0.290, 75.101, 4.514, 5.430, 0.544, 0.085]])
}

## CMC

In [None]:
# CMC dataset from a local CSV: the last column is the class label,
# every other column is a feature.
# NOTE: overwrites df/data/labels/noc set by any previously executed dataset cell.
df = pd.read_csv('cmc.csv')
data = df.iloc[:, :-1].values
labels = df.iloc[:, -1].values
# One cluster per distinct class label.
noc = len(np.unique(labels))
# Stored cluster centres for this dataset, keyed by the method that produced them.
# Each value has one row per cluster and one column per feature, and is scored by
# the evaluation cell that iterates over C_best.
# 'report_bh' presumably holds centres taken from a published Black Hole report -- TODO confirm.
C_best = {
   'report_bh': np.asarray([[24.42273, 3.03421, 3.51476, 1.79348, 0.92053, 0.82924, 2.29826, 2.95830, 0.02510], [43.63258, 2.99608, 3.45429, 4.57393, 0.82686, 0.83295, 1.82888, 3.47833, 0.11822], [33.49565, 3.13181, 3.56438, 3.64850, 0.79404, 0.66550, 2.09068, 3.29362, 0.06771]]),
   'pso': np.asarray([[25.671, 2.096, 4.000, 1.963, 0.913, 0.700, 1.000, 3.505, 0.234], [46.014, 4.000, 4.000, 16.000, 0.000, 1.000, 2.017, 3.675, 1.000], [46.394, 2.021, 2.468, 15.206, 1.000, 0.000, 4.000, 3.004, 0.145]])
}

# Benchmark preparation

## Basic

In [None]:
# Select the basic clustering objective, built from the feature matrix only.
# Rebinds `benchmark`, so it replaces whichever benchmark cell was run before.
benchmark = Clustering(data)

## Min

In [None]:
# Select the ClusteringMin objective, built from the feature matrix only.
# Rebinds `benchmark`, so it replaces whichever benchmark cell was run before.
benchmark = ClusteringMin(data)

## Min with penalty

In [None]:
# Select the ClusteringMinPenalty objective, built from the feature matrix only.
# Rebinds `benchmark`, so it replaces whichever benchmark cell was run before.
benchmark = ClusteringMinPenalty(data)

## Clustering for classification

In [None]:
# Select the ClusteringClassification objective; unlike the other variants it is
# given the class labels as well as the feature matrix.
# Rebinds `benchmark`, so it replaces whichever benchmark cell was run before.
benchmark = ClusteringClassification(data, labels)

# Classification

In [None]:
# Run every clustering routine; each one stores its centres in C under its own key.
funcs = [kmeans, de, jde, pso, clpso, ovcpso, ba, bh]
C = {}
nFES = 1000
for algo in funcs:
   algo(data, C, benchmark, nFES)
   print ()

# Score each set of centres: the fraction of samples whose label, derived from the
# cluster that classifie assigns to the sample, equals the true label.
lt = labeltransform(labels)
gl = groupdatabylabel(data, labels, lt)
for k, V in C.items():
   l = clusters2labels(V, gl)
   ok = 0
   for i, d in enumerate(data):
      if lt.inverse_transform([l[classifie(d, V)]])[0] == labels[i]:
         ok += 1
   print ('Sucess of %s: %f' % (k, ok / len(data)))

In [None]:
# Score the stored centres in C_best against the currently loaded dataset: the
# fraction of samples whose label, derived from the cluster that classifie assigns
# to the sample, equals the true label.
# NOTE: C_best is only defined by the dataset cells that declare it.
lt = labeltransform(labels)
gl = groupdatabylabel(data, labels, lt)
for k, V in C_best.items():
   l = clusters2labels(V, gl)
   ok = 0
   for i, d in enumerate(data):
      if lt.inverse_transform([l[classifie(d, V)]])[0] == labels[i]:
         ok += 1
   print ('Sucess of %s: %f' % (k, ok / len(data)))