In [14]:
import skfuzzy
import numpy as np
import matplotlib.pyplot as plt
from utils.datastore import *

In [15]:
ds = DataStore()
D = ds.build_datasets(labeled=False)

In [69]:
# Hyperparameters:
metric = 'cityblock'
c = 7
epsilon = 1e-06
T = 100
m_list = [1.1,1.6,2.0]
niter = 50

all_results = []

for Di in D:
    results = []
    for i in range(niter):
        for m in m_list:
            results.append(
                skfuzzy.cmeans(Di.transpose(), c, m, error=epsilon, metric=metric, 
                        maxiter=T, init=None)
            )
    all_results.append(results)        

In [87]:
# Sort results by objective function
for results in all_results:
    results.sort(key=lambda r: r[4][-1])

# Get the best
best_results = [ results[0] for results in all_results ]

In [88]:
# Modified Partition Coefficient
def modified_partition_coefficient(ncenters, partition_coefficient):
    return 1 - (ncenters/(ncenters+1))*(1-partition_coefficient)

# Partition Entropy
def partition_entropy(U):
    C, N = U.shape
    s = 0
    for i in range(C):
        for j in range(N):
            s += U[i][j] * np.log(U[i][j])
    return -(1/N)*s
    
for i in range(3):
    print("Partition Entropy D{0}: {1}".format(i,partition_entropy(best_results[i][1])))
    print("Modified Partition Coefficient D{0}: {1}\n".format(i,modified_partition_coefficient(7,best_results[i][-1])))
    

Partition Entropy D0: 0.23285033666164226
Modified Partition Coefficient D0: 0.8860205697375879

Partition Entropy D1: 0.5992513522741237
Modified Partition Coefficient D1: 0.7533267885183923

Partition Entropy D2: 0.6925175063317678
Modified Partition Coefficient D2: 0.7128704579554344



In [125]:
# Crisp partitions

def build_crisp_partition(U):
    C = []
    M,N = U.shape
    for j in range(N):
        cut_value = np.sort(U[:,j])[-2]
        C.append(skfuzzy.lambda_cut(U[:,j],cut_value))
    return C

for idx, best_result in enumerate(best_results):
    C = build_crisp_partition(best_result[1])
    print("Crisp partition D{0}: {1}".format(idx,np.sum(C,axis=0)))

Crisp partition D0: [ 205    8 2069    3    5    4   14]
Crisp partition D1: [311 312 265 175 647 443 155]
Crisp partition D2: [451 299 329   7 258 655 309]


In [None]:
# Adjusted Rand Index