Fuzzy classification

In [1]:
import numpy as np

In [2]:
# lets suppose we have a fuzzy set formed by composition, describing 5 data points
R = np.array([
    [1, .8, 0, .1, .2],
    [.8, 1, .4, 0, .9],
    [0, .4, 1, 0, 0],
    [.1, 0, 0, 1, .5],
    [.2, .9, 0, .5, 1],
])

In [3]:
# Lets create a composition max-min matrix (R_t . R_t)
test = np.array([
    [
        max(
            min(
                R[i][k], R[k][j]
            ) for k in range(R.shape[1])
        ) for j in range(R.shape[0])
    ] for i in range(R.shape[1])
])
print(test)

[[1.  0.8 0.4 0.2 0.8]
 [0.8 1.  0.4 0.5 0.9]
 [0.4 0.4 1.  0.  0.4]
 [0.2 0.5 0.  1.  0.5]
 [0.8 0.9 0.4 0.5 1. ]]


In [4]:
# Now, creating a random matrix
rng = np.random.default_rng()
test_random = rng.random((4, 5)).round(1)
print(test_random)

[[0.  0.5 0.9 1.  0.3]
 [0.4 0.7 0.7 0.2 0.4]
 [0.3 0.2 1.  0.3 0.1]
 [0.6 0.5 0.6 0.9 0.3]]


In [5]:
def fuzzy_equivalence_relations(
    test_composed: np.array,
    R_t: np.array
) -> dict:

    """
    Divide the universe X into mutually exclusive classes
    finding by lambda-cuts the fuzzy equivalence relations

    Args
        test_composed (np.array): composition of values
        R_t (np.array): universe X

    Returns
        dict: equivalence relations by lambda-cuts
    """
        
    classification = dict()
    
    for lambda_i in np.unique(R_t):
    
        R_lambda_composed = np.zeros((4, 5))
        R_lambda_random = np.zeros((4, 5))
        
        for row in range(R_t.shape[1] - 1):
            for column in range(R_t.shape[0] - 1):
                
                if (test_composed[row][column] == R_t[row][column] and test_composed[row][column] == 1):
                    R_lambda_composed[row][column] = 1
                elif (test_composed[row][column] == R_t[row][column] and test_composed[row][column] == lambda_i):
                    R_lambda_composed[row][column] = 1
        
        print(f"\nR (Lambda: {lambda_i})")
        print(R_lambda_composed)
    
        rows = list()
        relationships = list()
        
        for row in range(R_lambda_composed.shape[1] - 1):
    
            lambda_row = R_lambda_composed[row].tolist()
    
            if lambda_row not in rows:
                rows.append(lambda_row)
    
            else:
                idx = rows.index(lambda_row)
                new_idx = len(rows)
                relationship = [idx, new_idx]
    
                if relationship not in relationships:
                    relationships.append(relationship)
    
                else:
                    idx = relationships.index(relationship)
                    relationship[idx].append(new_idx)
            
        classification[f'{lambda_i}'] = relationships
        
    return {key: val for key, val in classification.items() if len(val) > 0}

In [6]:
r_t = fuzzy_equivalence_relations(test, R)
print("\nClassification")
print(r_t)


R (Lambda: 0.0)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 1. 0.]
 [0. 0. 1. 1. 0.]]

R (Lambda: 0.1)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.2)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.4)
[[1. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0.]
 [0. 1. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.5)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.8)
[[1. 1. 0. 0. 0.]
 [1. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.9)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 1.0)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

Classification
{'0.0': [[2, 3]], '0.4': [[1, 2]], '0.8': [[0, 1]]}


In [7]:
data = np.array([
    [.3, .2, .1, .7, .4],
    [.6, .4, .6, .2, .6],
    [.1, .4, .3, .1, 0]
])

In [8]:
# cosine amplitude approach

def cosine_amplitude(
    np_array: np.array
) -> np.array:

    """
    Each element of a relation, r_ij, results from a pairwise comparison of two data samples, x_i, x_j, 
    where the strength of the relationship between data sample x_i and data sample x_j is given 
    by the membership value expressing that strength: r_ij = mu_r(x_i, y_i)
    The relation matrix will be of size n x n and, as will be the case for all similarity relations, 
    the matrix will be reflexive and symmetric, hance a tolerance relation.

    Args
        np_array (np.array): array with data

    Returns
        relation matrix
    """
    n = np_array.shape[0]
    R_ij = np.zeros((n, n))

    for i in range(np_array.shape[0]):
        for j in range(np_array.shape[0]):

            if j >= i:
                num = 0
                den_i = 0
                den_j = 0
                
                for k in range(np_array.shape[1]):
                    
                    num += np_array[i][k]*np_array[j][k]
                    den_i += np_array[i][k]**2
                    den_j += np_array[j][k]**2
                
                R_ij[i][j] = num / np.sqrt(den_i * den_j)
                R_ij[j][i] = num / np.sqrt(den_i * den_j)

    return R_ij

In [9]:
r_ij = cosine_amplitude(data.T)
print(r_ij)

[[1.         0.83550442 0.91304348 0.6821865  0.98143298]
 [0.83550442 1.         0.93379906 0.58969198 0.73960026]
 [0.91304348 0.93379906 1.         0.44141479 0.81786082]
 [0.6821865  0.58969198 0.44141479 1.         0.75485136]
 [0.98143298 0.73960026 0.81786082 0.75485136 1.        ]]


In [10]:
# max_min composition 3 times <-> 3 parameters

def max_min_composition(
    np_array: np.array
) -> np.array:

    """
    A way (composition) to find a relation T that relates 
    the same elements in universe X that R countains 
    to the same elements in universe Z that S countains.

    NOTE:
    This case is exclusive for R . R composition
    Don't use for R . S compositions
    """

    return np.array([
        [
            max(
                min(
                    np_array[i][k], np_array[k][j]
                ) for k in range(np_array.shape[1])
            ) for j in range(np_array.shape[0])
        ] for i in range(np_array.shape[1])
    ])

In [11]:
r_2 = max_min_composition(r_ij)
r_3 = max_min_composition(r_2)
print("\nComposition R3")
print(r_3)


Composition R3
[[1.         0.91304348 0.91304348 0.75485136 0.98143298]
 [0.91304348 1.         0.93379906 0.75485136 0.91304348]
 [0.91304348 0.93379906 1.         0.75485136 0.91304348]
 [0.75485136 0.75485136 0.75485136 1.         0.75485136]
 [0.98143298 0.91304348 0.91304348 0.75485136 1.        ]]


In [12]:
equivalence = fuzzy_equivalence_relations(r_3, r_ij)
print("\nClassification")
print(equivalence)


R (Lambda: 0.44141479464782046)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.5896919751144688)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.6821865008193588)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.7396002616336387)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.7548513560963972)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.8178608201095308)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.8355044182110836)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.9130434782608697)
[[1. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [1. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.9337990556476817)
[[1. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0.]
 [0. 1. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]

R (Lambda: 0.9814329841314368)
[[1. 0. 0. 0. 0.]
 [0.

FUZZY CLUSTERING

In [13]:
n = 5
c = 2

In [14]:
from scipy.special import stirling2
# for calculation of "N subset K"

In [15]:
def unique_c_partitions(
    n: int,
    c: int
) -> int:

    """
    Calculates the unique C partitions

    Args
        n (int): number of points
        c (int): number of clusters

    Returns
        int: number of cluster partitions
    """

    return int(stirling2(n, c, exact=True))

In [16]:
n_Mc = unique_c_partitions(n, c)
print(n_Mc)

15


In [None]:
from typing import Union

In [None]:
def HCM_algo(
    U: np.array,
    v: np.array
) -> int:

    """
    within-class sum of squared errors approach
    that uses a Euclidean normalization to characteriza distances
    J(U, v)
    U = partition matrix
    v = vector of cluster centers
    """

    def euclidean_distance(
        x_k: Union[int, float],
        v_i: Union[int, float]
    ) -> Union[int, float]:
        
        """
        Measure (in m-dimensional feature space, Rm) between the x_k and v_i
        x_k = kth data sample
        v_i = ith cluster center
        """
    
