In [22]:
import numpy as np
from sklearn.datasets import make_classification
from cop_kmeans import CopKMeans
import matplotlib.pyplot as plt
from active_semi_clustering.semi_supervised.pairwise_constraints import COPKMeans
from sklearn.metrics import mutual_info_score, adjusted_mutual_info_score, normalized_mutual_info_score

In [23]:
def without_constraints(X, y):
    model = CopKMeans(3, 200)

    model.fit(X, [], [])

    try:
        prediction = model.predict(X)
    except:
        return -1, -1, -1

    mi = mutual_info_score(y, prediction)
    nmi = normalized_mutual_info_score(y, prediction)
    ami = adjusted_mutual_info_score(y, prediction)
    
    print(f"Without constraints : {mi = }, {nmi = }, {ami = }")
    
    return mi, nmi, ami

In [24]:
def calculate_constraints(X, y):
    ml = []
    cl = []

    for i, pt1 in enumerate(y):
        for j, pt2 in enumerate(y[i+1:]):
            if pt1 == pt2 :
                ml.append((i, j+i+1))
            else:
                cl.append((i, j+i+1))
    
    # Sampling 10% of ml and 10% of cl
    ml, cl = np.array(ml), np.array(cl)
    ml_subset = ml[np.random.choice(len(ml), int(0.01*len(ml)))]
    cl_subset = cl[np.random.choice(len(cl), int(0.01*len(cl)))]

    ml_subset = [tuple(l) for l in ml_subset]
    cl_subset = [tuple(l) for l in cl_subset]
    
    return ml_subset, cl_subset

In [25]:
def our_cop(X, y, ml, cl):
    model = CopKMeans(3, 200)

    # model.fit(X, pairwise_constraints[0], pairwise_constraints[1])
    model.fit(X, ml, cl)

    try:
        prediction = model.predict(X)
    except:
        return -1, -1, -1

    mi = mutual_info_score(y, prediction)
    nmi = normalized_mutual_info_score(y, prediction)
    ami = adjusted_mutual_info_score(y, prediction)

    print(f"Our model : {mi = }, {nmi = }, {ami = }")
    
    return mi, nmi, ami

In [26]:
def theirs(X, y, ml, cl):

    model = COPKMeans(3, 200)

    try:
        model.fit(X, ml, cl)
        prediction = model.predict(X)
    except:
        return -1, -1, -1

    mi = mutual_info_score(y, prediction)
    nmi = normalized_mutual_info_score(y, prediction)
    ami = adjusted_mutual_info_score(y, prediction)

    print(f"Theirs {mi = }, {nmi = }, {ami = }")
    
    return mi, nmi, ami

In [27]:
# Dataset, algorithm, metric
scores = np.zeros((10, 3, 3))
for i, random_state in enumerate(list(range(10))):
    X, y = make_classification(n_samples=100, n_features=10, n_informative=10, n_redundant=0, n_classes=4, n_clusters_per_class=1, random_state=random_state)
    
    ml, cl = calculate_constraints(X, y)
    
    mi, nmi, ami = without_constraints(X, y)
    scores[i, 0] = [mi, nmi, ami]
    mi, nmi, ami = our_cop(X, y, ml, cl)
    scores[i, 1] = [mi, nmi, ami]
    mi, nmi, ami = theirs(X, y, ml, cl)
    scores[i, 2] = [mi, nmi, ami]

Itération 0
Itération 50
Itération 100
Itération 150
Without constraints : mi = 0.275428887070477, nmi = 0.2284699220268707, ami = 0.2078461198812633
Itération 0
67 [[ 3.66445625 -0.66305392  2.34717219 -0.0985317  -2.82522693 -3.71254615
   1.33004371 -0.60298383  0.34655109  0.94544628]]
FAIL
Itération 0
Itération 50
Itération 100
Itération 150
Without constraints : mi = 0.3183776882210797, nmi = 0.26460572320868364, ami = 0.24490281381261123
Itération 0
Itération 50
Itération 100
Itération 150
Our model : mi = 0.37916808778082817, nmi = 0.3056305860618092, ami = 0.28777735471258875
Itération 0
Itération 50
Itération 100
Itération 150
Without constraints : mi = 0.20156718332878623, nmi = 0.16838028286506215, ami = 0.1459310028070149
Itération 0
74 [[ 0.60214736  0.04117629  1.40650512 -0.52834433  1.35492984  0.49383318
  -0.80227593 -1.16841291 -0.96878728  2.66345099]]
FAIL
Itération 0
Itération 50
Itération 100
Itération 150
Without constraints : mi = 0.2851618024757442, nmi = 0.2

In [28]:
print(scores)

[[[ 0.27542889  0.22846992  0.20784612]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[ 0.31837769  0.26460572  0.24490281]
  [ 0.37916809  0.30563059  0.28777735]
  [-1.         -1.         -1.        ]]

 [[ 0.20156718  0.16838028  0.145931  ]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[ 0.2851618   0.23348086  0.21334517]
  [ 0.26189297  0.21229706  0.19189702]
  [-1.         -1.         -1.        ]]

 [[ 0.26229513  0.21138549  0.19111372]
  [ 0.3429411   0.27623941  0.25764712]
  [-1.         -1.         -1.        ]]

 [[ 0.31085076  0.26259318  0.24245154]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[ 0.37396495  0.30264062  0.28461918]
  [ 0.39261447  0.31601204  0.29845844]
  [-1.         -1.         -1.        ]]

 [[ 0.35077701  0.28745565  0.26870226]
  [-1.         -1.         -1.        ]
  [-1.         -1.         -1.        ]]

 [[ 0.30311206  0.249074