In [1]:
import contextlib
import os
from itertools import combinations, product

import numpy as np
import scipy as sp
import scipy.stats  # explicit submodule import so `sp.stats` is always available
from scipy.special import softmax

In [2]:
def is_bad(i, j, k):
    """Return True when the three labels sum to exactly 1, otherwise False.

    For labels drawn from {-1, 1} (the values ``prob_clustering`` enumerates)
    the sum equals 1 only when exactly two labels are +1 and one is -1.
    """
    label_total = np.sum([i, j, k])
    return bool(label_total == 1)

In [3]:
def clustering_cost(i, j, k, s_i, s_j, s_k):
    """Total penalty of a label triple against its three scores.

    Each (label, score) pair contributes ``|label * score|`` when the product
    is negative and 0 otherwise; the three contributions are added up.
    """
    label_score_pairs = ((i, s_i), (j, s_j), (k, s_k))
    return sum(
        np.abs(np.minimum(0, label * score))
        for label, score in label_score_pairs
    )

In [4]:
def prob_clustering(s_i, s_j, s_k, beta=1):
    """Softmax distribution over the label triples in {-1, 1}^3 that pass ``is_bad``.

    Every triple not rejected by ``is_bad`` is scored with ``clustering_cost``
    and receives probability proportional to ``exp(-beta * cost)``.

    Returns
    -------
    probs : dict
        Maps each admissible ``(i, j, k)`` tuple to its probability, inserted
        in ``itertools.product`` order.
    clustering_costs : list
        Costs of the admissible triples, in the same order as ``probs``.
    """
    admissible = [
        labels for labels in product([-1, 1], repeat=3) if not is_bad(*labels)
    ]
    clustering_costs = [
        clustering_cost(*labels, s_i, s_j, s_k) for labels in admissible
    ]
    clustering_probs = softmax(-beta * np.array(clustering_costs))

    # Triples and probabilities share the same ordering, so pair them directly.
    probs = dict(zip(admissible, clustering_probs))
    return probs, clustering_costs

In [5]:
def marginal_prob(ind, val, probs):
    """Probability that the labels at positions ``ind`` take the values ``val``.

    Parameters
    ----------
    ind : sequence of int
        Positions inside each key tuple of ``probs`` to constrain. Any number
        of positions in any order is accepted (the earlier version silently
        returned 0 for anything other than a single index or the ordered
        pairs (0, 1), (0, 2), (1, 2)).
    val : sequence
        Required value for each position in ``ind``; must have the same length.
    probs : dict
        Maps label tuples to probabilities.

    Returns
    -------
    Sum of ``probs[key]`` over all keys that satisfy every constraint. The
    result is the int 0 when no key matches. An empty ``ind`` constrains
    nothing, so the total mass of ``probs`` is returned.

    Raises
    ------
    ValueError
        If ``ind`` and ``val`` differ in length.
    """
    if len(ind) != len(val):
        raise ValueError(
            f"ind and val must have the same length, got {len(ind)} and {len(val)}"
        )

    constraints = list(zip(ind, val))
    marginal_probability = 0
    # Accumulate in dict order so results match the previous implementation
    # bit-for-bit on the inputs it supported.
    for labels, prob in probs.items():
        if all(labels[pos] == wanted for pos, wanted in constraints):
            marginal_probability += prob
    return marginal_probability



In [183]:
def info_gain2(e, i, j, k, beta, verbose=True):
    """Expected reduction in clustering entropy (nats) from observing label ``e``.

    Builds the clustering distribution with ``prob_clustering(i, j, k, beta)``
    and returns ``H(C) - sum_v P(c_e = v) * H(C | c_e = v)`` for ``v`` in
    ``{-1, 1}``, where ``C`` is the label triple and ``c_e`` its ``e``-th entry.

    Parameters
    ----------
    e : int
        Position (0, 1 or 2) of the label being observed.
    i, j, k : float
        Similarity scores forwarded to ``prob_clustering``.
    beta : float
        Softmax scale factor forwarded to ``prob_clustering``.
    verbose : bool, default True
        When True, print the per-value conditional entropies, the distribution,
        the three +1 marginals and both entropies (the historical behaviour).
        Pass False when calling in a loop.

    Returns
    -------
    float
        ``entropy - cond_entropy``.
    """
    # Same small constant the original added inside the log.
    log_eps = 0.00000001

    probs, _ = prob_clustering(i, j, k, beta=beta)
    entropy = sp.stats.entropy(list(probs.values()))

    cond_entropy = 0
    for e_val in [-1, 1]:
        # P(c_e == e_val); also the normaliser of the conditional distribution,
        # so it is computed once per value instead of once per clustering.
        marg_prob = marginal_prob([e], [e_val], probs)

        e_entropy = 0
        # A zero marginal (possible when the softmax underflows for large
        # beta) used to produce 0/0 = nan; such a value contributes nothing.
        if marg_prob > 0:
            for ijk, prob in probs.items():
                if ijk[e] != e_val:
                    continue  # zero conditional probability, zero contribution
                cond_prob = prob / marg_prob
                e_entropy -= cond_prob * np.log(cond_prob + log_eps)

        if verbose:
            print(e_val, e_entropy)
        cond_entropy += marg_prob * e_entropy

    if verbose:
        print(probs)
        print("e1 marginal: ", marginal_prob([0], [1], probs))
        print("e2 marginal: ", marginal_prob([1], [1], probs))
        print("e3 marginal: ", marginal_prob([2], [1], probs))
        print("entropy: ", entropy)
        print("cond_entropy: ", cond_entropy)
    return entropy - cond_entropy

In [184]:
# Sanity check: Shannon entropy (nats) of the two-outcome distribution (p, 1 - p).
# scipy's entropy treats 0 * log(0) as 0, so no epsilon inside the log is needed
# (the previous 1e-19 added to 1 was below double precision and had no effect).
p = 0
entropy = sp.stats.entropy([p, 1 - p])
entropy

-0.0

In [185]:
# Information gain of label e=0 for similarity scores (0, -1, -1) at beta=1.
inf_gain = info_gain2(e=0, i=0, j=-1, k=-1, beta=1)
print(inf_gain)

-1 0.9753277991662225
1 0.36533383508720807
{(-1, -1, -1): 0.3482992693129455, (-1, -1, 1): 0.1281321405552681, (-1, 1, -1): 0.1281321405552681, (1, -1, -1): 0.3482992693129455, (1, 1, 1): 0.04713718026357273}
e1 marginal:  0.39543644957651825
e2 marginal:  0.1752693208188408
e3 marginal:  0.1752693208188408
entropy:  1.4052318411170015
cond_entropy:  0.7341139517477109
0.6711178893692906


In [119]:
# Information gain of label e=0 for similarity scores (-0.2, 1, 1) at beta=1.
inf_gain = info_gain2(e=0, i=-0.2, j=1, k=1, beta=1)
print(inf_gain)

-1 1.0173571775552153
1 0.36533383508720807
{(-1, -1, -1): 0.07516004274497577, (-1, -1, 1): 0.20430617841987275, (-1, 1, -1): 0.20430617841987275, (1, -1, -1): 0.061535838397967335, (1, 1, 1): 0.4546917620173114}
e1 marginal:  0.5162276004152787
e2 marginal:  0.6589979404371842
e3 marginal:  0.6589979404371842
entropy:  1.37338517492598
cond_entropy:  0.6807647320582064
0.6926204428677736


In [11]:
# Information gain of label e=0 for similarity scores (1, 1, 1) at beta=1.
# NOTE(review): the saved output lacks the per-value entropy lines that
# info_gain2 prints, so it appears to predate the current function body.
inf_gain = info_gain2(e=0, i=1, j=1, k=1, beta=1)
print(inf_gain)

{(-1, -1, -1): 0.03419927913484093, (-1, -1, 1): 0.09296327901863667, (-1, 1, -1): 0.09296327901863667, (1, -1, -1): 0.09296327901863667, (1, 1, 1): 0.686910883809249}
e1 marginal:  0.7798741628278857
e2 marginal:  0.7798741628278857
e3 marginal:  0.7798741628278857
entropy:  1.0359282245806907
cond_entropy:  0.4960470009330862
0.5398812236476045


In [692]:
# Information gain of label e=0 for similarity scores (-0.1, -0.1, 0.55) at beta=1.
inf_gain = info_gain2(e=0, i=-0.1, j=-0.1, k=0.55, beta=1)
print(inf_gain)

{(-1, -1, -1): 0.1677290793574269, (-1, -1, 1): 0.29071693298038, (-1, 1, -1): 0.15176754709532272, (1, -1, -1): 0.15176754709532272, (1, 1, 1): 0.2380188934715477}
e1 marginal:  0.3897864405668704
e2 marginal:  0.3897864405668704
e3 marginal:  0.5287358264519277
entropy:  1.572557807692012
cond_entropy:  0.9039053183427251
0.6686524893492868


In [700]:
# Information gain of label e=0 for similarity scores (0.1, -0.55, -0.55) at beta=2.
inf_gain = info_gain2(e=0, i=0.1, j=-0.55, k=-0.55, beta=2)
print(inf_gain)

{(-1, -1, -1): 0.21343723080861984, (-1, -1, 1): 0.07104708232078241, (-1, 1, -1): 0.07104708232078241, (1, -1, -1): 0.5801825460236903, (1, 1, 1): 0.06428605852612503}
e1 marginal:  0.6444686045498154
e2 marginal:  0.13533314084690745
e3 marginal:  0.13533314084690745
entropy:  1.1976767957499692
cond_entropy:  0.5468730657832203
0.650803729966749


In [124]:
# Clustering distribution for similarity scores (-0.5, 1, 1) at beta=1.
# NOTE(review): this cell is repeated below with different scores; a small
# helper taking the scores as arguments would remove the duplication.
probs, costs = prob_clustering(-0.5, 1, 1, beta=1)
# Expected cost: probabilities and costs come back in the same order, so an
# element-wise product followed by a sum weights each cost by its probability.
expected_loss = np.sum(costs*np.array(list(probs.values())))
print("probs: ", probs)
print("entropy of clustering: ", sp.stats.entropy(list(probs.values())))
# Marginal probability that the first label equals -1.
print("marginal prob: ", marginal_prob([0], [-1], probs))
print("expected loss: ", expected_loss)

probs:  {(-1, -1, -1): 0.08676952671672007, (-1, -1, 1): 0.23586402773805182, (-1, 1, -1): 0.23586402773805182, (1, -1, -1): 0.052628378282445196, (1, 1, 1): 0.38887403952473104}
entropy of clustering:  1.4157748680271673
marginal prob:  0.5584975821928237
expected loss:  0.9712750743780223


In [125]:
# Clustering distribution for similarity scores (-0.5, -1, -1) at beta=1.
probs, costs = prob_clustering(-0.5, -1, -1, beta=1)
# Expected cost: probability-weighted sum of the per-clustering costs.
expected_loss = np.sum(costs*np.array(list(probs.values())))
print("probs: ", probs)
print("entropy of clustering: ", sp.stats.entropy(list(probs.values())))
# Marginal probability that the first label equals -1.
print("marginal prob: ", marginal_prob([0], [-1], probs))
print("expected loss: ", expected_loss)

probs:  {(-1, -1, -1): 0.4124775207875908, (-1, -1, 1): 0.1517419998431209, (-1, 1, -1): 0.1517419998431209, (1, -1, -1): 0.25018026279992894, (1, 1, 1): 0.03385821672623858}
entropy of clustering:  1.3987932426544998
marginal prob:  0.7159615204738325
expected loss:  0.5132196729018027


In [127]:
# Clustering distribution for similarity scores (0.5, 1, 1) at beta=1.
probs, costs = prob_clustering(0.5, 1, 1, beta=1)
# Expected cost: probability-weighted sum of the per-clustering costs.
expected_loss = np.sum(costs*np.array(list(probs.values())))
print("probs: ", probs)
print("entropy of clustering: ", sp.stats.entropy(list(probs.values())))
# Marginal probability that the first label equals -1.
print("marginal prob: ", marginal_prob([0], [-1], probs))
print("expected loss: ", expected_loss)

probs:  {(-1, -1, -1): 0.04933939754869739, (-1, -1, 1): 0.13411838778374088, (-1, 1, -1): 0.13411838778374088, (1, -1, -1): 0.08134691422206715, (1, 1, 1): 0.6010769126617537}
entropy of clustering:  1.1974298638232734
marginal prob:  0.31757617311617914
expected loss:  0.6883974856671004


In [59]:
# Marginal probability that the first label equals -1 under the current `probs`.
# NOTE(review): the saved output differs from the value the preceding cell
# printed for this same call, so it appears to come from an out-of-order run;
# re-check after Restart & Run All.
marginal_prob([0], [-1], probs)

0.6038169741953447

In [187]:
# Softmax over three raw scores.
# NOTE(review): this rebinds `probs` (until now the dict returned by
# prob_clustering) to a plain array; a separate name would avoid breaking
# later cells that pass `probs` to marginal_prob.
probs = softmax([0,2,3])
probs

array([0.03511903, 0.25949646, 0.70538451])

In [116]:
# Renormalise entry 1 over entries 1 and 2 only, i.e. condition the softmax on
# the last two outcomes.
probs[1] / (probs[1] + probs[2])

0.2689414213699951

In [117]:
# Softmax over just the last two scores; rebinds `probs` to a length-2 array.
probs = softmax([2,3])
probs

array([0.26894142, 0.73105858])

In [435]:
# Marginal probability that the first label equals -1.
# NOTE(review): in file order `probs` is the array from softmax([2,3]) here,
# which has no .keys() as marginal_prob requires; the saved output must come
# from a kernel state where `probs` was still a clustering dict.
marginal_prob([0], [-1], probs)

0.731076429375734

In [408]:
# Joint marginal probability that the first and third labels both equal -1.
# NOTE(review): relies on `probs` being a clustering dict, which is not what
# the cells directly above assign — confirm the intended input.
marginal_prob([0, 2], [-1, -1], probs)

0.7310432415706374

In [76]:
# Marginal probability that the first label equals +1.
# NOTE(review): relies on `probs` being a clustering dict, which is not what
# the cells directly above assign — confirm the intended input.
marginal_prob([0], [1], probs)

0.3069411706206717

In [486]:
def information_gain(e, i, j, k, beta, verbose=True):
    """Mutual information (nats) between label ``e`` and the other two labels.

    Under the distribution from ``prob_clustering(i, j, k, beta)`` this returns
    ``H(c_e) - H(c_e | c_e1, c_e2)``, where ``e1 < e2`` are the two positions
    other than ``e``.

    Parameters
    ----------
    e : int
        Position (0, 1 or 2) of the label of interest.
    i, j, k : float
        Similarity scores forwarded to ``prob_clustering``.
    beta : float
        Softmax scale factor forwarded to ``prob_clustering``.
    verbose : bool, default True
        When True, print the marginal entropy and the conditional entropy
        (the historical behaviour).

    Returns
    -------
    float
        ``entropy_e - conditional_entropy``.

    Raises
    ------
    ValueError
        If ``e`` is not 0, 1 or 2 (previously this surfaced later as an
        unbound-variable error).
    """
    if e not in (0, 1, 2):
        raise ValueError(f"e must be 0, 1 or 2, got {e!r}")
    e1, e2 = [pos for pos in range(3) if pos != e]

    probs, _ = prob_clustering(i, j, k, beta)

    # Two-point distribution of c_e: [P(c_e = -1), P(c_e = +1)].
    marginal_e = marginal_prob([e], [1], probs)
    marginal_e_dist = np.array([1 - marginal_e, marginal_e])
    # Drop zero-probability outcomes: 0 * log(0) would evaluate to nan.
    support = marginal_e_dist[marginal_e_dist > 0]
    entropy_e = -np.sum(support * np.log(support))
    if verbose:
        print(entropy_e)

    conditional_entropy = 0
    # `labels` is deliberately not unpacked into i, j, k so the score
    # parameters are not shadowed.
    for labels, prob in probs.items():
        if prob <= 0:
            continue  # contributes nothing; avoids log(0)
        pair_prob = marginal_prob([e1, e2], [labels[e1], labels[e2]], probs)
        # prob / pair_prob is P(c_e | c_e1, c_e2) for this label triple.
        conditional_entropy -= prob * np.log(prob / pair_prob)

    if verbose:
        print(conditional_entropy)
    return entropy_e - conditional_entropy


In [495]:
# Mutual-information variant for label e=0 with scores (0.1, -1, -1) at beta=1.
info_gain = information_gain(e=0, i=0.1, j=-1, k=-1, beta=1)
print(info_gain)

0.6801545769159463
0.4870643297871179
0.19309024712882839


In [336]:
# Mutual-information variant for label e=0 with scores (0.1, 0.1, 0.1) at beta=1.
# NOTE(review): the saved output is a NameError from a run made before
# information_gain was defined; re-run to refresh it.
info_gain = information_gain(e=0, i=0.1, j=0.1, k=0.1, beta=1)
print(info_gain)

NameError: name 'information_gain' is not defined

In [494]:
# Mutual-information variant for label e=0 with scores (-0.1, 1, 1) at beta=1.
info_gain = information_gain(e=0, i=-0.1, j=1, k=1, beta=1)
print(info_gain)

0.68975833788507
0.09395643018641417
0.5958019076986558


In [351]:
# Mutual-information variant for label e=0 with scores (0.1, 1, 1) at beta=1.
# NOTE(review): the saved output shows a single number although the current
# function also prints two intermediate values — likely a stale output.
info_gain = information_gain(e=0, i=0.1, j=1, k=1, beta=1)
print(info_gain)

0.0927281617764718


In [225]:
import matplotlib.pyplot as plt
import numpy as np

def heatmap2d(arr: np.ndarray):
    """Show ``arr`` as a viridis-coloured image with a colorbar."""
    image = plt.imshow(arr, cmap='viridis')
    # Attach the colorbar to the image that was just drawn.
    plt.colorbar(image)
    plt.show()

In [313]:
def plot_heatmap(e, ks=[1], betas=[1], n=100):
    """Plot a grid of ``info_gain2`` heatmaps over the scores ``i`` and ``j``.

    One panel is drawn per ``(beta, k)`` combination: rows follow ``betas``,
    columns follow ``ks``. Inside a panel, image row ``a`` / column ``b`` holds
    ``info_gain2(e, i=ii[a], j=jj[b], k, beta)`` where ``ii`` and ``jj`` are
    ``n`` evenly spaced values in [-1, 1].

    Parameters
    ----------
    e : int
        Label position forwarded to ``info_gain2``.
    ks : sequence of float
        Values of the third score, one per column. Not modified.
    betas : sequence of float
        Softmax scale factors, one per row. Not modified.
    n : int, default 100
        Grid resolution per axis (``n * n`` evaluations per panel).
    """
    ii = np.linspace(-1, 1, n)
    jj = np.linspace(-1, 1, n)

    # squeeze=False keeps `axes` two-dimensional even with a single row or
    # column, so axes[row, col] also works for the default arguments.
    fig, axes = plt.subplots(
        nrows=len(betas), ncols=len(ks), figsize=(20, 10), squeeze=False
    )

    # info_gain2 prints several lines per call; discard them here so the
    # n * n * len(ks) * len(betas) evaluations do not flood the cell output.
    with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink):
        for row, beta in enumerate(betas):
            for col, k in enumerate(ks):
                # Fresh buffer per panel so no two images can share data.
                data = np.zeros((n, n))
                for a in range(n):
                    for b in range(n):
                        data[a, b] = info_gain2(e=e, i=ii[a], j=jj[b], k=k, beta=beta)

                ax = axes[row, col]
                img = ax.imshow(data, cmap='viridis')
                ax.set_title(f"k={k}, beta={beta}")
                plt.colorbar(img, ax=ax)

    fig.tight_layout()
    plt.show()

In [None]:
# 3 x 9 grid of heatmaps for label e=2: one row per beta, one column per k.
# Each panel evaluates info_gain2 on a 100 x 100 grid, so this cell is slow.
plot_heatmap(e=2, ks=[-1, -0.9, -0.5, -0.1, 0, 0.1, 0.5, 0.9, 1], betas=[1, 2, 5])

In [59]:
# Sample 100000 random score triples uniformly from [-1, 1).
# A seeded generator makes the sample reproducible across kernel restarts.
rng = np.random.default_rng(0)
triangles = rng.uniform(-1, 1, (100000, 3))

# Push scores with magnitude below 0.1 out to +/-0.1, keeping their sign.
# copysign (unlike sign) also moves an exact 0.0 to +0.1.
too_small = np.abs(triangles) < 0.1
triangles[too_small] = np.copysign(0.1, triangles[too_small])

In [97]:
# Values each of the three scores (s_ab, s_ac, s_bc) may take.
extended_similarity_values = [1, -1, 0, 0.1]

# Compute information gain for (a, b) in all configurations
# NOTE(review): this list is not populated in the cells visible here —
# confirm it is still needed.
ab_extended_info_gain_corrected = []

# Every ordered triple [s_ab, s_ac, s_bc] of the values above (4**3 = 64),
# in itertools.product order.
triangles = [
    list(scores) for scores in product(extended_similarity_values, repeat=3)
]

In [98]:
# Information gain of label e=0 for every triangle, keyed by its score triple.
# beta=10 is used for all triangles.
triangles_info_gains = {}
for (i, j, k) in triangles:
    triangles_info_gains[(i, j, k)] = info_gain2(e=0, i=i, j=j, k=k, beta=10)

In [99]:
# Re-order the mapping from largest to smallest information gain
# (ties keep their existing relative order).
triangles_info_gains = dict(
    sorted(triangles_info_gains.items(), key=lambda entry: -entry[1])
)

In [100]:
# Display the triangles ranked by information gain (highest first).
triangles_info_gains

{(0, 0, 0): 0.7725302239683676,
 (0.1, 0, 0): 0.7482000417111198,
 (0, 0, 0.1): 0.7291039910927394,
 (0, 0.1, 0): 0.7291039910927394,
 (0, 0.1, 0.1): 0.6845381716287489,
 (0.1, 0, 0.1): 0.6694619050630789,
 (0.1, 0.1, 0): 0.6694619050630789,
 (0.1, 0.1, 0.1): 0.5398812236476045,
 (0.1, -1, 0): 0.5137206988075713,
 (0.1, 0, -1): 0.5137206988075713,
 (0, -1, 0): 0.4884406296275041,
 (0, 0, -1): 0.4884406296275041,
 (1, 1, -1): 0.4883871592091301,
 (1, -1, 1): 0.4883871592091301,
 (-1, 1, 1): 0.4883256361248133,
 (0.1, 0.1, -1): 0.48654687782026473,
 (0.1, -1, 0.1): 0.4865468778202645,
 (0, 0.1, -1): 0.39639346479176185,
 (0, -1, 0.1): 0.39639346479176163,
 (0, 1, 0): 0.3469481375969082,
 (0, 0, 1): 0.3469481375969082,
 (0, -1, -1): 0.3468233000946832,
 (0, 1, 0.1): 0.3201827877013531,
 (0, 0.1, 1): 0.3201827877013531,
 (0.1, 1, 0): 0.32009106048888475,
 (0.1, 0, 1): 0.32009106048888475,
 (0.1, -1, -1): 0.31999546639062987,
 (0.1, 1, 0.1): 0.23685873785627454,
 (0.1, 0.1, 1): 0.2368587378