In [None]:
def compute_entropy(a):
    """
    Compute entropy (natural log) of a.
    a is first scaled to probabilities with compute_probabilities. Entries whose
    probability is not strictly positive (zeros, and NaNs) contribute nothing,
    following the 0 * log(0) = 0 convention, so a zero in a no longer turns the
    result into NaN for plain numpy input.
    :param a: array; (1, n_values)
    :return float; 0 <= float
    """

    p = np.asarray(compute_probabilities(a), dtype=float)
    # log(0) is -inf and 0 * -inf is NaN, so drop those entries before taking
    # the log; this matches what pandas' NaN-skipping sum did implicitly.
    p = p[p > 0]
    return -(p * np.log(p)).sum()


def compute_brier_entropy(a, n=1):
    """
    Compute brier entropy of a.
    a is scaled to probabilities with compute_probabilities and sorted in
    descending order. For each of the n largest probabilities p_i the term
    (1 - p_i)^2 + sum of p_j^2 over all j != i is added to the result.
    :param a: array; (1, n_values)
    :param n: int; number of largest probabilities to score; n <= 0 gives 0
    :return float; 0 <= float
    :raises IndexError: if n is larger than the number of values in a
    """

    # Flatten so that a (1, n_values) input is sorted and indexed per value
    # rather than per row.
    p = np.sort(np.asarray(compute_probabilities(a), dtype=float).ravel())[::-1]

    if n <= 0:
        return 0
    if n > p.size:
        raise IndexError(
            'n ({}) is larger than the number of values ({}).'.format(n, p.size))

    top = p[:n]
    # The sum over j != i equals the total sum of squares minus p_i^2, which
    # avoids rebuilding the "all others" list for every i.
    sum_of_squares = np.sum(p ** 2)
    # Clamp at 0: the subtraction can dip a hair below zero from rounding when
    # one probability dominates.
    others = np.maximum(sum_of_squares - top ** 2, 0.0)
    return np.sum((1 - top) ** 2 + others)


def compute_probabilities(a):
    """
    Scale a by its sum so that the values add up to 1.
    No check is made for a zero sum; the division is performed as-is.
    :param a: array; (1, n_values)
    :return array; (1, n_values); 0 <= array <= 1 when a is non-negative
    """

    total = a.sum()
    return a / total


def normalize_information_coefficients(a, method, clip_min=None, clip_max=None):
    """
    Rescale a with one of three methods.
    '0-1': subtract the minimum and divide by the (max - min) range.
    'p1d2': add 1 and divide by 2.
    'clip': call a.clip(clip_min, clip_max).
    :param a: array; (n_rows, n_columns)
    :param method: str; '0-1' | 'p1d2' | 'clip'
    :param clip_min: number; lower bound, used only by 'clip'
    :param clip_max: number; upper bound, used only by 'clip'
    :return array; (n_rows, n_columns); 0 <= array <= 1 for '0-1'; for the other
        methods the range depends on a and on the clip bounds
    :raises ValueError: if method is not one of the above
    """

    if method == 'clip':
        return a.clip(clip_min, clip_max)

    if method == 'p1d2':
        shifted = a + 1
        return shifted / 2

    if method == '0-1':
        lowest = a.min()
        span = a.max() - lowest
        return (a - lowest) / span

    raise ValueError('Unknown method {}.'.format(method))
# Build one similarity matrix per NMF k (3 through 11): read that k's H matrix
# from its GCT file and pass it, together with reference_dataset_signature_genes
# and the information coefficient function, to compute_similarity_matrix.
c_x_gs = []
for nmf_k in range(3, 12):
    h_path = '../results/nmf/matrices/nmf_k{}_h.gct'.format(nmf_k)
    h_matrix = ccal.read_gct(h_path)
    c_x_gs.append(
        ccal.association.compute_similarity_matrix(
            h_matrix,
            reference_dataset_signature_genes,
            ccal.mathematics.information.information_coefficient,
            axis=1,
        )
    )

# Report each matrix's shape and write it to disk. The output file is named by
# the row count only, so matrices with the same number of rows would share a path.
for matrix in c_x_gs:
    n_rows = matrix.shape[0]
    print(matrix.shape)
    matrix.to_csv('../results/nmf/for_entropy_{}.txt'.format(n_rows))

import seaborn as sns
sns.set_style('white')

# NMF k values; c_x_gs is expected to hold one matrix per k, in this order.
nmf_ks = range(3, 12)
# Per-row score selected by each method label shown in the plot titles.
score_functions = {'bs': compute_brier_entropy, 's': compute_entropy}

# Sweep clip threshold x score method x power, and for each combination plot
# the summed, k-normalized row scores against k.
for cm in [0, 0.25, 0.5]:
    # Clipping depends only on cm, so do it once per threshold instead of
    # repeating it for every (method, power) combination.
    clipped_c_x_gs = [
        c_x_g.apply(normalize_information_coefficients, method='clip', clip_min=cm)
        for c_x_g in c_x_gs
    ]

    for sm in ['bs', 's']:
        compute_score = score_functions[sm]

        for power in [1, 2, 3, 5]:
            s_sums = []
            for nmf_k, c_x_g in zip(nmf_ks, clipped_c_x_gs):
                c_x_g_power = c_x_g ** power

                # Score every row, then divide by k before summing.
                s = c_x_g_power.apply(compute_score, axis=1) / nmf_k
                s_sums.append(s.sum())

            plt.figure(figsize=(16, 10))
            plt.plot(nmf_ks, s_sums)
            plt.suptitle('min={} method={} power={}'.format(cm, sm, power), size=30, weight='bold')
            plt.show()