Pruebas de código RAFA
Transcripción del código a Python

In [2]:
import numpy as np
from scipy.special import psi

def findpt(c, target):
    """Locate *target* in the ascending sequence *c* as a (possibly fractional) index.

    The result uses 0-based positions:

    * an integer ``i`` when ``c[i] == target`` (with repeated values, the
      index reached by the bisection below, not necessarily the first one);
    * ``i + 0.5`` when *target* lies strictly between ``c[i]`` and ``c[i + 1]``;
    * ``-0.5`` when *target* is smaller than every element (or *c* is empty);
    * ``len(c) - 0.5`` when *target* is larger than every element.

    Args:
        c: Sequence sorted in ascending order and supporting ``len`` and
            integer indexing.
        target: Value to locate; must be comparable with the items of *c*.

    Returns:
        The integer or half-integer position described above.
    """
    left = 0
    right = len(c) - 1

    # Nothing to search: by the same convention the target sits "before" index 0.
    if right < 0:
        return -0.5

    if target < c[left]:
        # Half a step before the first 0-based index. Returning 0.5 here (the
        # 1-based convention) would be indistinguishable from "between c[0]
        # and c[1]" and makes index differences one too small.
        return -0.5
    if target > c[right]:
        return right + 0.5

    # Invariant: c[left] <= target <= c[right].
    while left != right:
        pt = (left + right) // 2
        if c[pt] < target:
            left = pt
        else:
            right = pt

        if left + 1 == right:
            if c[left] == target:
                return left
            if c[right] == target:
                return right
            return (right + left) / 2

    # left == right: either the sequence has a single element equal to the
    # target, or the bracket collapsed onto index ``left``.
    return left

def discrete_continuous_info_fast(d, c, k=3, base=np.e):
    """Estimate the mutual information between a discrete and a continuous sample.

    The samples are sorted by ``c`` and grouped by the value of ``d``.  For
    every sample, the distance to its ``k``-th nearest neighbour *within its
    own group* is found, and the number ``m`` of samples of *any* group inside
    that distance is counted with ``findpt``.  The estimate is::

        (psi(N) - <psi(N_d)> + <psi(k)> - <psi(m)>) / log(base)

    NOTE(review): this looks like a port of the nearest-neighbour estimator of
    B. C. Ross (2014) -- confirm against the reference implementation.

    Args:
        d: 1-D array-like of discrete symbols, one per sample.
        c: 1-D array-like of numeric values, same length as ``d``.
        k: Number of same-symbol neighbours to use.  For a symbol with fewer
            than ``k + 1`` samples, ``len(group) - 1`` is used instead.
        base: Logarithm base of the result (``np.e`` gives nats).

    Returns:
        A tuple ``(f, V)`` where ``f`` is the estimate and ``V`` is a float
        array of length ``len(d)`` holding, for each sample *in ascending
        order of c*, twice the distance to its k-th same-symbol neighbour
        (or twice the full range of ``c`` for a symbol seen only once).

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    d = np.asarray(d)
    c = np.asarray(c)
    n_samples = len(d)
    if n_samples != len(c):
        raise ValueError("d and c must have the same length")
    if n_samples == 0:
        raise ValueError("d and c must not be empty")

    # Sort BOTH arrays with one permutation computed from the unsorted c.
    # Sorting c first and then argsorting the sorted copy yields the identity
    # permutation and silently breaks the (d, c) pairing.
    order = np.argsort(c, kind="stable")
    c = c[order]
    d = d[order]

    # Bin the continuous data 'c' according to the discrete symbols 'd'.
    # Bins are created in order of first appearance of each symbol.
    bin_of_symbol = {}
    c_split = []      # per bin: the c values, ascending
    cs_indices = []   # per bin: positions of those values in the sorted c
    for idx in range(n_samples):
        symbol = d[idx]
        bin_id = bin_of_symbol.get(symbol)
        if bin_id is None:
            bin_id = len(c_split)
            bin_of_symbol[symbol] = bin_id
            c_split.append([])
            cs_indices.append([])
        c_split[bin_id].append(c[idx])
        cs_indices[bin_id].append(idx)
    num_d_symbols = len(c_split)

    # Compute the neighbour statistic for each data pair (c, d).
    m_tot = 0
    av_psi_Nd = 0
    psi_ks = 0
    V = np.zeros(n_samples)

    for values, positions in zip(c_split, cs_indices):
        bin_size = len(values)
        one_k = min(k, bin_size - 1)

        if one_k > 0:
            last = bin_size - 1
            for pivot, one_c in enumerate(values):
                # Grow a window [left_neighbor, right_neighbor] around the
                # pivot one sample at a time, always taking the closer side,
                # until it holds one_k neighbours from this bin.
                left_neighbor = pivot
                right_neighbor = pivot
                for _ in range(one_k):
                    if left_neighbor == 0:
                        right_neighbor += 1
                        the_neighbor = right_neighbor
                    elif right_neighbor == last:
                        left_neighbor -= 1
                        the_neighbor = left_neighbor
                    elif (abs(values[left_neighbor - 1] - one_c)
                          < abs(values[right_neighbor + 1] - one_c)):
                        left_neighbor -= 1
                        the_neighbor = left_neighbor
                    else:
                        right_neighbor += 1
                        the_neighbor = right_neighbor

                distance_to_neighbor = abs(values[the_neighbor] - one_c)

                # Count samples of every symbol inside the same radius, as a
                # difference of (fractional) positions in the full sorted c.
                if the_neighbor == left_neighbor:
                    m = int(findpt(c, one_c + distance_to_neighbor)
                            - findpt(c, values[left_neighbor]))
                else:
                    m = int(findpt(c, values[right_neighbor])
                            - findpt(c, one_c - distance_to_neighbor))
                # The radius contains at least the one_k same-symbol neighbours.
                m = max(m, one_k)

                m_tot += psi(m)
                V[positions[pivot]] = 2 * distance_to_neighbor
        else:
            # Symbol with a single sample (or k <= 0): no neighbour distance
            # exists, so fall back to a fixed count and the full range of c.
            m_tot += psi(num_d_symbols * 2)
            V[positions[0]] = 2 * (c[-1] - c[0])

        p_d = bin_size / n_samples
        av_psi_Nd += p_d * psi(p_d * n_samples)
        psi_ks += p_d * psi(max(one_k, 1))

    f = (psi(n_samples) - av_psi_Nd + psi_ks - m_tot / n_samples) / np.log(base)
    return f, V



In [4]:
# Usage example: two separately drawn random 0/1 integer arrays of length 100.
# NOTE(review): the call passes (c, d) although the signature is (d, c) --
# confirm the intended argument order.
# NOTE(review): both inputs are integer-valued, so the argument used as the
# "continuous" variable contains many repeated values -- confirm this is a
# meaningful test input.
d = np.random.randint(0, 2, 100)
c = np.random.randint(0, 2, 100)
f, V = discrete_continuous_info_fast(c, d)
print("f:", f)
print("V:", V)

f: 0.696371218156502
V: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [5]:
# Usage example: the same random 0/1 array `c` is passed as both arguments.
# NOTE(review): `d` is generated but not used by this call.
d = np.random.randint(0, 2, 100)
c = np.random.randint(0, 2, 100)
f, V = discrete_continuous_info_fast(c, c)
print("f:", f)
print("V:", V)

f: 0.6949689718641052
V: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [6]:
# Usage example: the same random 0/1 array `d` is passed as both arguments.
# NOTE(review): `c` is generated but not used by this call.
d = np.random.randint(0, 2, 100)
c = np.random.randint(0, 2, 100)
f, V = discrete_continuous_info_fast(d, d)
print("f:", f)
print("V:", V)

f: 0.696371218156502
V: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [9]:
# Usage example with 100000 samples: `d` takes values in {0, 1} and `c` takes
# integer values in 0..9.
# NOTE(review): the call passes (c, d) although the signature is (d, c), so
# the 10-valued array is the one treated as the discrete variable -- confirm
# the intended argument order.
d = np.random.randint(0, 2, 100000)
c = np.random.randint(0, 10, 100000)
f, V = discrete_continuous_info_fast(c, d)
print("f:", f)
print("V:", V)

f: 2.3025985322586444
V: [0. 0. 0. ... 0. 0. 0.]


In [10]:
# Usage example with 100000 samples: `d` takes values in {0, 1} and `c` takes
# integer values in 0..9; arguments are passed in signature order (d, c).
# NOTE(review): `c` is integer-valued, so the "continuous" argument contains
# many repeated values -- confirm this is a meaningful test input.
d = np.random.randint(0, 2, 100000)
c = np.random.randint(0, 10, 100000)
f, V = discrete_continuous_info_fast(d, c)
print("f:", f)
print("V:", V)

f: 0.6931516187859567
V: [0. 0. 0. ... 0. 0. 0.]
