In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import Counter
from KC_methods import methods

In [None]:
def discretize(arr):
    """Threshold every value of `arr` at 0.5.

    Returns a list with 1 where the value is >= 0.5 and 0 otherwise.
    """
    bits = []
    for value in arr:
        if value >= 0.5:
            bits.append(1)
        else:
            bits.append(0)
    return bits

def to_bin(arr):
    """Concatenate the string form of every element of `arr` into one string."""
    return ''.join(str(symbol) for symbol in arr)

def f(x,r):
    """Apply one step of the logistic map: r * x * (1 - x)."""
    complement = 1 - x
    return r * x * complement

def logistic_map(x_0,r,iterations,epsilon=0,e_freq=0):
    """Iterate the logistic map x -> r*x*(1-x), optionally perturbing it with random kicks.

    Parameters:
        `x_0`: initial value of the trajectory.
        `r`: parameter of the map.
        `iterations`: number of map steps to perform.
        `epsilon`: kick magnitude; every kick is drawn uniformly from [-epsilon, epsilon].
        `e_freq`: a step is kicked when a uniform [0, 1) draw is below this value.

    Returns:
        A list of `iterations + 1` values: `x_0` followed by the state after each step.

    Raises:
        AssertionError: if a state falls outside [0, 1].
    """
    res = [x_0]
    cur = x_0
    # Decide up front which steps are kicked and pre-draw one kick per step.
    kicks = np.random.rand(iterations) < e_freq
    epsilons = np.random.uniform(-epsilon,epsilon,size=iterations)
    for is_kick, added_eps in zip(kicks, epsilons):
        cur = r * cur * (1-cur)

        # When kick occurs
        if is_kick:
            # A zero-magnitude kick can never move the state, so resampling would spin
            # forever whenever the state sits exactly on 0 or 1; only resample when the
            # kick can actually change the value.
            if epsilon != 0:
                # Keep sampling for epsilon until the kicked value lies strictly inside (0, 1)
                while 0 >= cur + added_eps or cur + added_eps >= 1:
                    added_eps = np.random.uniform(-epsilon,epsilon)
            cur += added_eps
        assert 0 <= cur <= 1, f"cur is {cur}"
        res.append(cur)
    return res

In [None]:
# Number of trajectories generated per call of generate_patterns.
SAMPLES = 100_000

# @njit
def generate_patterns(iterations,epsilon=0,e_freq=0,r=-1,samples=None):
    '''Return a list of binary pattern strings built from logistic-map trajectories.

    Every pattern comes from one trajectory started at a random `x_0`, thresholded
    with `discretize` and joined with `to_bin`. Because the trajectory includes the
    initial value, each pattern has `iterations + 1` symbols.

    Parameters:
        `iterations`: number of map steps per trajectory.
        `epsilon`: kick magnitude forwarded to `logistic_map`.
        `e_freq`: kick frequency forwarded to `logistic_map`.
        `r`: map parameter used for every sample. The sentinel `-1` (default) means
             a fresh `r` is drawn uniformly from [0, 4] for each sample.
        `samples`: number of patterns to generate; defaults to the module-level `SAMPLES`.
    '''
    if samples is None:
        samples = SAMPLES
    r_is_random = r == -1
    patterns = [''] * samples
    for i in range(samples):
        # NOTE: `r` comes from the stdlib `random` generator while `x_0` (and the kicks)
        # come from `np.random`; both generators must be seeded for a reproducible run.
        if r_is_random:
            r = random.uniform(0,4)
        x_0 = np.random.rand()
        mapp = logistic_map(x_0,r,iterations,epsilon=epsilon,e_freq=e_freq)
        patterns[i] = to_bin(discretize(mapp))
    return patterns


In [None]:
def generate_logistic(iterations,start_iteration=0,epsilon=0,e_frequency=0,r=-1):
    ''' Wrapper function for logistic map pattern generation with kick.

    Generates patterns over `start_iteration + iterations` map steps and drops the
    first `start_iteration` symbols of each one.

    Parameters:
        `iterations`: the number of iterations that will be taken into account, this will determine the length of the pattern.
        `start_iteration`: after what iteration to start observing the pattern. defaults to `0`.
        `epsilon`: The magnitude of the random kick applied, when 0.5 is passed, the kick will be between -0.5,0.5. default to 0
        `e_frequency`: Nonnegative number determining the frequency of the kick. defaults to 0.
                       If the number is in the range [0,1), this is a probability.
                       A periodic interpretation (1 means every time, 2 means every two
                       times [nokick,kick,nokick,kick]) is NOT IMPLEMENTED.
        `r`: forwarded unchanged to `generate_patterns`. defaults to `-1`.

    Returns:
        The list of patterns, each with its first `start_iteration` symbols removed.
    '''
    total_steps = start_iteration + iterations
    full_patterns = generate_patterns(total_steps,epsilon=epsilon,e_freq=e_frequency,r=r)
    return [full_pattern[start_iteration:] for full_pattern in full_patterns]

In [None]:
def calc_SB(probability_values,complexities):
    """Fit a straight line to the upper envelope of log10(probability) versus complexity.

    For every distinct complexity only the largest probability is kept; a degree-1
    polynomial is then least-squares fitted to (complexity, log10(max probability)).

    Returns:
        The `np.polyfit` coefficient array `[slope, intercept]`.
    """
    table = pd.DataFrame({"y": probability_values, "x": complexities})
    # Largest probability per complexity value, indexed by the sorted complexities.
    upper_envelope = table.groupby("x")["y"].max()
    envelope_x = upper_envelope.index.to_numpy()
    envelope_log_y = np.log10(upper_envelope.to_numpy())
    return np.polyfit(envelope_x, envelope_log_y, 1)

In [None]:
# Default experiment parameters.
n_iterations = 25     # iterations taken into account for each pattern
start_iteration = 0   # iteration after which the pattern is observed
epsilon = 0.5         # kick magnitude
e_frequency = 0.1     # kick frequency
r = 3                 # logistic-map parameter (-1 selects a random r per sample)

In [None]:
# # # Generate figure for each epsilon value
# # # %prun generate_logistic(n_iterations,start_iteration=1000,epsilon=epsilon,e_frequency=e_frequency,r=r)
# patterns = generate_logistic(n_iterations,start_iteration=start_iteration,epsilon=epsilon,e_frequency=e_frequency,r=r)
# counts = Counter(patterns)

# probability = counts.copy()
# probability = {k: v/SAMPLES for k,v in probability.items()}

# probability_values =  list(probability.values())
# complexities = list(map(comp_from_ctw,probability.keys()))
# plt.figure()
# plt.scatter(complexities,np.log10(probability_values))
# plt.xlabel(r'$\mathit{\tilde{K}(x)}$',fontsize=15)
# plt.ylabel(r'$\mathit{\log_{10} P(x)}$',fontsize=15)
# x = [min(complexities),max(complexities)]
# y = np.log10([max(probability_values),min(probability_values)])
# plt.plot(x,y,c="k")

# # # # plt.savefig(f"fixed_r/{r}.png")

In [None]:
# # Generate figure for each epsilon value
# # %prun generate_logistic(n_iterations,start_iteration=1000,epsilon=epsilon,e_frequency=e_frequency,r=r)
# patterns = generate_logistic(n_iterations,start_iteration=start_iteration,epsilon=epsilon,e_frequency=e_frequency,r=r)
# counts = Counter(patterns)

# probability = counts.copy()
# probability = {k: v/SAMPLES for k,v in probability.items()}

# probability_values =  list(probability.values())
# complexities = list(map(calc_KC76,probability.keys()))
# plt.figure()
# plt.scatter(complexities,np.log10(probability_values))
# # # plt.savefig(f"fixed_r/{r}.png")

In [None]:

# methods = ["LZ76","LZ78","CTW"]
# # plt.figure()
# plt.figure()
# results = []
# method = "PLT"
# for start_iteration in [0,1000]:
#     for epsilon in [0.01,0.1,0.25,0.4,0.5,0.75]:
#         for e_frequency in [0.1,0.5,1]:
#             for r in [-1,2,2.5,3,3.65]:
#                 patterns = generate_logistic(n_iterations,start_iteration=start_iteration,epsilon=epsilon,e_frequency=e_frequency,r=r)
#                 probability = Counter(patterns)
#                 probability = {k: v/SAMPLES for k,v in probability.items()}
#                 probability_values =  list(probability.values())
#                 complexities = list(map(calc_plotnik,probability.keys()))
#                 df = pd.DataFrame({"y":probability_values,'x':complexities})
#                 groupmax = df.groupby('x').max().reset_index()
#                 ar = calc_SB(groupmax['x'],np.log10(groupmax['y']))#
#                 slope = ar[0]
#                 plt.clf()
#                 plt.scatter(complexities,np.log10(probability_values))
#                 # plt.title(f"start={start_iteration}, eps={epsilon}, e_freq={e_frequency}, r={r}, KC_method={method}")
#                 plt.xlabel(r'$\mathit{\tilde{K}(x)}$',fontsize=15)
#                 plt.ylabel(r'$\mathit{\log_{10} P(x)}$',fontsize=15)
#                 x = [min(complexities),max(complexities)]
#                 y = np.log10([max(probability_values),min(probability_values)])
#                 plt.plot(x,y,c="k")
#                 plt.savefig(f"results/output/{method}/{start_iteration}_{epsilon}_{e_frequency}_{r}.jpg")
                
#                 row = [ start_iteration,epsilon,e_frequency,r,slope] 
#                 print(row)
#                 results.append(row)
#                 del patterns 
#                 del probability 
#                 del probability_values 
#     pd.DataFrame(results,columns=["start_iteration","epsilon","e_frequency","r","slope_CTW"]).to_csv(f"results{method}.csv")

In [None]:
# Display names of the complexity estimators, in the order the sweep loop pairs them
# with the estimator functions.
# NOTE(review): this assignment rebinds `methods`, the name imported from KC_methods
# in the first cell -- confirm that the import is not needed afterwards.
methods = ["LZ76", "LZ78", "CTW", "PLT"]

# Per-method offsets (not referenced by the cells visible here).
offsets = dict(LZ76=2, LZ78=6, CTW=2, PLT=-2)

# Sweep over the experiment parameters: for every combination, estimate the pattern
# probabilities, compute each complexity measure, fit the upper-bound slope and plot
# log10 P(x) against the complexity. One row per combination is collected in `results`.
# NOTE(review): calc_KC76, calc_KC78, comp_from_ctw and calc_plotnik are not defined or
# imported in the cells visible here -- presumably they come from KC_methods; confirm
# they are in scope on a fresh kernel.
results = []
plt.figure()
# Full sweep (currently disabled):
# for start_iteration in [0,1000]:
#     for epsilon in [0.1,0.25,0.4,0.5,0.75]:
#         for e_frequency in [0.1,0.5,1]:
#             for r in [-1,2,2.5,3,3.65]:
for start_iteration in [0]:
    for epsilon in [0]:
        for e_frequency in [0]:
            for r in [-1,2,2.5,3,3.65]:
                patterns = generate_logistic(n_iterations,start_iteration=start_iteration,epsilon=epsilon,e_frequency=e_frequency,r=r)
                # Empirical probability of every distinct pattern.
                probability = Counter(patterns)
                probability = {k: v/SAMPLES for k,v in probability.items()}
                probability_values =  list(probability.values())
                slopes = [-1,-1,-1,-1]
                for i, KC_method in enumerate([calc_KC76,calc_KC78,comp_from_ctw,calc_plotnik]):
                    method = methods[i]
                    complexities = list(map(KC_method,probability.keys()))
                    slope, intercept = calc_SB(probability_values,complexities)
                    slopes[i] = slope
                    plt.clf()
                    log_probabilities = np.log10(probability_values)
                    plt.scatter(complexities,log_probabilities)
                    x = np.array([min(complexities),max(complexities)])
                    y = np.log10([max(probability_values),min(probability_values)])
                    # log10(2**-x) written as a product: raising 2 to a negative power
                    # fails for integer arrays and underflows to 0 for large complexities.
                    k = -x * np.log10(2)
                    # Shift the 2^-K line vertically so that its lowest end meets the
                    # smallest observed log-probability.
                    offset = min(log_probabilities) - min(k)
                    y_k = k + offset

                    # NOTE(review): this line joins (min K, max P) to (max K, min P); it is
                    # not the polyfit line returned by calc_SB, despite the 'fit' label.
                    plt.plot(x,y,c="k",label='fit')
                    plt.plot(x,y_k,c='orange',label=r"$2^{-k}+offset$")
                    # plt.plot(x,y_k-offset,c='green',label=r"$2^{-k}$")
                    plt.legend(loc="upper right")
                    plt.title(f"start={start_iteration}, eps={epsilon}, e_freq={e_frequency}, r={'random' if r == -1 else r}, KC_method={method}")
                    plt.xlabel(r'$\mathit{\tilde{K}_{%s}(x)}$' % method,fontsize=15)
                    plt.ylabel(r'$\mathit{\log_{10} P(x)}$',fontsize=15)
                    plt.show()
                    # plt.savefig(f"results/output/{method}/{start_iteration}_{epsilon}_{e_frequency}_{r}.jpg",bbox_inches = "tight")

                row = [ start_iteration,epsilon,e_frequency,r,*slopes]
                print(row)
                results.append(row)
                # Release the large intermediates before the next combination.
                del patterns
                del probability
                del probability_values