# This script processes the tree-related statistics generated by owm_stats.cpp

1. Generate the file [CloudName].xyz.csv by compiling and running:
    ```
    make bin/stats
    cd scripts
    python3 run_stats.py 
    ```

run_stats.py uses the MinRadius values that give the minimum OWM execution time without memoization, MinRadius=[0.9,0.6,0.2,0.2], and likewise the best MaxNumber values, MaxNumber=[512,512,1024,512].

2. Collect the files generated in the cloud directory, with names [CloudName].xyz.csv

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# container for the histogram (and related figures) read from one stats file
class histo:
    """Histogram of points per leaf node, plus the values derived from it."""

    def __init__(self):
        # histogram bins: ni[k] nodes contain xi[k] points
        self.xi = []
        self.ni = []
        # number of leaf nodes
        self.nleaf = 0
        # minimum radius of the tree
        self.minRadius = 0.
        # area of each leaf node
        self.area = 0.
        # maximum number of points per node
        self.maxh = 0
        # density of the cloud (as read from the file)
        self.density = 0.
        # density of the cloud computed from the histogram
        self.odensity = 0.

    def compute_params(self):
        """Fill in ``area`` and ``odensity`` from the loaded histogram.

        Raises:
            ZeroDivisionError: if ``minRadius`` is 0 or the histogram is empty.
            AssertionError: if ``nleaf`` differs from the sum of ``ni``.
        """
        # a leaf is treated as a square of side 2*minRadius
        side = 2 * self.minRadius
        self.area = side ** 2

        # mean density: every bin contributes (points * nodes) / area
        weighted = sum(
            points * nodes / self.area
            for points, nodes in zip(self.xi, self.ni)
        )
        total_nodes = sum(self.ni)
        self.odensity = weighted / total_nodes

        # the histogram must account for every leaf node
        assert self.nleaf == total_nodes, 'Error: nleaf different from sum(ni)'

def readstats(filename):
    """Parse one statistics file into a ``histo`` object.

    The file is read by fixed line position; every line is split on
    whitespace:

    * line 2  -> ``nleaf``     (last token)
    * line 3  -> ``minRadius`` (second token)
    * line 9  -> ``maxh``      (second token)
    * line 16 -> ``density``   (last token)
    * line 20 onwards -> histogram rows ``<points> <nodes>``, read until the
      row whose first token equals ``maxh`` (or the end of the file).

    Args:
        filename: path of the statistics file.

    Returns:
        A ``histo`` with the fields above filled in and ``compute_params()``
        already applied.

    Raises:
        OSError: if the file cannot be opened.
        IndexError, ValueError: if the file does not follow the layout above.
    """
    # the context manager closes the file even if reading fails
    with open(filename) as f:
        lines = f.readlines()

    newhisto = histo()
    # get the number of leaf nodes created
    newhisto.nleaf = int(lines[1].split()[-1])  # line 2
    # get minRadius
    newhisto.minRadius = float(lines[2].split()[1])  # line 3
    # get the maximum number of points per node
    newhisto.maxh = int(lines[8].split()[1])  # line 9
    # get the estimated density of the cloud
    newhisto.density = float(lines[15].split()[-1])  # line 16

    # from line 20 to the line with maxh, read the histogram
    for line in lines[19:]:
        fields = line.split()
        if not fields:
            # tolerate blank lines inside or after the histogram
            continue
        points = int(fields[0])
        newhisto.xi.append(points)
        newhisto.ni.append(int(fields[1]))
        # stop when the maximum number of points per node is reached
        if points == newhisto.maxh:
            break

    # compute hist parameters
    newhisto.compute_params()

    return newhisto

clouds = ['Alcoy', 'Arzua', 'BrionF', 'BrionU']
minRadius = [2.0, 0.9, 0.6, 0.1]

# the result directories are named after the machine that produced them
hostname = os.popen("hostname").read().strip()
# minRadius and maxNumber result directories
maxNum_dir = f'{hostname}/treeStatistics/MaxNumHisto-{hostname}'
minRad_dir = f'{hostname}/treeStatistics/MinRadHisto-{hostname}'

# all_histos[cloud][minRadius] -> histo read from the matching stats file
all_histos = {}
for cloud in clouds:
    per_radius = all_histos[cloud] = {}
    for mR in minRadius:
        print('Cloud:', cloud)
        stats = per_radius[mR] = readstats(f"{minRad_dir}/{cloud}H_hist_mR{mR}.csv")
        # echo the main figures of the histogram that was just loaded
        for field in ('nleaf', 'minRadius', 'maxh', 'density', 'odensity'):
            print(f'{field}:', getattr(stats, field))

In [None]:
# Target number of points per leaf used to estimate minRadius from the density.
points_per_leaf = 34
# all_histos is a nested dict (cloud -> minRadius -> histo); iterating it
# directly yields the cloud names, so walk both levels to reach the histograms.
for cloud, per_radius in all_histos.items():
    for mR, myh in per_radius.items():
        print(f'Cloud: {cloud} (minRadius={mR})')
        print('Density:', myh.density)
        print(f'Best minRadius: {myh.minRadius}')
        # a square leaf of side 2*r holding points_per_leaf points at the given
        # density satisfies (2*r)**2 * density = points_per_leaf
        print(f'Estimated minRadius: {np.sqrt(points_per_leaf/myh.density)/2}')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

# Data points to fit (x_data[k] pairs with y_data[k])
x_data = np.array([0.9, 0.6, 0.1, 0.1]) # minRadius
y_data = np.array([7.57, 40.7, 119.7, 122.11]) # mean density

# Exponential model: y = a * e^(b*x)
def exponential_func(x, a, b):
    """Evaluate ``a * exp(b * x)``; ``x`` may be a scalar or a numpy array."""
    growth = np.exp(b * x)
    return a * growth
# Cubic model: y = a*x^3 + b*x^2 + c*x + d
def cubic_func(x, a, b, c, d):
    """Evaluate the cubic polynomial with coefficients ``a, b, c, d`` at ``x``."""
    cubic_term = a * x**3
    quadratic_term = b * x**2
    linear_term = c * x
    return cubic_term + quadratic_term + linear_term + d
# Quadratic model: y = a*x^2 + b*x + c
def quadratic_func(x, a, b, c):
    """Evaluate the quadratic polynomial with coefficients ``a, b, c`` at ``x``."""
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c
# Linear model: y = a*x + b
def linear_func(x, a, b):
    """Evaluate the straight line with slope ``a`` and intercept ``b`` at ``x``."""
    scaled = a * x
    return scaled + b

# Reference curve: y = 64 / (2*x)^2
def func2(x):
    """Evaluate ``64 / (2*x)**2``; ``x`` may be a scalar or a numpy array."""
    doubled = x * 2
    return 64 / doubled**2

# Fit the parameters of each candidate curve to the data points
exp_params, _ = curve_fit(exponential_func, x_data, y_data, p0=[122.11, -2.3])
cubic_params, _ = curve_fit(cubic_func, x_data, y_data, p0=[-135.4, 370.6, -222.9, 40.7])
quadratic_params, _ = curve_fit(quadratic_func, x_data, y_data, p0=[119.7, -239.4, 122.11])
linear_params, _ = curve_fit(linear_func, x_data, y_data, p0=[1,1])
print(linear_params)

# Evaluate every fitted curve on a regular grid of x values
x_vals = np.linspace(0, 1, 100)
y_exp = exponential_func(x_vals, *exp_params)
y_cubic = cubic_func(x_vals, *cubic_params)
y_quadratic = quadratic_func(x_vals, *quadratic_params)
y_linear = linear_func(x_vals, *linear_params)

# func2 divides by (2*x)**2, which is 0 at x_vals[0]. The resulting inf lies
# outside the plotted y-range, so only the divide-by-zero warning is silenced.
with np.errstate(divide='ignore'):
    y_orig = func2(x_vals)

# Plot the data points together with all the curves
plt.figure(figsize=(8, 6))
plt.scatter(x_data, y_data, label='Datos', color='red')
plt.plot(x_vals, y_exp, label='Curva Exponencial', linestyle='--')
plt.plot(x_vals, y_cubic, label='Curva Cúbica', linestyle='-.')
plt.plot(x_vals, y_quadratic, label='Curva Cuadrática', linestyle=':')
plt.plot(x_vals, y_linear, label='Curva Lineal', linestyle='-', color='purple')
plt.plot(x_vals, y_orig, label="y = 64/(x*2)^2", color="green")
plt.xlabel('minRadius [m]')
plt.ylabel('Densidad media [puntos/m^2]')
plt.ylim([0, 150])
plt.title('Curvas A8')
plt.legend()
plt.grid(True)
plt.show()



In [None]:
def _select_histo(all_histo, index, name, min_radius):
    """Return the histogram to plot for the cloud ``name`` at position ``index``."""
    # dict input is keyed by cloud name, sequence input by position
    entry = all_histo[name] if isinstance(all_histo, dict) else all_histo[index]
    if not isinstance(entry, dict):
        return entry
    # nested layout: entry maps minRadius -> histo, so a radius must be chosen
    if min_radius is None:
        raise ValueError(
            f"'{name}' holds one histogram per minRadius; "
            "pass min_radius to choose which one to plot"
        )
    if isinstance(min_radius, (list, tuple)):
        return entry[min_radius[index]]
    return entry[min_radius]


def plot_histo(all_histo, min_radius=None):
    """Plot one bar histogram per cloud and save the figure to a PDF.

    Each panel shows the number of leaf nodes (``ni``) per number of points
    (``xi``), with dashed vertical lines at the mean (red) and at the mode
    (green; the first bin is skipped when looking for the mode). The mean and
    the mode are also printed. The figure is written to
    ``Histogram_num_points_minrad.pdf``.

    Args:
        all_histo: histograms of the four clouds, either
            * a sequence with one ``histo`` per cloud, in cloud order,
            * a dict ``cloud -> histo``, or
            * a dict ``cloud -> {minRadius -> histo}`` (``min_radius`` required).
        min_radius: only used with the nested dict layout; a single radius for
            every cloud, or a list/tuple with one radius per cloud.

    Raises:
        ValueError: if the nested layout is passed without ``min_radius``.
        KeyError: if a cloud or the requested radius is missing.
    """
    # Configuration variables
    ylabelfs = 18
    xlabelfs = 18
    xticksfs = 16

    clouds = ['Alcoy', 'Arzua', 'BrionF', 'BrionU']

    # define grid of plots
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5), constrained_layout=True)
    for i, name in enumerate(clouds):
        hist = _select_histo(all_histo, i, name, min_radius)
        ax = axs[i]
        ax.bar(hist.xi, hist.ni)

        # compute the histogram mean
        mean = sum(x * n for x, n in zip(hist.xi, hist.ni)) / sum(hist.ni)
        print('mean:', mean)
        # plot a vertical line at the mean
        ax.axvline(mean, color='r', linestyle='dashed', linewidth=1)

        # compute the histogram mode discarding the first bin (0 points)
        mode = 0
        best = 0
        for x, n in zip(hist.xi[1:], hist.ni[1:]):
            if n > best:
                best = n
                mode = x
        print('mode:', mode)
        # plot a vertical line at the mode
        ax.axvline(mode, color='g', linestyle='dashed', linewidth=1)

        ax.set_title(name, fontsize=16)
        ax.set_xlabel('Number of points', fontsize=xlabelfs)
        # set_xticks only accepts text properties together with explicit
        # labels, so the tick font size is set through tick_params instead
        ax.set_xticks(hist.xi[::128])
        ax.tick_params(axis='x', labelsize=xticksfs)
        ax.set_xlim(0, min(1024, max(hist.xi)))

        ax.grid()
    fig.suptitle('Histogram of number of leaf-nodes with x points',  fontweight='bold', fontsize=18)

    axs[0].set_ylabel('Number of leaf-nodes', fontsize=ylabelfs)
    pp = PdfPages("Histogram_num_points_minrad.pdf")
    pp.savefig(fig)
    pp.close()

# NOTE(review): all_histos holds one histogram per (cloud, minRadius); the
# per-cloud radii below are the x_data values of the curve-fit cell and are
# presumably the ones of interest for each cloud - confirm.
plot_histo(all_histos, min_radius=[0.9, 0.6, 0.1, 0.1])

In [None]:
def plot_histo2(all_histo, nbins=512):
    """Plot, per cloud, the normalized histograms of every loaded minRadius.

    For each cloud one panel overlays a filled step plot per minRadius. The
    first bin (``ni[0]``) is dropped and every count is divided by the number
    of leaf nodes (``nleaf``) of that histogram. The figure is written to
    ``Histogram_num_points_minrad2.pdf``.

    Args:
        all_histo: nested dict ``cloud -> {minRadius -> histo}``; the radii
            are plotted in the iteration order of the inner dict.
        nbins: maximum number of histogram entries considered per curve.
            Converted to ``int`` so values such as ``1e6`` can be passed.
    """
    # Configuration variables
    ylabelfs = 18
    xlabelfs = 18

    clouds = ['Alcoy', 'Arzua', 'BrionF', 'BrionU']
    # a float here would end up as a slice bound for long histograms
    nbins = int(nbins)

    # define grid of plots
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5), constrained_layout=True)
    for i, name in enumerate(clouds):
        for mR, hist in all_histo[name].items():
            # represent at most nbins entries of the histogram
            limit = min(nbins, len(hist.ni))

            # drop the first bin and normalize by the number of leaf nodes
            chunk = [count / hist.nleaf for count in hist.ni[1:limit]]
            # stairs needs one more edge than values
            edges = np.arange(0, limit)

            axs[i].stairs(chunk, edges, fill=True, label='MinRadius: '+str(mR))

        axs[i].set_title(name, fontsize=16)
        axs[i].set_xlabel('Number of points', fontsize=xlabelfs)
        axs[i].set_ylim(0, 0.01)

        axs[i].grid()
        axs[i].legend(loc='best', fontsize=10)
    fig.suptitle('MinRadius: Histogram of number of leaf-nodes with x points',  fontweight='bold', fontsize=18)

    axs[0].set_ylabel('Number of leaf-nodes', fontsize=ylabelfs)
    pp = PdfPages("Histogram_num_points_minrad2.pdf")
    pp.savefig(fig)
    pp.close()

plot_histo2(all_histos, nbins=1e6)

In [None]:
#number of empty leaf-nodes
# all_histos is keyed by cloud name and then by minRadius, so one radius per
# cloud has to be chosen to get a single count per cloud.
# NOTE(review): these radii are the x_data values of the curve-fit cell and
# are presumably the ones of interest for each cloud - confirm.
empty_min_radius = [0.9, 0.6, 0.1, 0.1]
empty_leaf_nodes = []
for cloud, mR in zip(clouds, empty_min_radius):
    # the first histogram bin is taken as the 0-points bin
    n_empty = all_histos[cloud][mR].ni[0]
    print('Cloud:', cloud, end='')
    print('. Number of empty leaf-nodes: {}'.format(n_empty))
    empty_leaf_nodes.append(n_empty)

In [None]:
#number of leaf-nodes with more than 512 points
# all_histos is keyed by cloud name and then by minRadius, so one radius per
# cloud has to be chosen.
# NOTE(review): these radii are the x_data values of the curve-fit cell and
# are presumably the ones of interest for each cloud - confirm.
for cloud, mR in zip(clouds, [0.9, 0.6, 0.1, 0.1]):
    hist = all_histos[cloud][mR]
    # strictly more than 512, as the message says; a cloud with no such bin
    # simply reports 0
    over = sum(n for x, n in zip(hist.xi, hist.ni) if x > 512)
    print('Cloud:', cloud, end='')
    print('. Number of leaf-nodes with more than 512 points: {}'.format(over))

# Compute the histograms for maxNumber results

In [None]:
# all_histos_mn[k]: (xi, ni) lists of cloud k
# all_nph_mn[k]:    dense array where position p holds the number of leaf
#                   nodes with p points (0 where the file has no entry)
all_histos_mn = []
all_nph_mn = []
for cloud in clouds:
    print('Cloud:', cloud)
    # readstats returns a histo object, not an (x, y, maxh) tuple
    hist = readstats(f"{maxNum_dir}/{cloud}H_salida.xyz.csv")
    all_histos_mn.append((hist.xi, hist.ni))
    print('maxh:', hist.maxh)
    dense = np.zeros(hist.maxh + 1)
    for points, nodes in zip(hist.xi, hist.ni):
        dense[points] = nodes
    all_nph_mn.append(dense)

In [None]:
def plot_histo_mn(all_histo):
    """Plot one filled step histogram per cloud for the MaxNumber results.

    ``all_histo[k]`` is the sequence of counts of cloud ``k``; its first
    element is dropped before plotting. The figure is written to
    ``Histogram_num_points_maxnum2.pdf``.
    """
    axis_label_fs = 18
    cloud_names = ['Alcoy', 'Arzua', 'BrionF', 'BrionU']
    max_numbers = [512, 512, 1024, 512]

    # one panel per cloud, side by side
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(15, 5), constrained_layout=True)
    for idx, cloud_name in enumerate(cloud_names):
        panel = axs[idx]
        counts = all_histo[idx][1:]
        panel.stairs(counts, fill=True, label='MaxNumber: ' + str(max_numbers[idx]))

        panel.set_title(cloud_name, fontsize=16)
        panel.set_xlabel('Number of points', fontsize=axis_label_fs)

        panel.grid()
        panel.legend(loc='best', fontsize=10, markerscale=0.2)
    fig.suptitle('MaxNumber: Histogram of number of leaf-nodes with x points',  fontweight='bold', fontsize=18)

    axs[0].set_ylabel('Number of leaf-nodes', fontsize=axis_label_fs)
    pdf = PdfPages("Histogram_num_points_maxnum2.pdf")
    pdf.savefig(fig)
    pdf.close()

plot_histo_mn(all_nph_mn)

In [None]:
#number of empty leaf-nodes
for cloud, nph in zip(clouds, all_nph_mn):
    print('Cloud:', cloud, end='')
    print('. Number of empty leaf-nodes: {}'.format(nph[0]))
    # appended after the MinRadius counts already stored in empty_leaf_nodes
    empty_leaf_nodes.append(nph[0])

#number of leaf-nodes with more points than the MaxNumber threshold of each cloud
for cloud, nph, max_number in zip(clouds, all_nph_mn, [512, 512, 1024, 512]):
    print('Cloud:', cloud, end='')
    # report the threshold actually used (it is 1024, not 512, for BrionF)
    print('. Number of leaf-nodes with more than {} points: {}'.format(max_number, np.sum(nph[max_number+1:])))

# Build LaTeX table

In [None]:
def readlevels(filename):
    """Read the per-level histogram of a statistics file.

    The function looks for the first line containing
    ``HISTOGRAM_DE_NIVELES:``, skips that line and the four lines after it,
    and parses every following line except the last two of the file as
    ``<level> <count>`` (whitespace separated). Blank lines are ignored. The
    resulting dict is also printed.

    Args:
        filename: path of the statistics file.

    Returns:
        dict mapping level (int) to count (int); empty if the marker line is
        not found.

    Raises:
        OSError: if the file cannot be opened.
        IndexError, ValueError: if a histogram row is malformed.
    """
    # the context manager closes the file even if reading fails
    with open(filename) as f:
        lines = f.readlines()

    # position of the marker line; len(lines) when absent, so that the slice
    # below is empty
    marker = next(
        (pos for pos, line in enumerate(lines) if 'HISTOGRAM_DE_NIVELES:' in line),
        len(lines),
    )

    levels = {}
    for line in lines[marker+5:-2]:
        fields = line.split()
        if not fields:
            # tolerate blank lines inside the table
            continue
        levels[int(fields[0])] = int(fields[1])
    print(levels)
    return levels

clouds=['Alcoy','Arzua','BrionF','BrionU']


def _load_levels(stats_dir):
    """Read the level histogram of every cloud found under ``stats_dir``.

    ``stats_dir`` must end with a path separator because the file name is
    appended to it directly. Returns ``(histograms, totals)``: one level dict
    per cloud and the sum of the counts of each of them.
    """
    histograms = []
    for cloud in clouds:
        print('Cloud:', cloud)
        histograms.append(readlevels(stats_dir + cloud + 'H_salida.xyz.csv'))
    totals = [sum(levels.values()) for levels in histograms]
    return histograms, totals


# The directory is passed as an argument instead of being stored in a global
# called `dir`, which shadowed the builtin of the same name.
all_hminrad, total_minrad = _load_levels('statsWithoutMemo/MinRadHisto/')
all_hmaxnum, total_maxnum = _load_levels('statsWithoutMemo/MaxNumHisto/')



In [None]:
# Print the LaTeX table: empty leaves, nodes per level and totals, with four
# MinRadius columns followed by four MaxNumber columns.
# Raw strings keep the LaTeX backslashes literal; sequences such as "\h" and
# "\m" are invalid escapes in a normal string literal.
print(r"\begin{tabular}{|c|cccc|cccc|}\hline")
print(r" & \multicolumn{4}{c|}{MinRadius} & \multicolumn{4}{c|}{MaxNumber} \\ \hline")
# '#' is a special character in LaTeX and has to be escaped
print(r" \# empty leaves", end='')
for count in empty_leaf_nodes:
    print(f" & {count}", end='')
print(r"\\ \hline")
print(r" Level & Alcoy & Arzua & BrionF & BrionU & Alcoy & Arzua & BrionF & BrionU \\ \hline")
for lev in range(6, 14):
    row = [str(lev)]
    for per_cloud in (all_hminrad, all_hmaxnum):
        # a level that is missing for a cloud is reported as 0
        row.extend(str(per_cloud[i].get(lev, 0)) for i in range(4))
    print(" & ".join(row) + r"\\ \hline")
totals = [str(total_minrad[i]) for i in range(4)]
totals += [str(total_maxnum[i]) for i in range(4)]
print(" & ".join(["Total:"] + totals) + r"\\ \hline")
print(r"\end{tabular}")