In [1]:
import scipy.spatial as spt
import numpy.linalg as la

def compute_squared_EDM_method5(X):
    V = spt.distance.pdist(X, 'sqeuclidean')
    return spt.distance.squareform(V)

def compute_squared_EDM_method5_modified(X):
    V = spt.distance.pdist(X, 'sqeuclidean')
    V = spt.distance.squareform(V)
    return spt.distance.squareform(V)

def compute_squared_EDM_method1(X):
    # determine dimensions of data matrix X
    m,n = X.shape
    # initialize squared EDM D
    D = np.zeros((n,n))
    # iterate over upper triangle of D
    for i in range(n):
        for j in range(i+1,n):
            D[i,j] = la.norm(X[:,i] - X[:,j])**2
            D[j,i] = D[i,j]
    return D

def compute_norm_2(rows):
    N = np.zeros((len(rows),len(rows)))
    for i in range(1,len(rows)):
        j = 0
        while j<i:
            norm = 0
            norm = rows[i] - rows[j]
            norm = np.sqrt(norm[0]**2+norm[1]**2)
            N[i,j] = norm
            j+=1
    return N

def k_search(pts, r):
    knn = np.zeros((len(pts),1), dtype=int)
    index = 0
    for rows in pts:
        knn[index] = np.count_nonzero(rows[np.where(rows<r)])
        index+=1
    return knn

In [2]:
import pickle
import numpy as np
from astropy.table import Table, vstack
import matplotlib.pyplot as plt
flag = 0
for i in range(0,24):
    num = "0"+str(i) if i<10 else str(i)
    temp = pickle.load( open( "RA_DEC/"+num+".p", "rb" ) )
    if flag==0:
        t = temp
        flag=1
    else:
        t = vstack([t, temp])

In [3]:
t['RA'] = t['RA']*15

In [4]:
%matplotlib notebook
plt.scatter(fix_xlimits(data)['RA'],fix_xlimits(data)['DEC'],s=2)
plt.xlabel("Right Ascension [°]")
plt.ylabel("Declination [°]")
center = 0.2
l = 1
innerRadio = l
outerRadio = l*(2+np.sqrt(2))/2

NameError: name 'fix_xlimits' is not defined

In [None]:
def testing_method1(t,lim,corte):
    try:
        data = t[0:lim]
        margin_x = 1
        margin_y = 1
        x_0 = min(data['RA'])
        x_n = max(data['RA'])
        y_0 = min(data['DEC'])
        y_n = max(data['DEC'])
        r = corte

        zona_operacion = [(x_0+margin_x, y_0+margin_y), (x_n-margin_x, y_n-margin_y)]
        puntos=[]
        for coordenada in data:
            puntos.append([coordenada[0],coordenada[1]])
        puntos = np.array(puntos)
        distancias = compute_squared_EDM_method5(puntos)

        cond_x = (puntos[0:-1,0]>= zona_operacion[0][0])&(puntos[0:-1,0]<= zona_operacion[1][0])
        cond_y = (puntos[0:-1,1]>= zona_operacion[0][1])&(puntos[0:-1,0]<= zona_operacion[1][1])
        mask = np.where(cond_x&cond_y)
        puntos_interes = puntos[mask]
        distancias_interes = distancias[mask]

        knn_1 = k_search(distancias_interes,r)
        
        return np.append(puntos_interes, knn_1, axis=1)
    #mask_2 = np.where(knn_1>5)
    except:
        return np.zeros((1,1),dtype=int)
        


In [None]:
t = %timeit -o testing_method1(t,3000,1)

In [5]:
from scipy.spatial import distance_matrix
import time

start = time.clock()
M = distance_matrix(rows, rows)
end = time.clock()
print(end - start)

NameError: name 'rows' is not defined

# Obtención de KNN
## 1er approach: Distancia de todos con todos, qué tan costoso puede ser?
### Cálculo de la matríz distancia
Utilizando un arreglo (`rows`) de 3447 puntos bidimensionales se procede.
#### Usando compute_norm_2
Se usa el cálculo normal mediante for dentro de for, haciendo solo cálculos para la matríz diagonal superior.

    def compute_norm_2(rows):
        N = np.zeros((len(rows),len(rows)))
        for i in range(1,len(rows)):
            j = 0
            while j<i:
                norm = 0
                norm = rows[i] - rows[j]
                norm = np.sqrt(norm[0]**2+norm[1]**2)
                N[i,j] = norm
                j+=1
        return N

resultado con timeit:

    13.3 s ± 239 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    
#### Usando spt distance_matrix
    from scipy.spatial import distance_matrix
    distance_matrix(rows, rows)
resultado con timeit:

    488 ms ± 10.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

#### Usando método 5, norma al cuadrado
    import scipy.spatial as spt
    def compute_squared_EDM_method5(X):
        V = spt.distance.pdist(X, 'sqeuclidean')
        return spt.distance.squareform(V)
resultado con timeit:

    85.7 ms ± 1.78 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

#### Usando método 5, extrayendo raíz cuadrada
    np.sqrt(compute_squared_EDM_method5(rows))
resultado con timeit:
    
    115 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





## 2do approach: Implementación de estrategia de ataque

### Estrategia 1

Obtener las distancia de todos con todos, luego discriminar por distancias para obtener el knn
#### Pasos:
* Definir parámetros

In [None]:
# parametros a definir
margin_x = 1
margin_y = 1
r = 1
data = t[0:24000]
x_0 = min(data['RA'])
x_n = max(data['RA'])
y_0 = min(data['DEC'])
y_n = max(data['DEC'])

* Obtener matríz de diferencias

In [None]:
zona_operacion = [(x_0+margin_x, y_0+margin_y), (x_n-margin_x, y_n-margin_y)]
puntos=[]
for coordenada in data:
    puntos.append([coordenada[0],coordenada[1]])
puntos = np.array(puntos)
distancias = compute_squared_EDM_method5(puntos)

* Filtrar con los márgenes

In [None]:
cond_x = (puntos[0:-1,0]>= zona_operacion[0][0])&(puntos[0:-1,0]<= zona_operacion[1][0])
cond_y = (puntos[0:-1,1]>= zona_operacion[0][1])&(puntos[0:-1,0]<= zona_operacion[1][1])
mask = np.where(cond_x&cond_y)
puntos_interes = puntos[mask]
distancias_interes = distancias[mask]

* Buscar los k-vecinos mas cercanos para una distancia r utilizando
```
def k_search(pts, r):
    knn = np.zeros((len(pts),1), dtype=int)
    index = 0
    for rows in pts:
        knn[index] = np.count_nonzero(rows[np.where(rows<r)])
        index+=1
    return knn ```

In [5]:
def k_search(pts, r):
    knn = np.zeros((len(pts),1), dtype=int)
    index = 0
    for rows in pts:
        knn[index] = np.count_nonzero(rows[np.where(rows<r)])
        index+=1
    return knn

* Finalmente juntar esta información con los puntos correspondientes

In [None]:
resultado = np.append(puntos_interes, knn_1, axis=1)

    array([[ 1.02744 , -8.311936, 19.      ],
           [ 1.028205, -8.312104, 19.      ],
           [ 1.028415, -8.31335 , 19.      ],
           ...,
           [27.48597 , -8.194771, 11.      ],
           [27.485985, -8.195136, 11.      ],
           [27.489495, -8.200164, 11.      ]])

Esta estrategia no es viable, dado que al haber muchos puntos, que será el caso, comienza a fallar por falta de memoria.

### Estrategia 1+2
* Definir parámetros

In [None]:
# parametros a definir
r = 5
data = t

* Se tendrán la siguiente función de utilidad

##### Matriz de sectores
Se calculan las longitudes de la zona de operación, luego se crean los sectores y se rellena con 1 los límites, donde que tenga valor 1 evitará que se inspeccione ese sector.

    def m_sectores(data, r, flagx=1):
        x_0 = min(data['RA'])
        x_n = max(data['RA'])
        y_0 = min(data['DEC'])
        y_n = max(data['DEC'])
        l_x = (x_n-x_0)
        l_y = (y_n-y_0)
        subregions_x = int(l_x/r) if l_x%r == 0 else int(l_x/r+1)
        subregions_y = int(l_y/r) if l_y%r == 0 else int(l_y/r+1)
        s = (subregions_y,subregions_x+2)
        sectores = np.zeros(s,dtype=int)
        sectores[0,:] = 1
        sectores[-1,:] = 1
        if flagx==1:
            sectores[:,0] = 1
            sectores[:,-1] = 1
        return sectores
    
##### Cálculo del knn para un radio r dado:
    def k_search(pts, r):
        knn = np.zeros((len(pts),1), dtype=int)
        index = 0
        for rows in pts:
            knn[index] = np.count_nonzero(rows[np.where(rows<r)])
            index+=1
        return knn
        
##### Función que calcula el knn (método 1+2)
Para datos de 0° a 360°, dado un radio r y una matríz de sectores, que tiene la opción del flag para marcar o no en la matríz de sectores por donde ya ha pasado (flag_m=1 por omisión, es decir, marca por donde pasa).

    def knn_method2(data, r, sectores, flag_m=1):
        data = fix_xlimits(data)
        flag = 0
        y_0 = min(data['DEC'])
        y_0inf = int(y_0) if y_0%1 == 0 else int(y_0-1)
        for col in range(np.shape(sectores)[1]):
            xcenter = col*r+0.5*r
            xlim_izq = xcenter-1.5*r
            xlim_der = xcenter+1.5*r
            res_rows = []
            for row in range(np.shape(sectores)[0]):
                ycenter = row*r+0.5*r+y_0inf
                ylim_inf = ycenter-1.5*r
                ylim_sup = ycenter+1.5*r

                if sectores[row][col] == 1:
                    res_rows.append([-1])
                else:
                    if flag_m == 1:
                        sectores[row][col] = 1
                    cond_x = (data['RA']>= xlim_izq)&(data['RA']<xlim_der)
                    cond_y = (data['DEC']>= ylim_inf)&(data['DEC']< ylim_sup)
                    mask_op_sect = np.where(cond_x&cond_y)
                    data_aux = data[mask_op_sect]
                    cond_x = (data_aux['RA']>= xcenter-0.5*r)&(data_aux['RA']<xcenter+0.5*r)
                    cond_y = (data_aux['DEC']>= ycenter-0.5*r)&(data_aux['DEC']< ycenter+0.5*r)
                    mask_int_sect = np.where(cond_x&cond_y)
                    coordx = np.expand_dims(np.array(data_aux['RA']),axis=1)
                    coordy = np.expand_dims(np.array(data_aux['DEC']),axis=1)
                    data_aux = np.append(coordx,coordy,axis=1)
                    distances = compute_squared_EDM_method5(data_aux)
                    distances = distances[mask_int_sect]
                    knn = k_search(distances,r)
                    resultado = np.append(data_aux[mask_int_sect], knn, axis=1)
                    res_rows.append(resultado)
            if flag==0:
                res = res_rows
                flag=1
            else:
                if (np.ndim(res_rows) == 1):
                    res_rows = np.expand_dims(res_rows,axis=1)
                res = np.append(res,res_rows,axis=1)
        return res


In [3]:
import time
import pandas as pd
import pickle
from threading import Thread

def k_search(pts, r):
    knn = np.zeros((len(pts),1), dtype=int)
    index = 0
    for rows in pts:
        knn[index] = np.count_nonzero(rows[np.where(rows<r)])
        index+=1
    return knn

def m_sectores(data, r, flagx=1):
    x_0 = min(data['RA'])
    x_n = max(data['RA'])
    y_0 = min(data['DEC'])
    y_n = max(data['DEC'])
    l_x = (x_n-x_0)
    l_y = (y_n-y_0)
    subregions_x = int(l_x/r) if l_x%r == 0 else int(l_x/r+1)
    subregions_y = int(l_y/r) if l_y%r == 0 else int(l_y/r+1)
    s = (subregions_y,subregions_x+2)
    sectores = np.zeros(s,dtype=int)
    sectores[0,:] = 1
    sectores[-1,:] = 1
    if flagx==1:
        sectores[:,0] = 1
        sectores[:,-1] = 1
    return sectores

def fix_xlimits(data, r):
    x_0inf = int(min(data['RA']))
    x_n = max(data['RA'])
    x_nsup = int(x_n) if x_n%1 == 0 else int(x_n+1)
    cond_a = (data['RA']>= x_0inf)&(data['RA']<x_0inf+r)
    cond_b = (data['RA']>= x_nsup-r)&(data['RA']<x_nsup)
    mask = np.where(cond_a)
    temp = data[mask]
    temp['RA'] = temp['RA']+360
    data = vstack([data, temp])
    mask = np.where(cond_b)
    temp = data[mask]
    temp['RA'] = temp['RA']-360
    data = vstack([data, temp])
    return data

def knn_method2(data, r, flagx=1, flag_m=1):
    data = fix_xlimits(data, r)
    sectores = m_sectores(data, r, flagx)
    flag = 0
    y_0 = min(data['DEC'])
    y_0inf = int(y_0) if y_0%1 == 0 else int(y_0-1)
    x_0 = min(data['RA'])
    if x_0>0:
        x_0inf = int(x_0)
    else:
        x_0inf = int(x_0) if x_0%1 == 0 else int(x_0-1)

    try:
        for col in range(np.shape(sectores)[1]):
            xcenter = col*r+0.5*r+x_0inf
            xlim_izq = xcenter-1.5*r
            xlim_der = xcenter+1.5*r
            res_rows = []
            for row in range(np.shape(sectores)[0]):
                ycenter = row*r+0.5*r+y_0inf
                ylim_inf = ycenter-1.5*r
                ylim_sup = ycenter+1.5*r

                if sectores[row][col] == 1:
                    res_rows.append(np.array([-1]))
                else:
                    if flag_m == 1:
                        sectores[row][col] = 1
                    cond_x = (data['RA']>= xlim_izq)&(data['RA']<xlim_der)
                    cond_y = (data['DEC']>= ylim_inf)&(data['DEC']< ylim_sup)
                    mask_op_sect = np.where(cond_x&cond_y)
                    data_aux = data[mask_op_sect]
                    cond_x = (data_aux['RA']>= xcenter-0.5*r)&(data_aux['RA']<xcenter+0.5*r)
                    cond_y = (data_aux['DEC']>= ycenter-0.5*r)&(data_aux['DEC']< ycenter+0.5*r)
                    mask_int_sect = np.where(cond_x&cond_y)
                    coordx = np.expand_dims(np.array(data_aux['RA']),axis=1)
                    coordy = np.expand_dims(np.array(data_aux['DEC']),axis=1)
                    data_aux = np.append(coordx,coordy,axis=1)
                    distances = compute_squared_EDM_method5(data_aux)
                    distances = distances[mask_int_sect]
                    data_aux = data_aux[mask_int_sect]
                    knn = k_search(distances,r)
                    mask = np.where(knn > 5)
                    data_aux = np.expand_dims(data_aux[mask],axis=1)
                    knn = np.expand_dims(knn[mask],axis=1)
                    resultado = np.append(data_aux, knn, axis=1)
                    res_rows.append(resultado)

            if (np.ndim(res_rows) == 1):
                res_rows = np.expand_dims(res_rows,axis=1)
            if flag==0:
                res = res_rows
                flag=1
            else:
                res = np.append(res,res_rows,axis=1)
    except:
        res = np.array([])
    return res
    
def extract_time(RA_start, RA_stop, r_start, r_stop, data):  
    rows = []
    file = open("knn_data/log.txt","a+")
    for RA in range(RA_start, RA_stop+1):
        for r in range(r_start, r_stop+1):
            if (r<=RA):
                cond_x = (data['RA']>= 0)&(data['RA']<RA)
                mask = np.where(cond_x)
                data_aux = data[mask]
                n = len(data_aux)
                start = time.process_time()
                res = knn_method2(data_aux,r,0,1)
                end = time.process_time()
                wait = end - start
                if len(res)==0:
                    wait = -2
                    file.write(str(RA)+" "+str(n)+" "+str(r)+" "+str(wait)+"\n")
                print(RA, n, r, wait)
                rows.append([RA, n, r, wait])
        if (RA%5== 0):
            df = pd.DataFrame(rows,
                    columns=['RA', 'n', 'r', 'time'])
            pickle.dump( df, open( "knn_data/0_to_"+str(RA)+".p", "wb" ) )
            print("saved to knn_data/0_to_"+str(RA)+".p")
            rows = []
    file.close()

In [4]:
count = 360
data = t
##subproceso1 = Thread(target=extract_time, args=(356, 360, 58, 100, data, ))
##subproceso1.start()
extract_time(236,360,59,360,data)

236 248069 59 -2
236 248069 60 1.839292152
236 248069 61 1.835018701
236 248069 62 1.9667702609999997
236 248069 63 1.938189615999999
236 248069 64 1.7324653769999987
236 248069 65 1.760033343
236 248069 66 1.791619036000002
236 248069 67 1.7439510769999984
236 248069 68 1.7656805099999993
236 248069 69 1.760419252000002
236 248069 70 1.7397850740000003
236 248069 71 1.7857416400000012
236 248069 72 1.7364860100000001
236 248069 73 1.8467212450000012
236 248069 74 1.7644073240000004
236 248069 75 1.7465287190000005
236 248069 76 1.775394906999999
236 248069 77 1.7447593770000012
236 248069 78 1.7482084749999984
236 248069 79 1.7392775639999982
236 248069 80 1.7715004169999986
236 248069 81 1.8046960920000004
236 248069 82 1.853141952999998
236 248069 83 1.7655759310000008
236 248069 84 1.7758390389999974
236 248069 85 1.7407238340000006
236 248069 86 1.7658266830000002
236 248069 87 1.7379146950000006
236 248069 88 1.7340241009999957
236 248069 89 1.780909580999996
236 248069 90 1.8479

237 248069 129 1.7514130380000097
237 248069 130 1.742578707000007
237 248069 131 1.7639093450000018
237 248069 132 1.7171396819999813
237 248069 133 1.747122002000026
237 248069 134 1.7438119919999622
237 248069 135 1.7173600110000393
237 248069 136 1.7228773159999946
237 248069 137 1.713746972000024
237 248069 138 1.7234820140000124
237 248069 139 1.7151337259999764
237 248069 140 1.7517466630000058
237 248069 141 1.7153837410000392
237 248069 142 1.7354785590000006
237 248069 143 1.7198562920000313
237 248069 144 1.7212832959999673
237 248069 145 1.710139551999987
237 248069 146 1.7464936250000278
237 248069 147 1.7395621510000296
237 248069 148 1.7633349270000167
237 248069 149 1.7311202390000062
237 248069 150 1.7211947590000136
237 248069 151 1.7567124299999932
237 248069 152 1.7772979700000064
237 248069 153 1.7533633809999856
237 248069 154 1.8020739309999954
237 248069 155 1.7620233600000006
237 248069 156 1.7330514770000036
237 248069 157 1.7179854490000253
237 248069 158 1.7

238 248069 195 1.712583188999929
238 248069 196 1.708484967000004
238 248069 197 1.7177693920000365
238 248069 198 1.7213985189999903
238 248069 199 1.7255243189999874
238 248069 200 1.7181129980000378
238 248069 201 1.724020585000062
238 248069 202 1.7079469570001038
238 248069 203 1.7103776870000047
238 248069 204 1.7182425419999845
238 248069 205 1.7290248439999232
238 248069 206 1.806279432999986
238 248069 207 1.8049326410000504
238 248069 208 1.822322009000004
238 248069 209 1.7313904809999485
238 248069 210 1.724005304000002
238 248069 211 1.7268229689999544
238 248069 212 1.7258062810000183
238 248069 213 1.7229287659999954
238 248069 214 1.7275105930000336
238 248069 215 1.76857553800005
238 248069 216 1.7321405999999797
238 248069 217 1.721851030000039
238 248069 218 1.7227801809999619
238 248069 219 1.7159337680000135
238 248069 220 1.732152739000071
238 248069 221 1.7149553790000027
238 248069 222 1.7286888510000153
238 248069 223 1.7164043720000564
238 248069 224 1.7242776

240 248069 79 1.7255142359999809
240 248069 80 1.7302632729999914
240 248069 81 1.7218214130000433
240 248069 82 1.7276302660000056
240 248069 83 1.7209898779999548
240 248069 84 1.7226081359999625
240 248069 85 1.7265427310001087
240 248069 86 1.7185655179998776
240 248069 87 1.7123117119999733
240 248069 88 1.716169961000105
240 248069 89 1.7127838630001406
240 248069 90 1.7081452420000005
240 248069 91 1.7159132130000216
240 248069 92 1.7117315150001104
240 248069 93 1.7504132900000968
240 248069 94 1.7148349500000677
240 248069 95 1.7219961630000853
240 248069 96 1.7170556730000044
240 248069 97 1.7231186730000445
240 248069 98 1.7123813939999764
240 248069 99 1.7189148739998927
240 248069 100 1.7200434569999743
240 248069 101 1.7205070859999978
240 248069 102 1.718402449999985
240 248069 103 1.7107478099999298
240 248069 104 1.7182203820000268
240 248069 105 1.7186249619999217
240 248069 106 1.7189381650000541
240 248069 107 1.725587755000106
240 248069 108 1.7103557820000788
240 

241 248069 141 1.7244884369999909
241 248069 142 1.7119766989999334
241 248069 143 1.795447686999978
241 248069 144 1.8545267730000887
241 248069 145 1.7223444150001797
241 248069 146 1.7500337159999617
241 248069 147 1.8506322670000372
241 248069 148 1.9155988879999768
241 248069 149 1.8044467949998761
241 248069 150 1.8687614400000712
241 248069 151 1.7863027509999938
241 248069 152 1.9609206710001672
241 248069 153 1.8885668939999505
241 248069 154 1.9501610889999483
241 248069 155 1.870930769000097
241 248069 156 1.7904399379999631
241 248069 157 1.7748876749999454
241 248069 158 1.7321236410000438
241 248069 159 1.7787568920000467
241 248069 160 1.8196664590000182
241 248069 161 1.7921186339999622
241 248069 162 1.7839119500001743
241 248069 163 1.755867199000022
241 248069 164 1.8688676770000257
241 248069 165 1.7156890050000584
241 248069 166 1.7515789540000242
241 248069 167 1.7275878989999
241 248069 168 1.807196703000045
241 248069 169 1.8730131979998532
241 248069 170 1.7870

242 248069 203 1.708111678000023
242 248069 204 1.7159974109999894
242 248069 205 1.721296491999965
242 248069 206 1.7005872909999198
242 248069 207 1.7011649390001367
242 248069 208 1.7115483429997767
242 248069 209 1.703321778999907
242 248069 210 1.7314917280000373
242 248069 211 1.7981199860000743
242 248069 212 1.72579007000013
242 248069 213 1.8033577910000531
242 248069 214 1.778382746999796
242 248069 215 1.8357445350002308
242 248069 216 1.812752369000009
242 248069 217 1.7728635859998576
242 248069 218 1.826753512000323
242 248069 219 1.8083351639998
242 248069 220 1.740360376999888
242 248069 221 1.7424231289996897
242 248069 222 1.7180605349999496
242 248069 223 1.7392886359998556
242 248069 224 1.7427204559999154
242 248069 225 1.7930476719998296
242 248069 226 1.735445955999694
242 248069 227 1.7377833549999195
242 248069 228 1.820322699999906
242 248069 229 1.8792357930001344
242 248069 230 1.899843318999956
242 248069 231 1.8387102270003197
242 248069 232 1.871827708000

244 248069 80 1.8203453109999828
244 248069 81 1.834244230999957
244 248069 82 1.7658492109999315
244 248069 83 1.8273552689997814
244 248069 84 1.7599699249999503
244 248069 85 1.7683398210001542
244 248069 86 1.7650875219997033
244 248069 87 1.7598865410000144
244 248069 88 1.763179102000322
244 248069 89 1.8169055079997634
244 248069 90 1.8936993350002922
244 248069 91 1.8356473809999443
244 248069 92 1.9228842490001625
244 248069 93 2.0256888029998663
244 248069 94 1.884949418999895
244 248069 95 1.9263383010002144
244 248069 96 1.802703575999658
244 248069 97 1.915468064000379
244 248069 98 1.8529990490001182
244 248069 99 1.8523225369999636
244 248069 100 1.8367216930000723
244 248069 101 1.9637748069999361
244 248069 102 1.8173101440002029
244 248069 103 1.74735667799996
244 248069 104 1.770753286999934
244 248069 105 1.741501796999728
244 248069 106 1.7371487119999074
244 248069 107 1.7484819639998932
244 248069 108 1.7620217859998775
244 248069 109 1.7717158039999958
244 24806

245 248069 140 1.8149406569996245
245 248069 141 1.886304889999792
245 248069 142 1.9636967689998528
245 248069 143 1.9643570359999103
245 248069 144 1.7655672480000248
245 248069 145 1.8659521360000326
245 248069 146 1.742494736000026
245 248069 147 1.7125983430000815
245 248069 148 1.716372226999738
245 248069 149 1.7412354100001721
245 248069 150 1.7433291960001043
245 248069 151 1.7354389029997037
245 248069 152 1.7290772569999717
245 248069 153 1.7317276210001182
245 248069 154 1.7680840250000074
245 248069 155 1.7454743660000531
245 248069 156 1.7436710310003036
245 248069 157 1.7469615479999447
245 248069 158 1.7404247280001073
245 248069 159 1.7325616660000378
245 248069 160 1.7308307529997364
245 248069 161 1.7270393849998982
245 248069 162 1.7130798719999802
245 248069 163 1.7331307540002854
245 248069 164 1.7563162090000333
245 248069 165 1.7343550210002832
245 248069 166 1.7222165419998419
245 248069 167 1.7396855699998923
245 248069 168 1.7205576530000144
245 248069 169 1.

246 248069 197 1.7178791769997588
246 248069 198 1.731847471999572
246 248069 199 1.7379754329999741
246 248069 200 1.7227996960000382
246 248069 201 1.7512914120002279
246 248069 202 1.7437249800000245
246 248069 203 1.7473203720001038
246 248069 204 1.77203834599959
246 248069 205 1.7660765969999375
246 248069 206 1.7917252730003383
246 248069 207 1.7470604380000623
246 248069 208 1.7311570849997224
246 248069 209 1.7572619030001988
246 248069 210 1.72627159700005
246 248069 211 1.7462775899998633
246 248069 212 1.7202105960000154
246 248069 213 1.7412174820001383
246 248069 214 1.7321599889996833
246 248069 215 1.7528592339999705
246 248069 216 1.950157376000334
246 248069 217 1.7405938299998525
246 248069 218 1.9991137279998838
246 248069 219 1.8731229019999773
246 248069 220 1.94763513099997
246 248069 221 1.7411722609999742
246 248069 222 1.73331038599963
246 248069 223 1.727149129999816
246 248069 224 1.7224535519999336
246 248069 225 1.7352004600002147
246 248069 226 1.74829546

248 248069 66 1.7208942399997795
248 248069 67 1.7276522349998231
248 248069 68 1.7291451289997894
248 248069 69 1.7084960449997197
248 248069 70 1.715707989999828
248 248069 71 1.7075162410001212
248 248069 72 1.7195207280001341
248 248069 73 1.7206203999999161
248 248069 74 1.7103625830000055
248 248069 75 1.717931819000114
248 248069 76 1.711725860000115
248 248069 77 1.7272356950002177
248 248069 78 1.7149008629999116
248 248069 79 1.7075737229997685
248 248069 80 1.7164574810003614
248 248069 81 1.7210033510000358
248 248069 82 1.7216551169999548
248 248069 83 1.735725750000256
248 248069 84 1.7495883130000038
248 248069 85 1.7152460190000056
248 248069 86 1.7119702279997
248 248069 87 1.7161020260000441
248 248069 88 1.710264468999867
248 248069 89 1.7141054859998803
248 248069 90 1.7157974409997223
248 248069 91 1.7166548660002263
248 248069 92 1.7178869330000452
248 248069 93 1.7219430479999573
248 248069 94 1.7113034119997792
248 248069 95 1.7709870460003003
248 248069 96 1.72

249 248069 122 1.7596304979997512
249 248069 123 1.7308424329994523
249 248069 124 1.7484967750006035
249 248069 125 1.7552294600000096
249 248069 126 1.7610700920004092
249 248069 127 1.7683136819996434
249 248069 128 1.804282928000248
249 248069 129 1.8068805670000074
249 248069 130 1.8222153410006285
249 248069 131 1.7782610909998766
249 248069 132 1.7431332050000492
249 248069 133 1.745470908000243
249 248069 134 1.7450094979994901
249 248069 135 1.7467325110001184
249 248069 136 1.733390495999629
249 248069 137 1.7488118110004507
249 248069 138 1.7638039710000157
249 248069 139 1.7331794749998153
249 248069 140 1.7554860439995537
249 248069 141 1.757911509999758
249 248069 142 1.742711796000549
249 248069 143 1.731632652999906
249 248069 144 1.7695264499998302
249 248069 145 1.8124086400002852
249 248069 146 1.830641464999644
249 248069 147 1.8485788220004906
249 248069 148 1.770479456999965
249 248069 149 1.7606255820001024
249 248069 150 1.7577039660000082
249 248069 151 1.83774

250 248069 176 1.7501738990004014
250 248069 177 1.744083264999972
250 248069 178 1.9021240430001853
250 248069 179 1.7453526950002924
250 248069 180 1.7345990700005132
250 248069 181 1.7377057460007563
250 248069 182 1.7598418590005167
250 248069 183 1.8669354140001815
250 248069 184 1.7152778809995652
250 248069 185 1.8126227929997185
250 248069 186 1.847272392999912
250 248069 187 1.746129237999412
250 248069 188 1.7037459570001374
250 248069 189 1.742808244999651
250 248069 190 1.7123660939996626
250 248069 191 1.7867635200000223
250 248069 192 1.7859455950001575
250 248069 193 1.7249701150003602
250 248069 194 1.7094706939997195
250 248069 195 1.7226769440003409
250 248069 196 1.7216043190001074
250 248069 197 1.7094471190002878
250 248069 198 1.766344300000128
250 248069 199 1.8598462550007753
250 248069 200 1.7877147700000933
250 248069 201 1.724169178000011
250 248069 202 1.7159903419997136
250 248069 203 1.7645969830000467
250 248069 204 1.8233620740002152
250 248069 205 1.846

251 248069 228 1.7348370069994417
251 248069 229 1.8668855100004293
251 248069 230 1.7920163159997173
251 248069 231 1.8614415319998443
251 248069 232 1.8550654559994655
251 248069 233 1.8463980249998713
251 248069 234 1.955043573999319
251 248069 235 1.8367715300000782
251 248069 236 1.8170887939995737
251 248069 237 1.9226092690005316
251 248069 238 1.882452535000084
251 248069 239 1.8895051090003108
251 248069 240 1.7170106300000043
251 248069 241 1.7357510129995717
251 248069 242 1.736227350000263
251 248069 243 1.743668910999986
251 248069 244 1.7651869060000536
251 248069 245 1.8448212829998738
251 248069 246 1.7155553640004655
251 248069 247 1.7928910110003926
251 248069 248 1.729148419000012
251 248069 249 1.7211541959995884
251 248069 250 1.736545281999497
251 248069 251 1.710200022999743
252 248069 59 -2
252 248069 60 1.7138491780006007
252 248069 61 1.725945856999715
252 248069 62 1.7057822010001473
252 248069 63 1.7171158669998476
252 248069 64 1.747767273000136
252 248069 

253 248069 88 1.7249720179997894
253 248069 89 1.730418439999994
253 248069 90 1.7196593930002564
253 248069 91 1.7150277560003815
253 248069 92 1.7174526970002262
253 248069 93 1.7148105010001018
253 248069 94 1.7096203589999277
253 248069 95 1.712442175000433
253 248069 96 1.7206302899994625
253 248069 97 1.7265497039998081
253 248069 98 1.7230436640002154
253 248069 99 1.7172782510006073
253 248069 100 3.137438590999409
253 248069 101 2.7276177370004007
253 248069 102 1.7117185870001776
253 248069 103 1.712865354999849
253 248069 104 1.7154008910001721
253 248069 105 1.7239700999998604
253 248069 106 1.7132847729999412
253 248069 107 1.7247551420005038
253 248069 108 1.7200630329998603
253 248069 109 1.727658042999792
253 248069 110 1.7138613940005598
253 248069 111 1.7247049039997364
253 248069 112 1.7273000990007858
253 248069 113 1.7177384529995834
253 248069 114 1.7192106490001606
253 248069 115 1.7207532839993291
253 248069 116 1.7203491769996617
253 248069 117 1.73174957599985

254 248069 138 1.7982712060002086
254 248069 139 1.7946116089997304
254 248069 140 1.7841572669995003
254 248069 141 1.7112509810003758
254 248069 142 1.6998951870000383
254 248069 143 1.7106296359997941
254 248069 144 1.7106493659994157
254 248069 145 1.7156547180002235
254 248069 146 1.7120744019994163
254 248069 147 1.7086800810002387
254 248069 148 1.7060038720001103
254 248069 149 1.706885333000173
254 248069 150 1.7113577860000078
254 248069 151 1.7471628379998947
254 248069 152 1.7082736619995558
254 248069 153 1.7694568359993355
254 248069 154 1.7724409290003678
254 248069 155 1.7037456469997778
254 248069 156 1.7140410420006447
254 248069 157 1.7075987669995811
254 248069 158 1.710374948000208
254 248069 159 1.7095915199997762
254 248069 160 1.7286143849996733
254 248069 161 1.7106060989999605
254 248069 162 1.7120615560006627
254 248069 163 1.7050953979996848
254 248069 164 1.7034363809998467
254 248069 165 1.7055151539998405
254 248069 166 1.710966312999517
254 248069 167 1.

KeyboardInterrupt: 

In [1]:
a

NameError: name 'a' is not defined

In [None]:
import time
import pandas as pd
import pickle
from threading import Thread

rows = []
file = open("knn_data/log.txt","a+")
data = t
for RA in range(360,361):
    for r in range(1,101):
        if (r<=RA):
            cond_x = (data['RA']>= 0)&(data['RA']<RA)
            mask = np.where(cond_x)
            data_aux = data[mask]
            n = len(data_aux)
            start = time.process_time()
            res = knn_method2(data_aux,r,0,1)
            end = time.process_time()
            wait = end - start
            if len(res)==0:
                wait = -2
                file.write(str(RA)+" "+str(n)+" "+str(r)+" "+str(wait)+"\n")
            print(RA, n, r, wait)
            rows.append([RA, n, r, wait])
    if (RA%2== 0):
        df = pd.DataFrame(rows,
                columns=['RA', 'n', 'r', 'time'])
        pickle.dump( df, open( "knn_data/0_to_"+str(RA)+".p", "wb" ) )
        print("saved to knn_data/0_to_"+str(RA)+".p")
        rows = []
file.close()

360 248069 1 36.100528151999995
360 248069 2 12.629428284
360 248069 3 13.96797358900001
360 248069 4 21.733696347000006
360 248069 5 34.748350199000015
360 248069 6 51.65969623999999
360 248069 7 70.497747785
360 248069 8 93.48833544799999
360 248069 9 124.21218886499997
360 248069 10 142.00342176100003
360 248069 11 171.2719559850001
360 248069 12 200.403591313
360 248069 13 258.72606011200014
360 248069 14 -2
360 248069 15 -2
360 248069 16 -2
360 248069 17 -2
360 248069 18 -2
360 248069 19 -2
360 248069 20 -2
360 248069 21 -2
360 248069 22 -2
360 248069 23 -2
360 248069 24 -2
360 248069 25 -2
360 248069 26 -2
360 248069 27 -2
360 248069 28 -2


360 248069 1 32.888277156
360 248069 2 12.062724816
360 248069 3 13.186072795999998
360 248069 4 21.043711244
360 248069 5 33.53531587500001
360 248069 6 49.970483421
360 248069 7 68.92603013200002
360 248069 8 91.68760037399997
360 248069 9 120.15328979099996
360 248069 10 139.052238885
360 248069 11 167.714093251
360 248069 12 195.40970868600004
360 248069 13 245.40387419899992

#### Se guardan los datos asociados a los tiempos de ejecución y se cargan para prodecer con los gráficos de desempeño

In [16]:
import pickle
import numpy as np
import pandas as pd
flag = 0
for i in range(5,86,5):
    num = str(i)
    temp_df = pickle.load( open( "knn_data/0_to_"+num+".p", "rb" ) )
    if flag==0:
        df = temp_df
        flag=1
    else:
        df = df.append(temp_df)

In [17]:
arreglo = df.values

In [40]:
%matplotlib notebook
import matplotlib.pyplot as plt

plt.figure()
plt.hold
for i in range(80,87,1):
    mask = np.where(arreglo[:,2]==i)
    arreglo_aux = arreglo[mask]
    plt.plot(arreglo_aux[:,-3],arreglo_aux[:,-1],label='r = '+str(i))
    
plt.legend()
plt.ylabel("t [s]")
plt.xlabel("n [cant. datos]")
plt.show()

<IPython.core.display.Javascript object>

In [32]:
int(max(arreglo[:,1])/5)

5822

In [56]:
%matplotlib notebook
plt.figure()
plt.hold
for i in range(10,int(max(arreglo[:,0])),10):
    mask = np.where(arreglo[:,0]==i)
    arreglo_aux = arreglo[mask]
    plt.plot(arreglo_aux[:,2],arreglo_aux[:,-1],label='RA = '+str(i))
    
plt.legend()
plt.ylabel("t [s]")
plt.xlabel("r [°]")
plt.show()

<IPython.core.display.Javascript object>

In [None]:
arreglo[:,0]

In [None]:
%matplotlib notebook
plt.figure()
plt.scatter(arreglo[:,-3],arreglo[:,-1],s=2)
plt.ylabel("t [s]")
plt.xlabel("r [°]")
plt.show()

In [None]:
y_0inf = int(y_0) if y_0%1 == 0 else int(y_0-1)
xcenter = 3*r+0.5*r
ycenter = 4*r+0.5*r+y_0inf
xlim_izq = xcenter-1.5*r
xlim_der = xcenter+1.5*r
ylim_inf = ycenter-1.5*r
ylim_sup = ycenter+1.5*r

cond_x = (data['RA']>= xlim_izq)&(data['RA']<xlim_der)
cond_y = (data['DEC']>= ylim_inf)&(data['DEC']< ylim_sup)
mask_op_sect = np.where(cond_x&cond_y)
data_aux = data[mask_op_sect]
cond_x = (data_aux['RA']>= xcenter-0.5*r)&(data_aux['RA']<xcenter+0.5*r)
cond_y = (data_aux['DEC']>= ycenter-0.5*r)&(data_aux['DEC']< ycenter+0.5*r)
mask_int_sect = np.where(cond_x&cond_y)
coordx = np.expand_dims(np.array(data_aux['RA']),axis=1)
coordy = np.expand_dims(np.array(data_aux['DEC']),axis=1)
data_aux = np.append(coordx,coordy,axis=1)
distances = compute_squared_EDM_method5(data_aux)
distances = distances[mask_int_sect]
knn_2 = k_search(distances,r)
resultado = np.append(data_aux[mask_int_sect], knn_2, axis=1)


In [None]:
col1 = 3
row1 = 4
resultado2 = np.append(data_aux[mask_int_sect], knn_2, axis=1)

In [None]:
row = []
row.append(np.array([1]))
row.append(np.array([2]))
row.append(np.array([3]))
row.append(resultado)

In [None]:
row = np.expand_dims(row,axis=1)

In [None]:
np.ndim(row)

In [None]:
compute_squared_EDM_method5(np.array(data_aux))
array([[ 1.02744 , -8.311936, 19.      ],
       [ 1.028205, -8.312104, 19.      ],
       [ 1.028415, -8.31335 , 19.      ],
       ...,
       [27.48597 , -8.194771, 11.      ],
       [27.485985, -8.195136, 11.      ],
       [27.489495, -8.200164, 11.      ]])


### Estrategia 2
#### Pasos:
* Definir parámetros
* Obtener datos solo para la zona que se analizará
* Inicializar matríz de sectores
* Calcular matríz distancia por sectores de operación