In [253]:
# Start from an empty namespace so stale variables from earlier runs cannot
# leak into this one (-f skips the interactive confirmation prompt).
%reset -f
# Load the autoreload extension and set mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Make fake dataset
~15.6 million points of (x, y, z, xvel, yvel, zvel)

## Choose the size of each dimension

In [254]:
# Number of grid samples along each spatial axis.
grid_size = dict(x=250, y=250, z=250)

## Calculate total number of points & make vectors for each dimension

In [255]:
# The total point count is the product of the per-axis sizes.
N = 1
for axis_len in grid_size.values():
    N *= axis_len

print(N, 'points')

15625000 points


In [256]:
# One integer coordinate vector (0, 1, ..., size - 1) per axis.
grid_vector = {
    axis: np.arange(0, axis_len, step=1)
    for axis, axis_len in grid_size.items()
}

grid_vector

{'x': array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
        169, 170, 171, 172, 173, 174, 175, 176

## Convert vectors to 3D points

In [257]:
# Expand the three 1-D axis vectors into full 3-D coordinate grids.
# indexing='ij' keeps the array axes in (x, y, z) order.
axis_vectors = [grid_vector[axis] for axis in ('x', 'y', 'z')]
xv, yv, zv = np.meshgrid(*axis_vectors, indexing='ij')

## Convert 3D coordinates to a 2D list of coordinates

In [258]:
# Stack the three coordinate grids, flatten each one, and transpose so that
# every row holds the (x, y, z) coordinates of a single grid point.
data = np.stack((xv, yv, zv)).reshape(3, -1).T
data_df = pd.DataFrame(data, columns=list('xyz'))
data_df

Unnamed: 0,x,y,z
0,0,0,0
1,0,0,1
2,0,0,2
3,0,0,3
4,0,0,4
...,...,...,...
15624995,249,249,245
15624996,249,249,246
15624997,249,249,247
15624998,249,249,248


## Add some other values you may want to pull out
These would correspond to xvel, yvel, zvel, etc.

In [259]:
# Attach three synthetic value columns to a copy of the coordinate table:
#   val1 - z squared
#   val2 - sum of the sines of the three coordinates
#   val3 - standard-normal noise
# The noise comes from a seeded generator so that Restart & Run All
# reproduces the same table every time.
VAL3_SEED = 0
val3_rng = np.random.default_rng(VAL3_SEED)

# .assign returns a new frame, so data_df itself is left untouched.
data_df_augment = data_df.assign(
    val1=data_df['z'] ** 2,
    val2=np.sin(data_df['x']) + np.sin(data_df['y']) + np.sin(data_df['z']),
    val3=val3_rng.normal(loc=0.0, scale=1.0, size=len(data_df)),
)
data_df_augment

Unnamed: 0,x,y,z,val1,val2,val3
0,0,0,0,0,0.000000,0.063320
1,0,0,1,1,0.841471,-2.184887
2,0,0,2,4,0.909297,0.051739
3,0,0,3,9,0.141120,-1.565275
4,0,0,4,16,-0.756802,2.263395
...,...,...,...,...,...,...
15624995,249,249,245,60025,-1.498539,0.962556
15624996,249,249,246,60516,-0.637566,-0.261776
15624997,249,249,247,61009,-0.527519,-0.396644
15624998,249,249,248,61504,-1.269574,0.194666


## Set point to pull out

In [260]:
# Coordinates of the location to look up in the dataset.
point = dict(x=30.2, y=14.7, z=28.9)

## Visualize some values in 3D space
Note: plotting every point at the full grid size is extremely slow. You may want to leave this cell disabled or try it with a smaller grid size.

In [261]:
# Optional 3-D scatter of val2 over the whole grid, with the query point
# overlaid in black. Off by default because drawing every grid point is
# extremely slow at the full grid size; set SHOW_3D_PREVIEW = True to run it.
SHOW_3D_PREVIEW = False

if SHOW_3D_PREVIEW:
    # For an interactive figure, run `%matplotlib widget` in a cell beforehand.
    ax = plt.figure().add_subplot(projection='3d')
    ax.scatter(data_df_augment['x'],
               data_df_augment['y'],
               data_df_augment['z'],
               c=data_df_augment['val2'],
               marker='.', cmap='plasma', alpha=0.02)

    ax.scatter(point['x'],
               point['y'],
               point['z'],
               c='black',
               marker='.', s=100, label='point to pull out')

    ax.legend()
    plt.show()

# Find point index
Find the point in the grids

In [262]:
# Snap the query point to the nearest grid node, one axis at a time.
# np.argmin yields the *position* of the closest entry in the axis vector;
# the coordinate *value* at that position is what gets stored, because the
# data-frame columns hold coordinate values. (For a 0-based, unit-step grid
# the two coincide, but storing the value stays correct for any grid.)
point_min = {}
for axis, target in point.items():
    nearest_idx = np.argmin(np.abs(grid_vector[axis] - target))
    # .item() converts the NumPy scalar to a plain Python number.
    point_min[axis] = grid_vector[axis][nearest_idx].item()

point_min

{'x': 30, 'y': 15, 'z': 29}

Now find the same point in the data-frame using logical indexing

In [263]:
# Boolean row mask: True only where all three coordinates equal the snapped point.
point_logical = (
    data_df_augment['x'].eq(point_min['x'])
    & data_df_augment['y'].eq(point_min['y'])
    & data_df_augment['z'].eq(point_min['z'])
)

In [264]:
# Turn the boolean mask into the single row position it selects.
# Exactly one row must match; zero or several matches are reported with a
# clear error instead of an opaque scalar-conversion failure.
matching_rows = np.flatnonzero(point_logical.to_numpy())
if matching_rows.size != 1:
    raise ValueError(
        f'expected exactly one row matching {point_min}, found {matching_rows.size}'
    )
point_index = int(matching_rows[0])
point_index

1878779

# Pull out point
Note it pulls out the closest point to: point = {'x': 30.2, 'y': 14.7, 'z': 28.9}

In [267]:
# Time only the positional row lookup. time.perf_counter() is a monotonic,
# high-resolution clock intended for measuring short durations.
st = time.perf_counter()
# data_point = data_df_augment.iloc[point_logical.values, :] # alternative
data_point = data_df_augment.iloc[[point_index], :]  # list index keeps a one-row DataFrame
et = time.perf_counter()
delta = et - st
print('elapsed time for spite test:', delta)
data_point

elapsed time for spite test: 0.00099945068359375


Unnamed: 0,x,y,z,val1,val2,val3
1878779,30,15,29,841,-1.001378,0.827992


# Time the whole thing

In [274]:
# End-to-end timing of the lookup: snap the query point to the grid, build
# the boolean row mask, resolve it to a row position, and pull the row out.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations.
st = time.perf_counter()

# Nearest grid coordinate per axis. The coordinate value (not the argmin
# position) is stored, since the data-frame columns hold coordinate values.
point_min = {}
for axis, target in point.items():
    nearest_idx = np.argmin(np.abs(grid_vector[axis] - target))
    point_min[axis] = grid_vector[axis][nearest_idx].item()

# True only where all three coordinates equal the snapped point.
point_logical = (
    (data_df_augment['x'] == point_min['x'])
    & (data_df_augment['y'] == point_min['y'])
    & (data_df_augment['z'] == point_min['z'])
)

# Exactly one row must match; fail with a clear message otherwise.
matching_rows = np.flatnonzero(point_logical.to_numpy())
if matching_rows.size != 1:
    raise ValueError(
        f'expected exactly one row matching {point_min}, found {matching_rows.size}'
    )
point_index = int(matching_rows[0])

data_point = data_df_augment.iloc[[point_index], :]

et = time.perf_counter()
delta = et - st

print('elapsed time for total spite test:', delta)
data_point

elapsed time for total spite test: 0.03400015830993652


Unnamed: 0,x,y,z,val1,val2,val3
1878779,30,15,29,841,-1.001378,0.827992
