In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.utils import class_weight
from sklearn.decomposition import PCA
from imblearn.combine import SMOTEENN
import time

# Additional
import open3d as o3d

In [None]:
# Wall-clock start of the run; the final cell subtracts this from time.time()
# to report the elapsed time.
startTime = time.time()

In [None]:
VAL_SPLIT = 0.1 # 10% of the non-edge cell indexes are held out for validation
STATISTICS = ["median", "std", "max", "min"] # per-cell z statistics, in the order cell_statistics returns them
# Grid step used along both axes when the area is split into cells.
# NOTE(review): the previous comment said "0.5 meters" but the value is 0.1 - confirm which is intended.
CELL_SIZE = 0.1

In [None]:
# Load the point data; later cells read its "x", "y", "z" and "accepted" columns.
data = pd.read_csv("data/felt1_points.csv")

In [None]:
def area_of_dataframe(dataframe, x_start, x_last, y_start, y_last):
    """Return the rows of ``dataframe`` lying inside an axis-aligned rectangle.

    Args:
        dataframe: Frame with at least an "x" and a "y" column.
        x_start, x_last: Inclusive lower/upper bound on "x".
        y_start, y_last: Inclusive lower/upper bound on "y".

    Returns:
        The boolean-filtered selection of ``dataframe``; the original index
        labels of the surviving rows are kept.
    """
    inside_x = (dataframe["x"] >= x_start) & (dataframe["x"] <= x_last)
    inside_y = (dataframe["y"] >= y_start) & (dataframe["y"] <= y_last)
    return dataframe[inside_x & inside_y]

# Work on a sub-area only: from the origin up to one tenth of the largest
# x and y coordinate found in the data.
xMax = data["x"].max() / 10
yMax = data["y"].max() / 10
data_area = area_of_dataframe(data, x_start=0, x_last=xMax, y_start=0, y_last=yMax)
data_area

In [None]:
def visualize_area(data_area):
    """Draw the points of ``data_area`` as a colored Open3D voxel grid.

    Points whose ``accepted`` value is greater than zero are colored green,
    all other points red. The point cloud is voxelized with a voxel size of
    0.02 and handed to ``o3d.visualization.draw_geometries``.

    Args:
        data_area: Frame with "x", "y", "z" and "accepted" columns.
    """
    full_scale = 65535

    coordinates = np.vstack((
        np.array(data_area.x),
        np.array(data_area.y),
        np.array(data_area.z),
    )).transpose()

    # One color channel per array, filled in a single vectorized step.
    is_accepted = np.array(data_area.accepted) > 0
    red_channel = np.where(is_accepted, 0.0, float(full_scale))
    green_channel = np.where(is_accepted, float(full_scale), 0.0)
    blue_channel = np.zeros(len(data_area.x))

    # Scale the channels down to the 0..1 range before handing them to Open3D.
    colors = np.vstack((red_channel, green_channel, blue_channel)).transpose() / full_scale

    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(coordinates)
    cloud.colors = o3d.utility.Vector3dVector(colors)

    voxels = o3d.geometry.VoxelGrid.create_from_point_cloud(cloud, voxel_size=0.02)
    o3d.visualization.draw_geometries([voxels])

In [None]:
# Show the selected area; visualize_area passes the voxel grid to
# o3d.visualization.draw_geometries.
# NOTE(review): this presumably opens an interactive viewer and may stall an
# unattended "Run All" - confirm.
visualize_area(data_area)

In [None]:
def dataframe_to_cells(dataframe, cells_in_x, cells_in_y):
    """Split ``dataframe`` into rectangular cells bounded by consecutive grid edges.

    Args:
        dataframe: Frame with "x" and "y" columns.
        cells_in_x: Sequence of cell edges along x.
        cells_in_y: Sequence of cell edges along y.

    Returns:
        A list of frames, one per cell. The x edge index is the outer loop and
        the y edge index the inner loop, so the cell between x edges
        ``i-1``/``i`` and y edges ``j-1``/``j`` sits at position
        ``(i-1) * (len(cells_in_y)-1) + (j-1)``.

    Note:
        area_of_dataframe uses inclusive bounds on both sides, so a point lying
        exactly on a shared edge is returned for both adjacent cells.
    """
    return [
        area_of_dataframe(
            dataframe,
            cells_in_x[x_edge - 1], cells_in_x[x_edge],
            cells_in_y[y_edge - 1], cells_in_y[y_edge],
        )
        for x_edge in range(1, len(cells_in_x))
        for y_edge in range(1, len(cells_in_y))
    ]

# Build the grid on the raw (not yet normalized) coordinates so that every cell
# spans CELL_SIZE along each axis; normalization happens afterwards.
# NOTE(review): this comment used to say "0.5m areas" while CELL_SIZE is 0.1 - confirm.
# NOTE(review): np.arange with a non-integer step can produce one edge more or
# fewer than expected (see the NumPy docs); verify the counts printed below.
cells_in_x = np.arange(0, xMax+CELL_SIZE, CELL_SIZE)
cells_in_y = np.arange(0, yMax+CELL_SIZE, CELL_SIZE)
cells = dataframe_to_cells(data_area, cells_in_x, cells_in_y)

In [None]:
# Sanity check: extent of the area and number of grid edges along each axis.
print(xMax, len(cells_in_x), yMax, len(cells_in_y), sep="\n")

In [None]:
def normalize_cells_area(cells, MinMaxArea):
    """Min-max normalize every cell with the column ranges of ``MinMaxArea``.

    Each column of each cell is mapped with ``(value - min) / (max - min)``,
    where ``min`` and ``max`` are the per-column extremes of ``MinMaxArea``
    (not of the individual cell). All columns of the cell are scaled.

    Args:
        cells: Iterable of frames sharing the columns of ``MinMaxArea``.
        MinMaxArea: Frame that defines the per-column minimum and maximum.

    Returns:
        A new list with one normalized frame per input cell, in the same
        order. The input frames are not modified.
    """
    # The area-wide extremes do not depend on the cell, so compute them once
    # instead of rescanning the whole area for every cell.
    area_min = MinMaxArea.min()
    area_range = MinMaxArea.max() - area_min

    # A constant column has a zero range; dividing by it would turn the whole
    # column into NaN (0/0). Divide by 1 instead so such a column maps to 0.
    safe_range = area_range.where(area_range != 0, 1.0)

    return [(cell - area_min) / safe_range for cell in cells]
    
# Min-Max normalization: every column of every cell is scaled with the minimum
# and maximum of the whole selected area (data_area), not with the cell's own range.
norm_cells = normalize_cells_area(cells, data_area)

In [1]:
def get_cell_neighbors(cells, cells_in_x, cells_in_y, num_neighbors):
    """Map every non-edge cell index to the indexes of its surrounding cells.

    The flat cell index is treated as a grid in which consecutive indexes are
    neighbors along y and indexes ``len(cells_in_y) - 1`` apart are neighbors
    along x. Cells closer than ``num_neighbors`` to any border of the grid are
    left out of the result.

    Args:
        cells: Flat list of cells; only its length is used.
        cells_in_x: Grid edges along x (not used by the computation).
        cells_in_y: Grid edges along y; fixes the stride between x-neighbors.
        num_neighbors: Number of rings of cells to collect around each cell.

    Returns:
        Dict from cell index to a list of neighbor indexes. For every distance
        ``d`` in 1..num_neighbors the list holds the four axis neighbors,
        followed by the diagonal neighbors combining ``d`` along x with every
        distance 1..num_neighbors along y.
    """
    stride = len(cells_in_y) - 1
    border = stride * num_neighbors
    cell_count = len(cells)

    # The offsets relative to the center cell are identical for every cell,
    # so they are laid out once, in the order in which they are reported.
    offsets = []
    for dx in range(1, num_neighbors + 1):
        along_x = stride * dx
        offsets += [-along_x, -dx, dx, along_x]
        for dy in range(1, num_neighbors + 1):
            offsets += [-(along_x + dy), -(along_x - dy), along_x - dy, along_x + dy]

    neighbors = {}
    for cell_idx in range(cell_count):
        # Too close to the first or last columns of the grid.
        if cell_idx < border or cell_idx >= cell_count - border:
            continue

        # Too close to the start or end of its own column.
        position_in_column = cell_idx % stride
        if position_in_column < num_neighbors or position_in_column >= stride - num_neighbors:
            continue

        neighbors[cell_idx] = [cell_idx + offset for offset in offsets]

    return neighbors

# Get neighbor cells
# The ring count needs its own name: it was previously stored in `neighbors`
# and never passed on, so the call was missing its required num_neighbors
# argument and raised a TypeError.
num_neighbors = 2
neighbors = get_cell_neighbors(norm_cells, cells_in_x, cells_in_y, num_neighbors)

In [None]:
def cell_statistics(cell):
    """Summarize the ``z`` column of one cell.

    Args:
        cell: Frame with a "z" column holding at least one value.

    Returns:
        Tuple ``(median, std, max, min)`` of ``cell.z``, each computed with the
        corresponding NumPy function.
    """
    heights = cell.z
    return np.median(heights), np.std(heights), np.max(heights), np.min(heights)

In [None]:
def calculate_statistics(neighbors, cells):
    """Attach the z statistics of each cell and of its neighbors as columns.

    For every key of ``neighbors`` whose cell is not empty, the statistics of
    the cell itself followed by those of each listed neighbor are written to
    that cell as constant columns. Column ``i`` is named
    ``STATISTICS[i % len(STATISTICS)] + str(i)`` (e.g. "median0", "std1", ...).
    An empty neighbor contributes ``-1`` for each of its statistics.

    Args:
        neighbors: Dict from cell index to a list of neighbor cell indexes.
        cells: List of frames with a "z" column. The frames are modified in
            place.

    Returns:
        The same ``cells`` list that was passed in.
    """
    stat_count = len(STATISTICS)
    empty_stats = [-1] * stat_count

    # A cell belongs to many neighborhoods; remember its statistics so they are
    # computed once instead of once per neighborhood. The added columns never
    # touch "z" or the row count, so the remembered values stay valid.
    stats_by_cell = {}

    def stats_of(idx):
        if idx not in stats_by_cell:
            cell = cells[idx]
            if len(cell) == 0:
                # Empty statistics if the cell has nothing to show for
                stats_by_cell[idx] = empty_stats
            else:
                stats_by_cell[idx] = list(cell_statistics(cell))
        return stats_by_cell[idx]

    for key in neighbors:
        # Skip if there are no points in the cell
        if len(cells[key]) == 0:
            continue

        # The cell itself comes first, followed by its neighbors in list order
        statistics = []
        for idx in [key] + neighbors[key]:
            statistics.extend(stats_of(idx))

        # Add the statistics to the current cell, one constant column each
        for position, value in enumerate(statistics):
            col_name = STATISTICS[position % stat_count] + str(position)
            cells[key][col_name] = value

    return cells

# Calculate statistics
# calculate_statistics adds the statistic columns to the frames inside
# norm_cells and returns that same list, so stat_cells and norm_cells refer to
# one and the same object.
stat_cells = calculate_statistics(neighbors, norm_cells)

In [None]:
def get_x_and_y(wanted_indexes, norm_cells, cells,):
    """Build the feature matrix and label vector for a set of cells.

    Args:
        wanted_indexes: Cell indexes to include.
        norm_cells: List of frames the rows are taken from.
        cells: List of frames used only to decide whether a cell is empty;
            indexes whose entry here has no rows are skipped.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x`` holds every column except
        "x", "y" and "accepted"; ``y`` holds the "accepted" column. Rows of
        all selected cells are stacked in the order of ``wanted_indexes``.

    Raises:
        ValueError: If none of the wanted cells contains any rows.
    """
    # Pick out our wanted indexes, leaving out cells without points
    wanted_cells = [norm_cells[idx] for idx in wanted_indexes if len(cells[idx]) > 0]
    if not wanted_cells:
        raise ValueError("No non-empty cells among the wanted indexes")

    # Combine the cells as one dataframe
    combined_cells = pd.concat(wanted_cells, ignore_index=True)

    # Take the feature columns from the combined frame rather than from the
    # first wanted cell: that cell may be empty, in which case it never
    # received the statistic columns and every statistic would be dropped.
    non_feature_cols = {"x", "y", "accepted"}
    cols = [col for col in combined_cells.columns if col not in non_feature_cols]

    x = np.array(combined_cells.loc[:, cols])
    y = np.array(combined_cells.accepted)

    return x, y

# Prepare the data from the dataset
# get_cell_neighbors leaves edge cells out of its result, so the keys of
# `neighbors` are the non-edge cell indexes.
# NOTE(review): non_edge_cells was not defined in any visible cell (NameError on
# a fresh kernel); confirm that the keys of `neighbors` are the intended source.
non_edge_cells = list(neighbors)
train_split = int(len(non_edge_cells) * (1 - VAL_SPLIT))

# The split is positional: the first part of the indexes is used for training,
# the remaining VAL_SPLIT share for validation.
train_cell_indexes = non_edge_cells[:train_split]
test_cell_indexes = non_edge_cells[train_split:]

x_train, y_train = get_x_and_y(train_cell_indexes, stat_cells, cells)
x_val, y_val = get_x_and_y(test_cell_indexes, stat_cells, cells)

In [None]:
# Report the wall-clock time elapsed since startTime was recorded.
executionTime = time.time() - startTime
print(f'Execution time in seconds: {executionTime}')