In [1]:
# Notebook setup cell: call the project-local path helper before the other imports.
# NOTE(review): presumably adjusts the working directory / import path for the
# notebook -- confirm against set_notebook_path, which is not visible here.
from set_notebook_path import check_path
check_path()

In [None]:
import numpy as np
import pandas as pd
import re
import openpyxl

In [2]:
def get_data_from_excel(excel_path):
    """Read the X, Y and Z coordinate columns from an Excel workbook.

    The first sheet whose name starts with ``Station Coords - N X Y Z`` is
    parsed with ``header=2`` (the third spreadsheet row supplies the column
    labels). The first column must be labelled ``Phone``; the three columns
    after it are returned as the X, Y and Z values.

    Args:
        excel_path: Anything ``pandas.ExcelFile`` accepts (path or file-like).

    Returns:
        Tuple ``(x_points, y_points, z_points)`` of pandas Series taken from
        the second, third and fourth columns of the parsed sheet.

    Raises:
        ValueError: If no sheet name matches, if the first header is not
            ``"Phone"``, or if the sheet has fewer than four columns.
    """
    # The trailing character class is optional (``*``), so together with
    # ``search`` this is effectively a prefix match on the sheet name.
    sheet_name_regex = re.compile("^Station Coords - N X Y Z[w+a-zA-Z0-9]*")

    # The context manager guarantees the workbook handle is released even when
    # validation or parsing raises.
    with pd.ExcelFile(excel_path) as xf:
        candidate_sheet_names = [
            name for name in xf.sheet_names if sheet_name_regex.search(name)
        ]
        if not candidate_sheet_names:
            raise ValueError("No valid sheet names found.")
        # When several sheets match, the first one in workbook order wins.
        df = xf.parse(sheet_name=candidate_sheet_names[0], header=2)

    headers = df.columns.values.tolist()
    # Check the column count as well, so a malformed sheet surfaces as a
    # ValueError instead of a bare IndexError further down.
    if len(headers) < 4 or headers[0] != "Phone":
        raise ValueError("Headers do not match expected value.")

    x_header, y_header, z_header = headers[1:4]
    return df[x_header], df[y_header], df[z_header]


def get_points_on_projection(start_point, end_point, to_project):
    """Orthogonally project points onto the line through two points.

    Based on the solution here:
    https://stackoverflow.com/questions/61341712/calculate-projected-point-location-x-y-on-given-line-startx-y-endx-y

    Args:
        start_point: Array-like of shape ``(d,)``; first point defining the line.
        end_point: Array-like of shape ``(d,)``; second point defining the line.
        to_project: Array-like of shape ``(n, d)``; one point per row.

    Returns:
        ``numpy.ndarray`` of shape ``(n, d)`` holding the projection of each
        row of ``to_project`` onto the (infinite) line through ``start_point``
        and ``end_point``.

    Raises:
        ValueError: If ``start_point`` and ``end_point`` coincide, because the
            line direction is then undefined.
    """
    start = np.asarray(start_point)
    direction = np.asarray(end_point) - start

    # Squared length of the segment. It is the same for every projected point,
    # so compute and validate it once rather than per row.
    l2 = np.sum(direction ** 2)
    if l2 == 0:
        raise ValueError("Start and end points are the same point!")

    # t is each point's scalar coordinate along the line: 0 at start_point and
    # 1 at end_point. It is not clamped, so projections may fall outside the
    # segment.
    offsets = np.asarray(to_project) - start
    t = offsets @ direction / l2
    return start + t[:, np.newaxis] * direction


def calc_array_distance(initial_point, distance_points):
    """Return the Euclidean distance from one point to each row of an array.

    Args:
        initial_point: Reference point; it is broadcast against
            ``distance_points`` by the subtraction.
        distance_points: 2-D array with one point per row.

    Returns:
        1-D array holding the norm of each row of
        ``initial_point - distance_points``.
    """
    # The sign of the difference is irrelevant because only its norm is used.
    row_offsets = initial_point - distance_points
    row_norms = np.linalg.norm(row_offsets, axis=1)
    return row_norms


def get_geom_func_from_excel(excel_path, method: str = "naive"):
    """Build a depth-versus-position interpolation function from an Excel file.

    X, Y and Z columns are read with ``get_data_from_excel``. Each point gets
    a scalar position along the array and a depth (its Z value minus the
    maximum Z, hence <= 0). The returned callable linearly interpolates depth
    as a function of position.

    Args:
        excel_path: Workbook location, forwarded to ``get_data_from_excel``.
        method: How positions are computed (case-insensitive):
            ``"naive"``  -- XY distance of every point from the first point;
            ``"2d_fit"`` -- points are projected onto a least-squares line
            fitted to (x, y), and the position is the distance of each
            projected point from the first projected point;
            ``"3d_fit"`` -- not implemented.

    Returns:
        Tuple ``(geom_interp_func, peak_elevation)``, where
        ``geom_interp_func(x)`` evaluates ``numpy.interp`` over the
        (position, depth) samples and ``peak_elevation`` is the maximum Z.

    Raises:
        NotImplementedError: If ``method`` is ``"3d_fit"``.
        ValueError: If ``method`` is not recognised, or propagated from the
            helpers (bad workbook layout, degenerate fit line).
    """
    method = method.lower()

    # Extract raw values
    x_points, y_points, z_points = get_data_from_excel(excel_path)
    xy_points = np.column_stack([x_points, y_points])

    # Depths are expressed relative to the highest point.
    peak_elevation = np.max(z_points)
    depths = z_points - peak_elevation

    if method == "naive":
        positions = calc_array_distance(xy_points[0], xy_points)
    elif method == "2d_fit":
        # np.unique returns sorted values, so the first and last rows of
        # fit_line are the fitted line's end points at min(x) and max(x).
        fit_line_x = np.unique(x_points)
        fit_line_y = np.poly1d(np.polyfit(x_points, y_points, 1))(fit_line_x)
        fit_line = np.column_stack([fit_line_x, fit_line_y])
        projected_points = get_points_on_projection(fit_line[0], fit_line[-1], xy_points)
        positions = calc_array_distance(projected_points[0], projected_points)
    elif method == "3d_fit":
        raise NotImplementedError()
    else:
        raise ValueError("Method must be 'naive' or '2d_fit'")

    # np.interp requires increasing sample positions and does not check for
    # it, so order the (position, depth) pairs explicitly. A stable sort
    # leaves already-ordered input untouched.
    order = np.argsort(positions, kind="stable")
    sorted_positions = np.asarray(positions)[order]
    sorted_depths = np.asarray(depths)[order]

    # Generate geometry function
    def geom_interp_func(x):
        return np.interp(x, sorted_positions, sorted_depths)

    return geom_interp_func, peak_elevation

