In [191]:
# CREATED: 4-APR-2022
# LAST EDIT: 4-APR-2022
# AUTHOR: DUANE RINEHART
'''
IMPORT POLYGON POINTS FROM CSV (EVENTUALLY PULL FROM RDBMS)
Note: polygons_dump5.csv GENERATED WITH QUERY - SELECT * FROM annotations_points WHERE (prep_id='DK55' AND (label='7N_L' OR label='5N_L')) OR (prep_id='MD594' AND (label='SC' OR label='IC'))
'''
import os
import pandas as pd
import numpy as np

# DIRECTORY HOLDING THE CSV DUMP ("." = CURRENT WORKING DIRECTORY)
srcpath = "."
# PATH OF THE ANNOTATION DUMP PASSED TO load_raw_data BELOW
infile = os.path.join(srcpath, "polygons_dump5.csv")


def load_raw_data(infile):
    """Read the tab-separated file at *infile* into a DataFrame.

    The first column of the file becomes the DataFrame index.
    """
    raw_frame = pd.read_csv(
        infile,
        sep='\t',
        index_col=0,
    )
    return raw_frame


def parse_df(df_raw_data, prep_id, label, xy_resolution_pixels_micronss, z_resolution_section_to_microns):
    """Return the rescaled polygon points for one (prep_id, label) pair.

    The input frame is never modified: rows are filtered first and all
    derived columns are written to a copy, so the function can be called
    repeatedly on the same raw frame without compounding the scaling.

    Parameters
    ----------
    df_raw_data : pandas.DataFrame
        Must contain the columns ``prep_id``, ``label``, ``x``, ``y``, ``z``,
        ``polygon_id``, ``volume_id`` and ``ordering``.
    prep_id, label
        Only rows whose ``prep_id`` and ``label`` columns equal these values
        are kept.
    xy_resolution_pixels_micronss : float
        Factor that ``x`` and ``y`` are multiplied by.  (The misspelled
        parameter name is kept so existing keyword callers keep working.)
    z_resolution_section_to_microns : float
        Divisor applied to ``z``; the rounded quotient is used as the
        section number in ``unique_key``.

    Returns
    -------
    pandas.DataFrame
        Columns ``unique_key`` ("<prep_id>_<label>_<rounded z>"), ``prep_id``,
        ``label``, ``coord`` (an ``(x, y, z)`` tuple of the rescaled values),
        ``z``, ``polygon_id``, ``volume_id`` and ``ordering``.  Empty when no
        row matches.
    """
    xy_scale = xy_resolution_pixels_micronss

    # Filter BEFORE transforming and work on a copy so the caller's frame
    # keeps its raw (unscaled) coordinates.
    matches = (df_raw_data['prep_id'] == prep_id) & (df_raw_data['label'] == label)
    subset = df_raw_data.loc[matches].copy()

    subset['x'] = subset['x'] * xy_scale
    subset['y'] = subset['y'] * xy_scale
    subset['z'] = subset['z'] / z_resolution_section_to_microns

    section = subset['z'].round().astype(int).astype(str)
    subset['unique_key'] = (
        subset['prep_id'].astype(str) + '_' + subset['label'].astype(str) + '_' + section
    )
    subset['coord'] = list(zip(subset['x'], subset['y'], subset['z']))

    # EXTRACT SPECIFIC COLUMNS FOR ANALYSIS
    return subset[
        ["unique_key", "prep_id", "label", "coord", "z", "polygon_id", "volume_id", "ordering"]
    ]


df_raw_data = load_raw_data(infile)  # REPLACE W/ RDBMS CALL FOR PRODUCTION


# FILTERING CONSTANTS
xy_resolution_pixels_microns = .452  # STORED IN DB
z_resolution_section_to_microns = 20  # STORED IN DB

# POLYGON SETS BY (prep_id, label) CREATED INDIVIDUALLY BELOW FOR TESTING
# NOTE(review): THE QUERY IN THE FILE HEADER SELECTS DK55 LABELS '7N_L' AND '5N_L',
# NOT '7N_R' - CONFIRM WHICH LABEL IS INTENDED HERE
polygon_sets = [
    ('MD594', 'SC'),
    ('MD594', 'IC'),
    ('DK55', '7N_L'),
    ('DK55', '7N_R'),
]

extracted_columns = {}
for prep_id, label in polygon_sets:
    # EACH CALL GETS ITS OWN COPY OF THE RAW FRAME SO THE RESOLUTION SCALING
    # IS APPLIED EXACTLY ONCE PER SET AND NEVER COMPOUNDS ACROSS CALLS
    extracted_columns[(prep_id, label)] = parse_df(
        df_raw_data.copy(),
        prep_id,
        label,
        xy_resolution_pixels_microns,
        z_resolution_section_to_microns,
    )

# PER-SET NAMES KEPT FOR ANY CODE THAT REFERS TO THEM
extracted_columns_MD594_SC = extracted_columns[('MD594', 'SC')]
extracted_columns_MD594_IC = extracted_columns[('MD594', 'IC')]
extracted_columns_DK55_7N_L = extracted_columns[('DK55', '7N_L')]
extracted_columns_DK55_7N_R = extracted_columns[('DK55', '7N_R')]

consolidated_coordinates = pd.concat([
    extracted_columns_MD594_SC,
    extracted_columns_MD594_IC,
    extracted_columns_DK55_7N_L,
    extracted_columns_DK55_7N_R,
])

# CREATE DICTIONARY OF COORDINATES
# np.unique DROPS DUPLICATE TUPLES AND RETURNS THEM SORTED
# NOTE(review): SORTING DISCARDS THE VERTEX ORDER HELD IN THE "ordering" COLUMN -
# CONFIRM THIS IS INTENDED
transformed_polygon_structures = (
    consolidated_coordinates
    .groupby(['unique_key'])['coord']
    .apply(lambda coords: list(np.unique(coords)))
    .to_dict()
)

# OUTPUT FORMAT:
# -KEY IS CONCATENATED prep_id_structure_section ("_" delimiter)
# -VALUE IS LIST OF COORDINATE TUPLES (x, y, z)
#
# TO ACCESS [EXAMPLE WITH prep_id 'MD594', label 'IC', section 140]:
polygon_index = 'MD594_IC_140'
for coordinate in transformed_polygon_structures[polygon_index]:
    print(coordinate)

(5320.718, 1934.0673199999999, 140.0)
(5324.2436, 1919.8564400000002, 140.0)
(5327.8144, 1905.64556, 140.0)
(5331.385200000001, 1941.1727600000002, 140.0)
(5334.9108, 1891.4301600000001, 140.0)
(5338.4816, 1880.7720000000002, 140.0)
(5338.4816, 1951.83092, 140.0)
(5345.578, 1866.5566000000001, 140.0)
(5345.578, 1962.48908, 140.0)
(5349.1035999999995, 1855.8984400000002, 140.0)
(5352.674400000001, 1973.14724, 140.0)
(5356.2452, 1845.23576, 140.0)
(5359.816, 1983.8054, 140.0)
(5366.867200000001, 1834.5776, 140.0)
(5374.0088, 1987.3581200000003, 140.0)
(5377.5344000000005, 1827.46764, 140.0)
(5381.1052, 1983.8054, 140.0)
(5384.6308, 1820.3622, 140.0)
(5391.772400000001, 1980.24816, 140.0)
(5395.298, 1809.6995200000001, 140.0)
(5405.965200000001, 1802.59408, 140.0)
(5405.965200000001, 1973.1427199999998, 140.0)
(5413.0616, 1795.48412, 140.0)
(5416.6324, 1969.58548, 140.0)
(5423.7288, 1788.37868, 140.0)
(5430.870400000001, 1962.4800400000001, 140.0)
(5434.396, 1777.7160000000001, 140.0)
(54