# Boundary mask builder

### Note: Use the epi-paint kernel for alphashape

What is this used for?
- Once the boundary detection script has been run and the Lamin outline has been clearly extracted, this notebook builds a polygon around the Lamin outline.
- The polygon is then used as a mask: the localizations (locs) from all the other channels that fall inside it are extracted and saved for future processing.

Workflow
- Define the folder containing the cleaned-up data and list the files in it.
- Define the file with the boundary data.
- Import the Lamin_boundary.hdf5 file and extract the localizations. 
- Use alphashape to build a polygon around the boundary localizations and export the polygon coordinates.
- Use the polygon as a mask and then clean all the imaging channels. 

In [None]:
# Import dependencies

import alphashape as alphashape
from matplotlib.path import Path
import numpy as _np
import os.path as _ospath
import os as _os
import h5py as _h5py
import yaml as _yaml
from PyQt5.QtWidgets import QMessageBox as _QMessageBox


In [None]:
# Define the folder and the files with the data

# `folder` must point at the directory of one cell before this cell is run.
folder = '' # Folder name for specific cell.
file_extn = '.hdf5'
# Names (not full paths) of every entry in `folder` ending with `file_extn`.
file_names = [
    entry for entry in _os.listdir(folder) if entry.endswith(file_extn)
]

In [None]:
# Define the output folder

# Masked results go into a 'Masked' sub-folder of the data folder.
parent_folder, working_folder = _ospath.split(folder)
output_folder = _ospath.join(parent_folder, working_folder, 'Masked')
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate exists() test.
_os.makedirs(output_folder, exist_ok=True)

In [None]:
# Picasso Functions Used Here

def load_locs(path, qt_parent=None):
    """Load localizations and their metadata for one HDF5 file.

    Parameters
    ----------
    path : str
        Path to the HDF5 file; its "locs" dataset is read in full.
    qt_parent : object, optional
        Forwarded unchanged to ``load_info``.

    Returns
    -------
    locs : np.rec.array
        Content of the "locs" dataset, with fields accessible as attributes.
    info : object
        Whatever ``load_info`` returns for ``path``.
    """
    with _h5py.File(path, "r") as hdf_file:
        raw_locs = hdf_file["locs"][...]
    # Wrap as a record array so that fields can be read as attributes
    # (e.g. ``locs.x``) in addition to ``locs["x"]``.
    locs = _np.rec.array(raw_locs, dtype=raw_locs.dtype)
    return locs, load_info(path, qt_parent=qt_parent)

class NoMetadataFileError(FileNotFoundError):
    """Raised when the .yaml metadata file of a localization file is missing."""

def load_info(path, qt_parent=None):
    """Load the YAML metadata that accompanies a localization file.

    The metadata file is looked up next to ``path``: same base name, with
    the extension replaced by ``.yaml``.

    Parameters
    ----------
    path : str
        Path to the localization file (its extension is ignored).
    qt_parent : object, optional
        If not None, a critical Qt message box is shown with this parent
        when the metadata file cannot be found.

    Returns
    -------
    info : list
        One entry per YAML document found in the metadata file.

    Raises
    ------
    NoMetadataFileError
        If the metadata file does not exist. The original
        ``FileNotFoundError`` is attached as the cause.
    """
    filename = _ospath.splitext(path)[0] + ".yaml"
    try:
        with open(filename, "r") as info_file:
            # NOTE(review): UnsafeLoader can construct arbitrary Python
            # objects. Only load metadata files from trusted sources.
            info = list(_yaml.load_all(info_file, Loader=_yaml.UnsafeLoader))
    except FileNotFoundError as e:
        print("\nAn error occured. Could not find metadata file:\n{}".format(filename))
        if qt_parent is not None:
            _QMessageBox.critical(
                qt_parent,
                "An error occured",
                "Could not find metadata file:\n{}".format(filename),
            )
        # Chain explicitly so the traceback shows the missing-file error as
        # the direct cause rather than as an incidental context.
        raise NoMetadataFileError(e) from e
    return info

def save_info(path, info, default_flow_style=False):
    """Write the metadata documents in ``info`` to a YAML file.

    Parameters
    ----------
    path : str
        Destination file; it is opened in write mode, so existing content
        is replaced.
    info : iterable
        Documents handed to ``yaml.dump_all``.
    default_flow_style : bool, optional
        Passed through to ``yaml.dump_all``.
    """
    with open(path, "w") as yaml_file:
        _yaml.dump_all(
            info,
            yaml_file,
            default_flow_style=default_flow_style,
        )

def ensure_sanity(locs, info):
    """Keep only localizations that are finite, inside the image and
    have positive localization precisions.

    Parameters
    ----------
    locs : np.rec.array
        Localizations list. The fields ``x``, ``y``, ``lpx`` and ``lpy``
        are read as attributes.
    info : list of dicts
        Localization metadata; ``info[0]["Width"]`` and
        ``info[0]["Height"]`` give the upper bounds for ``x`` and ``y``.

    Returns
    -------
    locs : np.rec.array
        Localizations that pass the sanity checks.
    """
    # Step 1: drop every row in which any field is inf or nan.
    finite_per_field = _np.array(
        [_np.isfinite(locs[field]) for field in locs.dtype.names]
    )
    locs = locs[finite_per_field.all(axis=0)]

    # Step 2: keep rows strictly inside the image (bounds are exclusive)
    # whose localization precisions are strictly positive.
    keep = (
        (locs.x > 0)
        & (locs.y > 0)
        & (locs.x < info[0]["Width"])
        & (locs.y < info[0]["Height"])
        & (locs.lpx > 0)
        & (locs.lpy > 0)
    )
    return locs[keep]

def save_locs_withSuffix(path, locs, info, suffix=''):
    """Save sanity-checked localizations and metadata under a suffixed name.

    For ``path = "<base><ext>"`` the localizations are written to
    ``"<base>_<suffix><ext>"`` and the metadata to ``"<base>_<suffix>.yaml"``.

    Parameters
    ----------
    path : str
        Path from which the output names are derived.
    locs : np.rec.array
        Localizations; they are passed through ``ensure_sanity`` first.
    info : list of dicts
        Localization metadata, written with ``save_info``.
    suffix : str, optional
        Text appended (after an underscore) to the base name.
    """
    locs = ensure_sanity(locs, info)
    base, ext_locs = _ospath.splitext(path)
    # Both output files share the same suffixed stem.
    stem = base + '_' + suffix
    with _h5py.File(stem + ext_locs, "w") as locs_file:
        locs_file.create_dataset("locs", data=locs)
    save_info(stem + '.yaml', info, default_flow_style=False)

In [None]:
# Find the file with the string 'boundary' in the file name

for file in file_names:
    if 'boundary' in file:
        boundary_file = _ospath.join(folder, file)
        print('The boundary file {} is found'.format(file))
        # Remove it so that only the other channels remain in file_names.
        # Mutating the list here is safe because the loop exits right away.
        file_names.remove(file)
        break
else:
    # Without this, the load below would fail with an unhelpful NameError.
    raise FileNotFoundError(
        "No file with 'boundary' in its name was found in {!r}".format(folder)
    )

# Load the boundary data
boundary_locs, boundary_info = load_locs(boundary_file)

# Extract the x and y coordinates of the boundary
boundary_x = boundary_locs['x']
boundary_y = boundary_locs['y']

# One row per localization, columns are (x, y).
alpha_points = _np.column_stack((boundary_x, boundary_y))

In [None]:
# Extract polygon from the boundary data

# NOTE(review): per the alphashape documentation an alpha of 0.0 is expected
# to give the convex hull of the points - confirm this is the intended outline.
alpha_shape = alphashape.alphashape(alpha_points, alpha=0.0)
# Bare expression so that the notebook displays the resulting shape.
alpha_shape

In [None]:
# Get the coordinates of the polygon

# Vertices of the outer ring of the shape, one (x, y) row per vertex.
polygon_coords = _np.array(alpha_shape.exterior.coords)
# matplotlib Path built from those vertices; used for point-in-polygon tests.
polygon_path = Path(polygon_coords)
# Keep a copy of the vertices next to the masked output files.
_np.savetxt(
    _ospath.join(output_folder, 'polygon_coords.csv'),
    polygon_coords,
    delimiter=',',
)

In [None]:
# Filter the points inside the polygon for the other channels

for file in file_names:
    # Input comes from the data folder, output goes to the 'Masked' folder.
    fpath = _ospath.join(folder, file)
    output_path = _ospath.join(output_folder, file)

    locs, info = load_locs(fpath)

    # (N, 2) array of x/y positions to test against the polygon.
    points = _np.stack((locs['x'], locs['y']), axis=1)
    inside_mask = polygon_path.contains_points(points)
    filtered_locs = locs[inside_mask]

    # Written as <name>_Masked<ext> together with a matching .yaml file.
    save_locs_withSuffix(output_path, filtered_locs, info, suffix='Masked')
    print('The file {} is processed'.format(file))