# One Jupyter notebook. From GeoJSON to .xml 

## Imports

In [118]:
import geopandas
import pandas
import numpy
import tifffile
import shapely
from shapely.geometry import Point
# import streamlit as st
from lmd.lib import SegmentationLoader
from lmd.lib import Collection, Shape
from lmd import tools
from PIL import Image
from pathlib import Path
import ast
import string

In [119]:
from loguru import logger
import sys
logger.remove()
logger.add(sys.stdout, format="<green>{time:HH:mm:ss.SS}</green> | <level>{level}</level> | {message}")

2

## From GeoJSON to Dataframe

In [4]:
#user defined variables:
path_to_file = '/Users/jnimoca/Desktop/M1_l4_toprightextp3_Composite.geojson'
path_to_save_files = '/Users/jnimoca/Desktop/'
#the naming of these should match exactly the names in the geoJSON file
calibration_points = ['calib1','calib2','calib3']

### Samples and wells

In [3]:
# if you want to pass a custom well loadout use the following dictionary:  
# The dictionary keys will have to be the class name used in qupath, it has to be the exact names.  
# please choose wells with a 2-row 2-column margin. not rows (A, B, N, O) not columns (1,2,23,24)  

In [4]:
samples_and_wells = { 
 '187_HBC_mid_3': 'C3',
 '187_HBC_nVEC_1': 'C4',
 '187_HBC_nVEC_2': 'C5',
 '187_HBC_mid_2': 'C6',
 '187_HBC_nTroph_3': 'C7',
 '187_HBC_nTroph_1': 'C8',
 '187_HBC_mid_1': 'C9',
 '187_HBC_nVEC_3': 'C10',
 '187_HBC_nTroph_2': 'C11',
 '108_STB_nVEC_1': 'F3',
 '108_STB_nVEC_2': 'F4',
 '108_STB_nVEC_3': 'F5',
 '108_STB_gen_1': 'F6',
 '108_STB_gen_2': 'F7',
 '108_STB_gen_3': 'F8',
 '108_CTB_1_1': 'F9',
 '108_CTB_2_1': 'F10',
 '108_CTB_3_1': 'F11',
 '108_HBC_nTroph_1': 'G3',
 '108_HBC_nTroph_2': 'G4',
 '108_HBC_nTroph_3': 'G5',
 '108_HBC_nVEC_1': 'G6',
 '108_HBC_nVEC_2': 'G7',
 '108_HBC_nVEC_3': 'G8',
 '108_HBC_mid_1': 'G9',
 '108_HBC_mid_2': 'G10',
 '108_HBC_mid_3': 'G11',
 '108_CCT_1': 'H3',
 '108_CCT_2': 'H4',
 '108_CCT_3': 'H5',
 '108_VEC_1': 'H6',
 '108_VEC_2': 'H7',
 '108_VEC_3': 'H8',
 '108_Str_1': 'H9',
 '108_Str_2': 'H10',
 '108_Str_3': 'H11',
 '187_STB_nVEC_1': 'D3',
 '187_STB_nVEC_2': 'D4',
 '187_STB_nVEC_3': 'D5',
 '187_STB_gen_1': 'D6',
 '187_STB_gen_2': 'D7',
 '187_STB_gen_3': 'D8',
 '187_CTB_1': 'D9',
 '187_CTB_2': 'D10',
 '187_CTB_3': 'D11',
 '187_CCT_1': 'E3',
 '187_CCT_2': 'E4',
 '187_CCT_3': 'E5',
 '187_VEC_1': 'E6',
 '187_VEC_2': 'E7',
 '187_VEC_3': 'E8',
 '187_str_1': 'E9',
 '187_str_2': 'E10',
 '187_str_3': 'E11'
 }

## Code

In [133]:
def load_and_QC_geojson_file(geojson_path: str, list_of_calibpoint_names: list = ['calib1','calib2','calib3']):

    #load geojson file
    df = geopandas.read_file(geojson_path)
    logger.info(f"Geojson file loaded with shape {df.shape}")
    
    try:
        df['annotation_name'] = df['name']
    except:
        logger.warning('No name column found, meaning no annotation in Qupath was named, at least calibration points should be named')
        sys.exit()

    geometry_counts = df.geometry.geom_type.value_counts()
    log_message = ", ".join(f"{count} {geom_type}s" for geom_type, count in geometry_counts.items())
    logger.info(f"Geometries in DataFrame: {log_message}")

    #save calib points in a list
    caliblist = []
    for point_name in list_of_calibpoint_names:
        if point_name in df['annotation_name'].unique():
            caliblist.append(df.loc[df['annotation_name'] == point_name, 'geometry'].values[0])
        else:
            logger.error(f'Your given annotation_name {point_name} is not present in the file')
            logger.error(f'These are the calib points you passed: {list_of_calibpoint_names}')
            logger.error(f"These are the calib points found in the geojson you gave me: {df[df['geometry'].geom_type == 'Point']['annotation_name']}")

            # print('Your given annotation_name is not present in the file  \n', 
            # f'These are the calib points you passed: {list_of_calibpoint_names}  \n',
            # f"These are the calib points found in the geojson you gave me: ")
            # st.table(df[df['geometry'].geom_type == 'Point']['annotation_name'])

    #remove points
    df = df[df['geometry'].apply(lambda geom: not isinstance(geom, shapely.geometry.Point))]
    logger.debug(f"Point geometries have been removed")

    #check and remove empty classifications
    if df['classification'].isna().sum() !=0 :
        logger.info(f"You have {df['classification'].isna().sum()} NaNs in your classification column")
        logger.info(f"These are unclassified objects from Qupath, they will be removed")
        df = df[df['classification'].notna()]

    #rename classification
    df['classification_name'] = df['classification'].apply(lambda x: x.get('name'))

    #check for MultiPolygon objects
    logger.debug(f"Checking for MultiPolygon objects")
    if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
        logger.debug(f"MultiPolygon objects present:  {df.geometry.geom_type.value_counts()['MultiPolygon']}")
        logger.debug(f"These are not supported, and will be removed")
        df = df[df.geometry.geom_type != 'MultiPolygon']
        # st.table(df[df.geometry.geom_type == 'MultiPolygon'][['annotation_name','classification_name']])
    
    def extract_coordinates(geometry):
        if geometry.geom_type == 'Polygon':
            return [list(coord) for coord in geometry.exterior.coords]
        elif geometry.geom_type == 'LineString':
            return [list(coord) for coord in geometry.coords]
        else:
            return None 

    logger.debug(f"Extracting coordinates from geometries")
    df['coords'] = df.geometry.simplify(1).apply(extract_coordinates)

    logger.success(f"QC of geojson file complete")

In [8]:
load_and_QC_geojson_file(path_to_file, calibration_points)

you have 3 NaNs in your classification column these are unclassified objects from Qupath, they will be ignored


AttributeError: 'LineString' object has no attribute 'exterior'

In [99]:
json_single_LineString = "/Users/jnimoca/Desktop/LineStringTest.geojson"
list_of_calibpoint_names = ['calib1','calib2','calib3']

In [100]:
#load geojson file
df = geopandas.read_file(json_single_LineString)
df['annotation_name'] = df['name']

In [101]:
#save calib points in a list
caliblist = []
for point_name in list_of_calibpoint_names:
    if point_name in df['annotation_name'].unique():
        caliblist.append(df.loc[df['annotation_name'] == point_name, 'geometry'].values[0])
    else:
        print('Your given annotation_name is not present in the file  \n', 
        f'These are the calib points you passed: {list_of_calibpoint_names}  \n',
        f"These are the calib points found in the geojson you gave me: ")
        # st.table(df[df['geometry'].geom_type == 'Point']['annotation_name'])

In [102]:
df = df[df['geometry'].apply(lambda geom: not isinstance(geom, shapely.geometry.Point))]

if df['classification'].isna().sum() !=0 :
    print(f"you have {df['classification'].isna().sum()} NaNs in your classification column",
        "these are unclassified objects from Qupath, they will be ignored") 
    df = df[df['classification'].notna()]

In [103]:
df['classification_name'] = df['classification'].apply(lambda x: x.get('name'))

In [104]:
#create coordenate list
listarray = []
for point in caliblist:
    listarray.append([point.x, point.y])
calib_np_array = numpy.array(listarray)

In [105]:
#now that calibration points are saved, remove them from the dataframe
df = df[df['annotation_name'].isin(list_of_calibpoint_names) == False]

In [106]:
#check for MultiPolygon objects
if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
    print('MultiPolygon objects present:  \n')
    #print out the classification name of the MultiPolygon objects
    # st.table(df[df.geometry.geom_type == 'MultiPolygon'][['annotation_name','classification_name']])
    print('these are not supported, please convert them to polygons in Qupath  \n',
    'the script will continue but these objects will be ignored')
    #remove MultiPolygon objects
    df = df[df.geometry.geom_type != 'MultiPolygon']

In [107]:
# reformat shape coordenate list
df['coords'] = numpy.nan
df.head()

Unnamed: 0,id,objectType,classification,name,geometry,annotation_name,classification_name,coords
0,94c467c3-984d-4ddd-b700-77d688ec58c0,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"LINESTRING (960.02000 1586.64000, 969.88000 15...",,red,
1,7542e7d2-0457-4205-bcd4-df9d980f9af7,annotation,"{'name': 'green', 'color': [143, 118, 196]}",,"LINESTRING (755.88000 1496.45000, 777.44000 14...",,green,
5,5ea8f07c-7a1d-440d-bcc3-f97a48c400d1,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((843.58000 1763.50000, 843.01000 1763...",,red,
6,8388bf9d-f377-49b7-b800-2548540108df,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((753.86000 1647.00000, 742.86000 1647...",,red,


In [108]:
df['coords'] = df['coords'].astype('object')
df.head()

Unnamed: 0,id,objectType,classification,name,geometry,annotation_name,classification_name,coords
0,94c467c3-984d-4ddd-b700-77d688ec58c0,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"LINESTRING (960.02000 1586.64000, 969.88000 15...",,red,
1,7542e7d2-0457-4205-bcd4-df9d980f9af7,annotation,"{'name': 'green', 'color': [143, 118, 196]}",,"LINESTRING (755.88000 1496.45000, 777.44000 14...",,green,
5,5ea8f07c-7a1d-440d-bcc3-f97a48c400d1,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((843.58000 1763.50000, 843.01000 1763...",,red,
6,8388bf9d-f377-49b7-b800-2548540108df,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((753.86000 1647.00000, 742.86000 1647...",,red,


In [109]:
df['simple'] = df.geometry.simplify(1)
df.head()

Unnamed: 0,id,objectType,classification,name,geometry,annotation_name,classification_name,coords,simple
0,94c467c3-984d-4ddd-b700-77d688ec58c0,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"LINESTRING (960.02000 1586.64000, 969.88000 15...",,red,,"LINESTRING (960.02000 1586.64000, 969.88000 15..."
1,7542e7d2-0457-4205-bcd4-df9d980f9af7,annotation,"{'name': 'green', 'color': [143, 118, 196]}",,"LINESTRING (755.88000 1496.45000, 777.44000 14...",,green,,"LINESTRING (755.88000 1496.45000, 777.44000 14..."
5,5ea8f07c-7a1d-440d-bcc3-f97a48c400d1,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((843.58000 1763.50000, 843.01000 1763...",,red,,"POLYGON ((843.58000 1763.50000, 836.32000 1764..."
6,8388bf9d-f377-49b7-b800-2548540108df,annotation,"{'name': 'red', 'color': [126, 176, 151]}",,"POLYGON ((753.86000 1647.00000, 742.86000 1647...",,red,,"POLYGON ((753.86000 1647.00000, 741.65000 1647..."


In [98]:
def extract_coordinates(geometry):
    if geometry.geom_type == 'Polygon':
        return [list(coord) for coord in geometry.exterior.coords]
    elif geometry.geom_type == 'LineString':
        return [list(coord) for coord in geometry.coords]
    else:
        return None 

In [110]:
# Apply the function to create a new column with the coordinates
df['coords'] = df['geometry'].apply(extract_coordinates)

In [121]:
df.geometry.geom_type.value_counts()

LineString    2
Polygon       2
Name: count, dtype: int64

In [123]:
geometry_counts = df.geometry.geom_type.value_counts()
log_message = ", ".join(f"{count} {geom_type}s" for geom_type, count in geometry_counts.items())
logger.info(f"Geometries in DataFrame: {log_message}")

[32m11:37:46.26[0m | [1mINFO[0m | Geometries in DataFrame: 2 LineStrings, 2 Polygons


In [122]:
geometry_counts = df.geometry.geom_type.value_counts()
log_message = "Geometries in DataFrame: " + ", ".join(f"{count} {geom_type}s" for geom_type, count in geometry_counts.items())
print(log_message)

Geometries in DataFrame: 2 LineStrings, 2 Polygons


# Full file step by step

In [125]:
#full file test
df_full = geopandas.read_file(path_to_file)

In [132]:
load_and_QC_geojson_file(path_to_file, calibration_points)

[32m11:45:56.68[0m | [34m[1mDEBUG[0m | Geojson file loaded with shape (97, 5)


[32m11:45:56.68[0m | [1mINFO[0m | Geometries in DataFrame: 85 Polygons, 9 LineStrings, 3 Points
[32m11:45:56.68[0m | [34m[1mDEBUG[0m | Point geometries have been removed
[32m11:45:56.69[0m | [34m[1mDEBUG[0m | Checking for MultiPolygon objects
[32m11:45:56.69[0m | [34m[1mDEBUG[0m | Extracting coordinates from geometries
[32m11:45:56.69[0m | [32m[1mSUCCESS[0m | QC of geojson file complete


In [115]:
df_full.shape

(97, 5)

In [117]:
df_full.geometry.geom_type.value_counts()

Polygon       85
LineString     9
Point          3
Name: count, dtype: int64

In [19]:
df_full.name.isna().sum()

94

In [21]:
df_full[~df_full['name'].isna()]

Unnamed: 0,id,objectType,classification,name,geometry
85,2c3b07d2-24cb-4988-ba52-910d7ce3d40e,annotation,,calib1,POINT (1064.93000 410.66000)
86,087adbfe-77eb-4ecc-867f-d5d6bc8077f3,annotation,,calib2,POINT (485.30000 1113.32000)
87,b920e6f9-e28d-47d8-b43e-6e3367c63c5a,annotation,,calib3,POINT (988.33000 487.52000)


### process dataframe

In [5]:
def load_and_QC_geojson_file(geojson_path: str, list_of_calibpoint_names: list = ['calib1','calib2','calib3']):

   #load geojson file
   df = geopandas.read_file(geojson_path)

   #save calib points in a list
   caliblist = []
   for point_name in list_of_calibpoint_names:
      if point_name in df['name'].unique():
            caliblist.append(df.loc[df['name'] == point_name, 'geometry'].values[0])
      # else:
      #       st.write('Your given name is not present in the file', 
      #       f'These are the calib points you passed: {list_of_calibpoint_names}',
      #       f"These are the calib points found in the geojson you gave me: {df['name'].unique()}")
   #create coordenate list
   listarray = []
   for point in caliblist:
      listarray.append([point.x, point.y])
   calib_np_array = numpy.array(listarray)

   #now that calibration points are saved, remove them from the dataframe
   df = df[df['name'].isin(list_of_calibpoint_names) == False]

   #check and remove empty classifications 
   if df['classification'].isna().sum() !=0 :
      # st.write(f"you have {df['classification'].isna().sum()} NaNs in your classification column",
      #       "these are unclassified objects from Qupath, they will be ignored") 
      df = df[df['classification'].notna()]

   #check for MultiPolygon objects
   if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
      # st.write('MultiPolygon objects present:',
      # #print out the classification name of the MultiPolygon objects
      # f"{df[df.geometry.geom_type == 'MultiPolygon']['classification']}", 
      # 'these are not supported, please convert them to polygons in Qupath',
      # 'the script will continue but these objects will be ignored')
      #remove MultiPolygon objects
      df = df[df.geometry.geom_type != 'MultiPolygon']

   # reformat shape coordenate list
   df['coords'] = numpy.nan
   df['coords'] = df['coords'].astype('object')
   # simplify to reduce number of points
   df['simple'] = df.geometry.simplify(1)
   # df['coords'] = df['simple'].apply(lambda geom: numpy.array(list(geom.exterior.coords)))
   df['coords'] = df['simple'].apply(lambda geom: numpy.array(geom.exterior.coords))
   # for i in df.index:
   #    geom=df.at[i, 'simple']
   #    tmp = list(geom.exterior.coords)
   #    tmp_lol = [list(i) for i in tmp]
   #    df.at[i,'coords'] = tmp_lol

   #extract classification name into a new column
   df['Name'] = numpy.nan
   for i in df.index:
      tmp = df.classification[i].get('name')
      df.at[i,'Name'] = tmp

   # st.write('The file loading is complete')

   #save dataframe as csv
   df.to_csv(f"./{datetime}_QCed_geojson.csv", index=False)
   #save numpy array as csv
   numpy.savetxt(f"./{datetime}_calib_points.csv", calib_np_array, delimiter=",")

In [6]:
def load_and_QC_SamplesandWells(samples_and_wells_input, df_csv):

   df = pandas.read_csv(df_csv)

   # parse common human copy paste formats
   # remove newlines
   samples_and_wells_processed = samples_and_wells_input.replace("\n", "")
   # remove spaces
   samples_and_wells_processed = samples_and_wells_processed.replace(" ", "")
   #parse into python dictionary
   samples_and_wells = ast.literal_eval(samples_and_wells_processed)

   #create list of acceptable wells, default is using a space in between columns
   list_of_acceptable_wells =[]
   for row in list(string.ascii_uppercase[2:14]):
      for column in range(2,22):
         list_of_acceptable_wells.append(str(row) + str(column))

   #check for improper wells
   # for well in samples_and_wells.values():
   #    if well not in list_of_acceptable_wells:
            # st.write(f'Your well {well} is not in the list of acceptable wells, please correct it',
            # 'the LMD is not able to collect into this well, the script will stop here')
            # st.stop()

   #check that names in df are all present in the samples and wells
   # for name in df.Name.unique():
      # if name not in samples_and_wells.keys():
            # st.write(f'Your name {name} is not in the list of samples_and_wells, please correct either',
            # 'please change the class name in Qupath or add it to the samples_and_wells dictionary',
            # 'and then rerun the web app')
            # st.stop()

   # st.write('The samples and wells scheme QC is done!')

In [5]:
calibration_point_1 = "calib12"
calibration_point_2 = "calib13"
calibration_point_3 = "calib20"
list_of_calibpoint_names = [calibration_point_1, calibration_point_2, calibration_point_3]

In [34]:
def create_collection(df_csv, calib_np_array_csv, samples_and_wells_input ):
    df = pandas.read_csv(df_csv)
    calib_np_array = numpy.loadtxt(calib_np_array_csv, delimiter=",")

    # samples_and_wells_processed = samples_and_wells_input.replace("\n", "")
    # samples_and_wells_processed = samples_and_wells_processed.replace(" ", "")
    # samples_and_wells = ast.literal_eval(samples_and_wells_processed)
    samples_and_wells = samples_and_wells_input

    #create the collection of py-lmd-env package
    #uses caliblist passed on the function, order matters
    #orientation vector is for QuPath coordenate system
    the_collection = Collection(calibration_points = calib_np_array)
    the_collection.orientation_transform = numpy.array([[1,0 ], [0,-1]])
    for i in df.index:
        the_collection.new_shape(df.at[i,'coords'], well = samples_and_wells[df.at[i, "Name"]])

    the_collection.plot(save_name= "./TheCollection.png")
    # st.image("./TheCollection.png", caption='Your Contours', use_column_width=True)
    # st.write(the_collection.stats())
    the_collection.save(f"./{datetime}_LMD_ready_contours.xml")


    #create and export dataframe with sample placement in 384 well plate
    rows_A_P= [i for i in string.ascii_uppercase[:16]]
    columns_1_24 = [str(i) for i in range(1,25)]
    df_wp384 = pd.DataFrame('',columns=columns_1_24, index=rows_A_P)
    #fill in the dataframe with samples and wells
    for i in samples_and_wells:
        location = samples_and_wells[i]
        df_wp384.at[location[0],location[1:]] = i
    #save dataframe as csv
    df_wp384.to_csv(f"./{datetime}_384_wellplate.csv", index=True)

In [7]:
load_and_QC_geojson_file(geojson_path='/Users/jnimoca/Jose_BI/Repositories/Qupath_to_LMD_v2/example_input/test_input.geojson', list_of_calibpoint_names=list_of_calibpoint_names)

NameError: name 'load_and_QC_geojson_file' is not defined

In [42]:
calib_np_array = numpy.loadtxt(f"./{datetime}_calib_points.csv", delimiter=",")

In [43]:
calib_np_array.shape

(3, 2)

In [10]:
df = pandas.read_csv("/Users/jnimoca/Jose_BI/Repositories/Qupath_to_LMD_v2/20231204_QCed_geojson.csv")

In [11]:
df

Unnamed: 0,id,objectType,classification,name,geometry,coords,simple,Name
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925 12328, 7924 12329, 7919 12329, ...",[[ 7931. 12328.]\n [ 7919. 12329.]\n [ 7913. 1...,"POLYGON ((7931 12328, 7919 12329, 7913 12333, ...",187_HBC_mid_1
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011 12393, 8010 12394, 8008 12394, ...",[[ 8011. 12393.]\n [ 8003. 12398.]\n [ 8001. 1...,"POLYGON ((8011 12393, 8003 12398, 8001 12402, ...",187_HBC_nVEC_2
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047 12321, 8046 12322, 8044 12322, ...",[[ 8047. 12321.]\n [ 8040. 12324.]\n [ 8032. 1...,"POLYGON ((8047 12321, 8040 12324, 8032 12331, ...",187_HBC_mid_1
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084 12325, 8083 12326, 8082 12326, ...",[[ 8084. 12325.]\n [ 8074. 12329.]\n [ 8070. 1...,"POLYGON ((8084 12325, 8074 12329, 8070 12333, ...",187_HBC_mid_1
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917 12268, 7916 12269, 7914 12269, ...",[[ 7917. 12268.]\n [ 7913. 12270.]\n [ 7902. 1...,"POLYGON ((7917 12268, 7913 12270, 7902 12271, ...",187_HBC_mid_2
...,...,...,...,...,...,...,...,...
2105,8f21e230-2922-4738-be72-2291e3c4b53a,annotation,"{'name': '187_STB_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((9088 7678, 9082 7681, 9081.66 7681.6...",[[9088. 7678.]\n [9079. 7684.]\n [9075. 7692.]...,"POLYGON ((9088 7678, 9079 7684, 9075 7692, 907...",187_STB_nVEC_2
2106,d91a7a6f-2d60-43c7-bda1-52f224847ed3,annotation,"{'name': '187_HBC_mid_3', 'color': [0, 128, 128]}",,"POLYGON ((8393 8306, 8389 8308, 8388 8309, 838...",[[8393. 8306.]\n [8386. 8311.]\n [8383. 8318.]...,"POLYGON ((8393 8306, 8386 8311, 8383 8318, 838...",187_HBC_mid_3
2107,82d18f39-add1-4c1e-b8ce-571de4c4cad0,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((8238 8447, 8236 8448, 8235 8448, 823...",[[8250. 8448.]\n [8235. 8448.]\n [8229. 8454.]...,"POLYGON ((8250 8448, 8235 8448, 8229 8454, 822...",187_HBC_mid_2
2108,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173 10250, 9172 10251, 9170 10252, ...",[[ 9173. 10250.]\n [ 9168. 10255.]\n [ 9168. 1...,"POLYGON ((9173 10250, 9168 10255, 9168 10263, ...",187_HBC_nTroph_2


In [13]:
def replace_coords_simple(df):
    df['lol_simple'] = np.nan
    df['lol_simple'] = df['lol_simple'].astype('object')
    df['simple'] = df.geometry.simplify(1)
    
    for i in df.index:
        #get geometry object for row i
        geom=df.at[i, 'simple']
        #list the coordenate points as tuples
        tmp = list(geom.exterior.coords)
        #transform list of tuples to list of lists
        tmp_lol = [list(i) for i in tmp]
        #add list of list to lol column
        df.at[i,'lol_simple'] = tmp_lol
    return(df)

In [12]:
def replace_coords(df):
    df['lol'] = np.nan
    df['lol'] = df['lol'].astype('object')
    
    for i in df.index:
        #get geometry object for row i
        geom=df.at[i, 'geometry']
        #list the coordenate points as tuples
        tmp = list(geom.exterior.coords)
        #transform list of tuples to list of lists
        tmp_lol = [list(i) for i in tmp]
        #add list of list to lol column
        df.at[i,'lol'] = tmp_lol
    return(df)

In [16]:
import numpy as np

In [25]:
df = geopandas.read_file("/Users/jnimoca/Jose_BI/Repositories/Qupath_to_LMD_v2/example_input/test_input.geojson")

In [26]:
df

Unnamed: 0,id,objectType,classification,name,geometry
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1..."
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1..."
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1..."
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1..."
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1..."
...,...,...,...,...,...
2111,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173.00000 10250.00000, 9172.00000 1..."
2112,c2f88c50-3fa6-4e43-84d0-df609336908c,annotation,"{'name': '187_CTB_3', 'color': [0, 128, 128]}",,"POLYGON ((9153.00000 10289.00000, 9152.00000 1..."
2113,b8daa84d-2e25-40c2-b3fd-bbbdc059bb5f,annotation,,calib12,POINT (11343.79000 6900.13000)
2114,6feb2a83-5054-4c9b-876e-98f354012ae3,annotation,,calib13,POINT (9635.17000 14764.84000)


In [27]:
#save calib points in a list
caliblist = []
for point_name in list_of_calibpoint_names:
    if point_name in df['name'].unique():
        caliblist.append(df.loc[df['name'] == point_name, 'geometry'].values[0])
    else:
        st.write('Your given name is not present in the file', 
        f'These are the calib points you passed: {list_of_calibpoint_names}',
        f"These are the calib points found in the geojson you gave me: {df['name'].unique()}")
#create coordenate list
listarray = []
for point in caliblist:
    listarray.append([point.x, point.y])
calib_np_array = numpy.array(listarray)

In [28]:
calib_np_array

array([[11343.79,  6900.13],
       [ 9635.17, 14764.84],
       [  642.12, 14103.67]])

In [29]:
df = df[df['name'].isin(list_of_calibpoint_names) == False]

In [30]:
df.geometry

0       POLYGON ((7925.00000 12328.00000, 7924.00000 1...
1       POLYGON ((8011.00000 12393.00000, 8010.00000 1...
2       POLYGON ((8047.00000 12321.00000, 8046.00000 1...
3       POLYGON ((8084.00000 12325.00000, 8083.00000 1...
4       POLYGON ((7917.00000 12268.00000, 7916.00000 1...
                              ...                        
2108    POLYGON ((9088.00000 7678.00000, 9082.00000 76...
2109    POLYGON ((8393.00000 8306.00000, 8389.00000 83...
2110    POLYGON ((8238.00000 8447.00000, 8236.00000 84...
2111    POLYGON ((9173.00000 10250.00000, 9172.00000 1...
2112    POLYGON ((9153.00000 10289.00000, 9152.00000 1...
Name: geometry, Length: 2113, dtype: geometry

In [33]:
#check and remove empty classifications 
if df['classification'].isna().sum() !=0 :
    # st.write(f"you have {df['classification'].isna().sum()} NaNs in your classification column",
    #     "these are unclassified objects from Qupath, they will be ignored") 
    df = df[df['classification'].notna()]

In [36]:
df

Unnamed: 0,id,objectType,classification,name,geometry
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1..."
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1..."
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1..."
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1..."
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1..."
...,...,...,...,...,...
2108,8f21e230-2922-4738-be72-2291e3c4b53a,annotation,"{'name': '187_STB_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((9088.00000 7678.00000, 9082.00000 76..."
2109,d91a7a6f-2d60-43c7-bda1-52f224847ed3,annotation,"{'name': '187_HBC_mid_3', 'color': [0, 128, 128]}",,"POLYGON ((8393.00000 8306.00000, 8389.00000 83..."
2110,82d18f39-add1-4c1e-b8ce-571de4c4cad0,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((8238.00000 8447.00000, 8236.00000 84..."
2111,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173.00000 10250.00000, 9172.00000 1..."


In [37]:
#check for MultiPolygon objects
if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
    st.write('MultiPolygon objects present:',
    #print out the classification name of the MultiPolygon objects
    f"{df[df.geometry.geom_type == 'MultiPolygon']['classification']}", 
    'these are not supported, please convert them to polygons in Qupath',
    'the script will continue but these objects will be ignored')
    #remove MultiPolygon objects
    df = df[df.geometry.geom_type != 'MultiPolygon']

In [39]:
df

Unnamed: 0,id,objectType,classification,name,geometry
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1..."
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1..."
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1..."
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1..."
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1..."
...,...,...,...,...,...
2108,8f21e230-2922-4738-be72-2291e3c4b53a,annotation,"{'name': '187_STB_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((9088.00000 7678.00000, 9082.00000 76..."
2109,d91a7a6f-2d60-43c7-bda1-52f224847ed3,annotation,"{'name': '187_HBC_mid_3', 'color': [0, 128, 128]}",,"POLYGON ((8393.00000 8306.00000, 8389.00000 83..."
2110,82d18f39-add1-4c1e-b8ce-571de4c4cad0,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((8238.00000 8447.00000, 8236.00000 84..."
2111,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173.00000 10250.00000, 9172.00000 1..."


In [40]:
# reformat shape coordenate list
df['coords'] = numpy.nan
df['coords'] = df['coords'].astype('object')

In [41]:
df

Unnamed: 0,id,objectType,classification,name,geometry,coords
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1...",
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1...",
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1...",
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1...",
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1...",
...,...,...,...,...,...,...
2108,8f21e230-2922-4738-be72-2291e3c4b53a,annotation,"{'name': '187_STB_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((9088.00000 7678.00000, 9082.00000 76...",
2109,d91a7a6f-2d60-43c7-bda1-52f224847ed3,annotation,"{'name': '187_HBC_mid_3', 'color': [0, 128, 128]}",,"POLYGON ((8393.00000 8306.00000, 8389.00000 83...",
2110,82d18f39-add1-4c1e-b8ce-571de4c4cad0,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((8238.00000 8447.00000, 8236.00000 84...",
2111,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173.00000 10250.00000, 9172.00000 1...",


In [42]:
df['simple'] = df.geometry.simplify(1)

In [43]:
df

Unnamed: 0,id,objectType,classification,name,geometry,coords,simple
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1...",,"POLYGON ((7931.00000 12328.00000, 7919.00000 1..."
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1...",,"POLYGON ((8011.00000 12393.00000, 8003.00000 1..."
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1...",,"POLYGON ((8047.00000 12321.00000, 8040.00000 1..."
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1...",,"POLYGON ((8084.00000 12325.00000, 8074.00000 1..."
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1...",,"POLYGON ((7917.00000 12268.00000, 7913.00000 1..."
...,...,...,...,...,...,...,...
2108,8f21e230-2922-4738-be72-2291e3c4b53a,annotation,"{'name': '187_STB_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((9088.00000 7678.00000, 9082.00000 76...",,"POLYGON ((9088.00000 7678.00000, 9079.00000 76..."
2109,d91a7a6f-2d60-43c7-bda1-52f224847ed3,annotation,"{'name': '187_HBC_mid_3', 'color': [0, 128, 128]}",,"POLYGON ((8393.00000 8306.00000, 8389.00000 83...",,"POLYGON ((8393.00000 8306.00000, 8386.00000 83..."
2110,82d18f39-add1-4c1e-b8ce-571de4c4cad0,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((8238.00000 8447.00000, 8236.00000 84...",,"POLYGON ((8250.00000 8448.00000, 8235.00000 84..."
2111,7675f39f-58ce-41ea-8078-c58217d634ff,annotation,"{'name': '187_HBC_nTroph_2', 'color': [0, 128,...",,"POLYGON ((9173.00000 10250.00000, 9172.00000 1...",,"POLYGON ((9173.00000 10250.00000, 9168.00000 1..."


In [44]:
df['coords'] = df['simple'].apply(lambda geom: numpy.array(list(geom.exterior.coords)))

In [46]:
df.head()

Unnamed: 0,id,objectType,classification,name,geometry,coords,simple
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1...","[[7931.0, 12328.0], [7919.0, 12329.0], [7913.0...","POLYGON ((7931.00000 12328.00000, 7919.00000 1..."
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1...","[[8011.0, 12393.0], [8003.0, 12398.0], [8001.0...","POLYGON ((8011.00000 12393.00000, 8003.00000 1..."
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1...","[[8047.0, 12321.0], [8040.0, 12324.0], [8032.0...","POLYGON ((8047.00000 12321.00000, 8040.00000 1..."
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1...","[[8084.0, 12325.0], [8074.0, 12329.0], [8070.0...","POLYGON ((8084.00000 12325.00000, 8074.00000 1..."
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1...","[[7917.0, 12268.0], [7913.0, 12270.0], [7902.0...","POLYGON ((7917.00000 12268.00000, 7913.00000 1..."


In [47]:
df['Name'] = numpy.nan
for i in df.index:
    tmp = df.classification[i].get('name')
    df.at[i,'Name'] = tmp

  df.at[i,'Name'] = tmp


In [49]:
df.head()

Unnamed: 0,id,objectType,classification,name,geometry,coords,simple,Name
0,38af0704-dec2-48e9-99cc-d6262ec92722,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((7925.00000 12328.00000, 7924.00000 1...","[[7931.0, 12328.0], [7919.0, 12329.0], [7913.0...","POLYGON ((7931.00000 12328.00000, 7919.00000 1...",187_HBC_mid_1
1,8ac78eff-262b-4942-9580-2badf20e22b5,annotation,"{'name': '187_HBC_nVEC_2', 'color': [0, 128, 1...",,"POLYGON ((8011.00000 12393.00000, 8010.00000 1...","[[8011.0, 12393.0], [8003.0, 12398.0], [8001.0...","POLYGON ((8011.00000 12393.00000, 8003.00000 1...",187_HBC_nVEC_2
2,4586ad21-65e0-48e9-b882-328bdc081c44,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8047.00000 12321.00000, 8046.00000 1...","[[8047.0, 12321.0], [8040.0, 12324.0], [8032.0...","POLYGON ((8047.00000 12321.00000, 8040.00000 1...",187_HBC_mid_1
3,33822c54-ae0a-4ad4-8ad9-d0eea9ad91c6,annotation,"{'name': '187_HBC_mid_1', 'color': [0, 128, 128]}",,"POLYGON ((8084.00000 12325.00000, 8083.00000 1...","[[8084.0, 12325.0], [8074.0, 12329.0], [8070.0...","POLYGON ((8084.00000 12325.00000, 8074.00000 1...",187_HBC_mid_1
4,64dc48fa-7583-4300-b1d3-a9c35c3ea010,annotation,"{'name': '187_HBC_mid_2', 'color': [0, 128, 128]}",,"POLYGON ((7917.00000 12268.00000, 7916.00000 1...","[[7917.0, 12268.0], [7913.0, 12270.0], [7902.0...","POLYGON ((7917.00000 12268.00000, 7913.00000 1...",187_HBC_mid_2


In [50]:
#save dataframe as csv
df.to_csv(f"./QCed_geojson.csv", index=False)

In [51]:
df = pandas.read_csv("./QCed_geojson.csv")

In [71]:
df = geopandas.read_file("inputs_that_dont_work/MultiPolygon.geojson")

In [72]:
df

Unnamed: 0,id,objectType,name,classification,geometry
0,c1bb1604-39e8-49fe-8f09-aaed31bb6a79,annotation,Spreading_CC_13.30,"{'name': 'C18', 'color': [161, 115, 81]}","POLYGON ((17815.00000 11775.00000, 17747.00000..."
1,3b8e1d73-a409-45f9-b328-5200042ef921,annotation,Calib1,,POINT (22355.58000 3377.24000)
2,cec8fe9d-2bd1-480e-8b99-b54e9f8279db,annotation,Tumorbed_bottomLeft_3.1,"{'name': 'I8', 'color': [239, 240, 121]}","POLYGON ((17512.00000 9776.00000, 17361.00000 ..."
3,90714e2e-2758-4366-97bd-8c6706b52ea9,annotation,Spreading_CC_6.1,"{'name': 'D10', 'color': [134, 181, 75]}","POLYGON ((21906.00000 11603.00000, 21802.00000..."
4,812d5b88-e88a-4b1e-90e1-eb079dcc67a4,annotation,Tumorbed_TopRight_1.2,"{'name': 'H18', 'color': [222, 219, 71]}","POLYGON ((21276.00000 6040.00000, 21127.00000 ..."
...,...,...,...,...,...
149,7764f722-fb51-4e64-a600-c63cce6d8ae7,annotation,Tumorbed_TopRight_2.2,"{'name': 'H20', 'color': [227, 43, 198]}","POLYGON ((21509.00000 4746.00000, 21415.00000 ..."
150,ba7dffb6-deb5-4482-93e3-5717eda6048b,annotation,Spreading_CC_13.44,"{'name': 'C18', 'color': [161, 115, 81]}","POLYGON ((17542.00000 12975.00000, 17501.00000..."
151,a7f02a86-2ab6-454d-a74b-74ee0554ee6d,annotation,StromaTumorBed_1.3,"{'name': 'G2', 'color': [235, 132, 89]}","POLYGON ((18229.00000 5411.00000, 18209.00000 ..."
152,21a0c0b1-d5bc-438b-9ec3-4c9bf20d77a9,annotation,Desmo_OutTumor_3.1,"{'name': 'B12', 'color': [20, 5, 135]}","POLYGON ((23143.00000 11369.00000, 22950.00000..."


In [60]:
df = df[df['classification'].notna()]

In [61]:
df['Name'] = numpy.nan
for i in df.index:
    tmp = df.classification[i].get('name')
    df.at[i,'Name'] = tmp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
  df.at[i,'Name'] = tmp


In [62]:
x = df[df.geometry.geom_type == 'MultiPolygon']['Name']

In [67]:
# df[df.geometry.geom_type == 'MultiPolygon']
# x = df[['name','classification']]

df['classification_name'] = df['classification'].apply(lambda x: x.get('name'))
df['annotation_name'] = df['name']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [69]:
df

Unnamed: 0,id,objectType,name,classification,geometry,Name,classification_name,annotation_name
0,c1bb1604-39e8-49fe-8f09-aaed31bb6a79,annotation,Spreading_CC_13.30,"{'name': 'C18', 'color': [161, 115, 81]}","POLYGON ((17815.00000 11775.00000, 17747.00000...",C18,C18,Spreading_CC_13.30
2,cec8fe9d-2bd1-480e-8b99-b54e9f8279db,annotation,Tumorbed_bottomLeft_3.1,"{'name': 'I8', 'color': [239, 240, 121]}","POLYGON ((17512.00000 9776.00000, 17361.00000 ...",I8,I8,Tumorbed_bottomLeft_3.1
3,90714e2e-2758-4366-97bd-8c6706b52ea9,annotation,Spreading_CC_6.1,"{'name': 'D10', 'color': [134, 181, 75]}","POLYGON ((21906.00000 11603.00000, 21802.00000...",D10,D10,Spreading_CC_6.1
4,812d5b88-e88a-4b1e-90e1-eb079dcc67a4,annotation,Tumorbed_TopRight_1.2,"{'name': 'H18', 'color': [222, 219, 71]}","POLYGON ((21276.00000 6040.00000, 21127.00000 ...",H18,H18,Tumorbed_TopRight_1.2
5,4d9ed5d8-77d6-4415-94ed-3ce5235e5934,annotation,Spreading_CC_13.37,"{'name': 'C18', 'color': [161, 115, 81]}","POLYGON ((17929.00000 12155.00000, 17921.00000...",C18,C18,Spreading_CC_13.37
...,...,...,...,...,...,...,...,...
149,7764f722-fb51-4e64-a600-c63cce6d8ae7,annotation,Tumorbed_TopRight_2.2,"{'name': 'H20', 'color': [227, 43, 198]}","POLYGON ((21509.00000 4746.00000, 21415.00000 ...",H20,H20,Tumorbed_TopRight_2.2
150,ba7dffb6-deb5-4482-93e3-5717eda6048b,annotation,Spreading_CC_13.44,"{'name': 'C18', 'color': [161, 115, 81]}","POLYGON ((17542.00000 12975.00000, 17501.00000...",C18,C18,Spreading_CC_13.44
151,a7f02a86-2ab6-454d-a74b-74ee0554ee6d,annotation,StromaTumorBed_1.3,"{'name': 'G2', 'color': [235, 132, 89]}","POLYGON ((18229.00000 5411.00000, 18209.00000 ...",G2,G2,StromaTumorBed_1.3
152,21a0c0b1-d5bc-438b-9ec3-4c9bf20d77a9,annotation,Desmo_OutTumor_3.1,"{'name': 'B12', 'color': [20, 5, 135]}","POLYGON ((23143.00000 11369.00000, 22950.00000...",B12,B12,Desmo_OutTumor_3.1


In [73]:
df[df.geometry.geom_type == 'Point']['name']

1      Calib1
76     Calib2
133    Calib3
Name: name, dtype: object

In [63]:
name

13    C20
Name: Name, dtype: object

In [None]:
df['lol'] = np.nan
    df['lol'] = df['lol'].astype('object')
    
    for i in df.index:
        #get geometry object for row i
        geom=df.at[i, 'geometry']
        #list the coordenate points as tuples
        tmp = list(geom.exterior.coords)
        #transform list of tuples to list of lists
        tmp_lol = [list(i) for i in tmp]
        #add list of list to lol column
        df.at[i,'lol'] = tmp_lol

In [75]:
df.simple[0]

'POLYGON ((7931 12328, 7919 12329, 7913 12333, 7912 12344, 7914 12348, 7920 12353, 7924 12355, 7929 12355, 7940 12348, 7942 12344, 7942 12338, 7935 12330, 7931 12328))'

In [77]:
x, y = df.simple[0].exterior.xy

AttributeError: 'str' object has no attribute 'exterior'

In [79]:
df.geometry[0]

'POLYGON ((7925 12328, 7924 12329, 7919 12329, 7918 12330, 7917 12330, 7916 12331, 7915 12331, 7915 12332, 7914 12332, 7914 12333, 7913 12333, 7913 12334, 7912 12335, 7912 12336, 7912 12337, 7912 12338, 7912 12339, 7912 12340, 7912 12341, 7912 12342, 7912 12343, 7912 12344, 7913 12345, 7913 12346, 7914 12347, 7914 12348, 7915 12348, 7915 12349, 7916 12349, 7916 12350, 7917 12350, 7918 12351, 7919 12351, 7919 12352, 7920 12352, 7920 12353, 7921 12353, 7922 12354, 7923 12354, 7924 12355, 7929 12355, 7930 12354, 7931 12354, 7932 12353, 7933 12353, 7934 12352, 7935 12352, 7936 12351, 7937 12351, 7937 12350, 7938 12350, 7938 12349, 7939 12349, 7939 12348, 7940 12348, 7940 12347, 7941 12346, 7941 12345, 7942 12344, 7942 12343, 7942 12342, 7942 12341, 7942 12340, 7942 12339, 7942 12338, 7941 12337, 7941 12336, 7940 12336, 7940 12335, 7939 12335, 7939 12334, 7938 12334, 7938 12333, 7937 12333, 7937 12332, 7936 12332, 7936 12331, 7935 12331, 7935 12330, 7934 12330, 7933 12329, 7932 12329, 7931 

In [87]:
df.simple[0]

'1'

In [76]:
df['simple'] = df['simple'].apply(shapely.geometry.shape)

AttributeError: 'str' object has no attribute 'get'

In [72]:
#coords should be a list of coordernates from the simple column a geojson object in the dataframe
df['coords'] = df['simple'].apply(lambda geom: numpy.array(geom.exterior.coords))

AttributeError: 'str' object has no attribute 'exterior'

In [70]:
df.coords[0]

'[[ 7931. 12328.]\n [ 7919. 12329.]\n [ 7913. 12333.]\n [ 7912. 12344.]\n [ 7914. 12348.]\n [ 7920. 12353.]\n [ 7924. 12355.]\n [ 7929. 12355.]\n [ 7940. 12348.]\n [ 7942. 12344.]\n [ 7942. 12338.]\n [ 7935. 12330.]\n [ 7931. 12328.]]'

In [None]:
for i in df.index:
    geom=df.at[i, 'simple']
    tmp = list(geom.exterior.coords)
    tmp_lol = [list(i) for i in tmp]
    df.at[i,'coords'] = tmp_lol

In [58]:
df.simple

0       POLYGON ((7931 12328, 7919 12329, 7913 12333, ...
1       POLYGON ((8011 12393, 8003 12398, 8001 12402, ...
2       POLYGON ((8047 12321, 8040 12324, 8032 12331, ...
3       POLYGON ((8084 12325, 8074 12329, 8070 12333, ...
4       POLYGON ((7917 12268, 7913 12270, 7902 12271, ...
                              ...                        
2105    POLYGON ((9088 7678, 9079 7684, 9075 7692, 907...
2106    POLYGON ((8393 8306, 8386 8311, 8383 8318, 838...
2107    POLYGON ((8250 8448, 8235 8448, 8229 8454, 822...
2108    POLYGON ((9173 10250, 9168 10255, 9168 10263, ...
2109    POLYGON ((9153 10289, 9148 10294, 9148 10303, ...
Name: simple, Length: 2110, dtype: object

In [59]:
geom = df.at[10, 'simple']

In [60]:
geom

'POLYGON ((8079 12046, 8073 12048, 8065 12056, 8063 12062, 8064 12072, 8071 12079, 8074 12080, 8093 12079, 8102 12072, 8104 12066, 8103 12061, 8090 12050, 8079 12046))'

In [63]:
geom.coords

AttributeError: 'str' object has no attribute 'coords'

In [61]:
tmp = list(geom.exterior.coords)

AttributeError: 'str' object has no attribute 'exterior'

In [37]:
x = df.at[0,'coords']

In [39]:
x.dtype

AttributeError: 'str' object has no attribute 'dtype'

In [21]:
create_collection(df_csv=f"./{datetime}_QCed_geojson.csv", calib_np_array_csv=f"./{datetime}_calib_points.csv", samples_and_wells_input=samples_and_wells)

ValueError: please provide a numpy array of shape (N, 2)

## Processing from GeoJSON to Dataframe

In [111]:
# 1 load and QC geojson file
def load_and_QC_geojson_file(geojson_path: str, list_of_calibpoint_names: list = ['calib1','calib2','calib3']):

    #load geojson file
    df = geopandas.read_file(path_to_file)

    #save calib points in a list
    caliblist = []
    for point_name in list_of_calibpoint_names:
        if point_name in df['name'].unique():
            caliblist.append(df.loc[df['name'] == point_name, 'geometry'].values[0])
        else:
            st.write('Your given name is not present in the file', 
            f'These are the calib points you passed: {list_of_calibpoint_names}',
            f"These are the calib points found in the geojson you gave me: {df['name'].unique()}")
    #create coordenate list
    listarray = []
    for point in pointlist:
        listarray.append([point.x, point.y])
    calib_np_array = np.array(listarray)

    #now that calibration points are saved, remove them from the dataframe
    df = df[df['name'].isin(list_of_calibpoint_names) == False]

    #check and remove empty classifications 
    if df['classification'].isna().sum() !=0 :
        st.write(f"you have {df['classification'].isna().sum()} NaNs in your classification column",
                "these are unclassified objects from Qupath, they will be ignored") 
        df = df[df['classification'].notna()]

    #check for MultiPolygon objects
    if 'MultiPolygon' in df.geometry.geom_type.value_counts().keys():
        st.write('MultiPolygon objects present:',
        #print out the classification name of the MultiPolygon objects
        f"{df[df.geometry.geom_type == 'MultiPolygon']['classification']}", 
        'these are not supported, please convert them to polygons in Qupath',
        'the script will continue but these objects will be ignored')
        #remove MultiPolygon objects
        df = df[df.geometry.geom_type != 'MultiPolygon']

    # reformat shape coordenate list
    df['coords'] = np.nan
    df['coords'] = df['coords'].astype('object')
    # simplify to reduce number of points
    df['simple'] = df.geometry.simplify(1)
    for i in df.index:
        geom=df.at[i, 'simple']
        tmp = list(geom.exterior.coords)
        tmp_lol = [list(i) for i in tmp]
        df.at[i,'coords'] = tmp_lol

    #extract classification name into a new column
    df['Name'] = np.nan
    for i in df.index:
        tmp = df.classification[i].get('name')
        df.at[i,'Name'] = tmp

    st.write('The file loading is complete')

    return(df,calib_np_array)

In [None]:
# 2 parse and check samples and wells
def load_and_QC_SamplesandWells(samples_and_wells_input, df):

    # parse common human copy paste formats
    # remove newlines
    samples_and_wells_processed = samples_and_wells_input.replace("\n", "")
    # remove spaces
    samples_and_wells_processed = samples_and_wells_processed.replace(" ", "")
    #parse into python dictionary
    samples_and_wells = ast.literal_eval(samples_and_wells_processed)

    #create list of acceptable wells, default is using a space in between columns
    list_of_acceptable_wells =[]
    for row in list(string.ascii_uppercase[2:14]):
        for column in range(2,22):
            list_of_acceptable_wells.append(str(row) + str(column))

    #check for improper wells
    for well in samples_and_wells.values():
        if well not in list_of_acceptable_wells:
            st.write(f'Your well {well} is not in the list of acceptable wells, please correct it',
            'the LMD is not able to collect into this well, the script will stop here')
            st.stop()

    #check that names in df are all present in the samples and wells
    for name in df.Name.unique():
        if name not in samples_and_wells.keys():
            st.write(f'Your name {name} is not in the list of samples_and_wells, please correct either',
            'please change the class name in Qupath or add it to the samples_and_wells dictionary',
            'and then rerun the web app')
            st.stop()
    
    return samples_and_wells

In [None]:
# 3 create collection
def create_collection(df = df, calib_np_array = calib_np_array, samples_and_wells = samples_and_wells ):
    #create the collection of py-lmd-env package
    #uses caliblist passed on the function, order matters
    #orientation vector is for QuPath coordenate system
    the_collection = Collection(calibration_points = calib_np_array)
    the_collection.orientation_transform = np.array([[1,0 ], [0,-1]])
    for i in df.index:
        the_collection.new_shape(df.at[i,'coords'], well = samples_and_wells[df.at[i, "Name"]])

    the_collection.plot(save_name= "./TheCollection.png")
    st.image("./TheCollection.png", caption='Your Contours', use_column_width=True)
    st.write(the_collection.stats())
    
    the_collection.save(f"./{datetime}_LMD_ready_contours.xml")
    st.download_button("Download file", Path(f"./{datetime}_LMD_ready_contours.xml").read_text(), f"./{datetime}_LMD_ready_contours.xml")
    
    #create and export dataframe with sample placement in 384 well plate
    rows_A_P= [i for i in string.ascii_uppercase[:16]]
    columns_1_24 = [str(i) for i in range(1,25)]
    df_wp384 = pd.DataFrame('',columns=columns_1_24, index=rows_A_P)
    #fill in the dataframe with samples and wells
    for i in samples_and_wells:
        location = samples_and_wells[i]
        df_wp384.at[location[0],location[1:]] = i

    if st.button('Download 384 well plate layout'):
        df_wp384.to_csv(f"./{datetime}_384_wellplate.csv", index=True)
        st.write('Your 384 well plate layout has been downloaded')
