# Hemocyte Recruitment Analysis - Preprocessing 
This notebook takes abdomen and hemocyte segmented features extracted by QuPath and consolidates features from all samples into dataframes for downstream analysis. Further, sample names are mapped and XY coordinates of hemocytes are scaled relative to the abdomen centroid. 

## Import required packages

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from os import path
from pathlib import Path
import glob
sns.set_style('white')

## Set up input/output directories and experiment info

In [8]:

base_path = Path("/Users/sbandya/Desktop/hemocyte_recruitment_image_quantification/")
sample_list = pd.read_csv(base_path.joinpath("sample_list.csv"),sep=",",header=0)
sample_mappings = pd.read_csv(base_path.joinpath("sample_mapping.tsv"),sep="\t",header=0)

# Paths to abdomen measurements and hemocyte detection measurements
am_path= base_path.joinpath('data','midsections','annotation_measurements')
dm_path= base_path.joinpath('data','midsections','detection_measurements')

outputs = base_path.joinpath('results','midsections')




## Create a dataframe of abdomen features

In [9]:

def get_sample_condition(imagename,sample_mapping):
    sampleid = imagename[:2]
    condition = sample_mapping[sample_mapping['Sample #'] == int(sampleid)]
    return condition['Condition'].iloc[0]

all_annotation_measurement_files = glob.glob(path.join(am_path , "*.txt"))
am_list = []
for amf in all_annotation_measurement_files:
    df = pd.read_csv(amf, index_col=None, header=0, sep="\t")
    am_list.append(df)


abdomen_coordinates = pd.concat(am_list, axis=0, ignore_index=True)

## annotate each fly abdomen with experimental group info 
abdomen_coordinates['condition'] = abdomen_coordinates['Image'].apply(get_sample_condition,sample_mapping=sample_list)
abdomen_coordinates['condition'] = np.where(abdomen_coordinates['condition'] == 'C', 'Fed', abdomen_coordinates['condition'])
abdomen_coordinates['condition'] = np.where(abdomen_coordinates['condition'] == 'S', 'Starved', abdomen_coordinates['condition'])

abdomen_coordinates.columns = abdomen_coordinates.columns.str.replace('Centroid X µm', 'Centroid_X')
abdomen_coordinates.columns = abdomen_coordinates.columns.str.replace('Centroid Y µm', 'Centroid_Y')
print(abdomen_coordinates.shape)
abdomen_coordinates[:5]


(40, 14)


Unnamed: 0,Image,Name,Class,Parent,ROI,Centroid_X,Centroid_Y,Num Detections,Num Positive,Positive %,Num Positive per mm^2,Area µm^2,Perimeter µm,condition
0,24E_F3_2_FITC.tif,PathAnnotationObject,,Image,Polygon,383.65,360.84,87,87,100,366.05,237669.9,1998.3,Fed
1,76B_1_FITC.tif,PathAnnotationObject,,Image,Polygon,404.4,297.69,115,115,100,723.23,159008.5,1629.7,Starved
2,84D_2_FITC.tif,PathAnnotationObject,,Image,Polygon,425.58,306.4,30,30,100,204.23,146890.4,1547.7,Starved
3,86D_3_FITC.tif,PathAnnotationObject,,Image,Polygon,431.44,318.57,87,87,100,553.93,157060.0,1624.2,Starved
4,86D_4_FITC.tif,PathAnnotationObject,,Image,Polygon,426.46,328.86,91,91,100,602.37,151070.0,1621.3,Starved


## Map Fly and Section IDs for each abdomen image

In [10]:
# update sample names in the image column to new names in the "fly_section" format 
def map_samples(imageid, sample_mapping_lookup):
    try:
        fly_section_id = sample_mapping_lookup[sample_mapping_lookup['old']==imageid]['new'].iloc[0]
        fly_id = sample_mapping_lookup[sample_mapping_lookup['old']==imageid]['new'].iloc[0].split("_")[0]
        section_id = sample_mapping_lookup[sample_mapping_lookup['old']==imageid]['new'].iloc[0].split("_")[1]
    except: 
        fly_section_id = "notfound"
        fly_id = "none"
        section_id = "none"
    return(pd.Series([fly_section_id,fly_id,section_id]))

abdomen_coordinates[['fly_section_ID','fly_ID','section_ID']] = abdomen_coordinates.apply(lambda x: map_samples(x.Image, sample_mapping_lookup=sample_mappings), axis=1) 

abdomen_coordinates

Unnamed: 0,Image,Name,Class,Parent,ROI,Centroid_X,Centroid_Y,Num Detections,Num Positive,Positive %,Num Positive per mm^2,Area µm^2,Perimeter µm,condition,fly_section_ID,fly_ID,section_ID
0,24E_F3_2_FITC.tif,PathAnnotationObject,,Image,Polygon,383.65,360.84,87,87,100,366.05,237669.9,1998.3,Fed,1_2,1,2
1,76B_1_FITC.tif,PathAnnotationObject,,Image,Polygon,404.4,297.69,115,115,100,723.23,159008.5,1629.7,Starved,76_7,76,7
2,84D_2_FITC.tif,PathAnnotationObject,,Image,Polygon,425.58,306.4,30,30,100,204.23,146890.4,1547.7,Starved,84_17,84,17
3,86D_3_FITC.tif,PathAnnotationObject,,Image,Polygon,431.44,318.57,87,87,100,553.93,157060.0,1624.2,Starved,86_18,86,18
4,86D_4_FITC.tif,PathAnnotationObject,,Image,Polygon,426.46,328.86,91,91,100,602.37,151070.0,1621.3,Starved,86_19,86,19
5,72C_5_FITC.tif,PathAnnotationObject,,Image,Polygon,412.65,318.85,117,117,100,438.76,266658.4,2083.3,Fed,72_15,72,15
6,72C_4_FITC.tif,PathAnnotationObject,,Image,Polygon,451.95,326.52,61,61,100,256.14,238151.4,1913.7,Fed,72_14,72,14
7,73D_2_FITC.tif,PathAnnotationObject,,Image,Polygon,408.02,301.42,55,55,100,265.77,206942.7,1824.7,Fed,73_17,73,17
8,73D_1_FITC.tif,PathAnnotationObject,,Image,Polygon,460.9,323.27,52,52,100,225.2,230906.1,1892.8,Fed,73_16,73,16
9,90C_3_FITC.tif,PathAnnotationObject,,Image,Polygon,412.88,321.63,23,23,100,114.64,200630.0,1901.7,Starved,90_13,90,13


## Create a dataframe of hemocyte features from all samples - scale coordinates relative to abdomen centroid and map fly and section IDs 

In [11]:
from scipy.spatial import distance
import math 
sns.set_palette("pastel")

def zero_scale_x(image, x, polygon):
    polygon_x = abdomen_coordinates[abdomen_coordinates['Image']==image]['Centroid_X'].iloc[0] 
    zero_scaled_x = x-polygon_x
    return(zero_scaled_x)

def zero_scale_y(image, y, polygon):
   polygon_y = abdomen_coordinates[abdomen_coordinates['Image']==image]['Centroid_Y'].iloc[0] 
   zero_scaled_y = y-polygon_y
   return(zero_scaled_y)

def get_centroid_distance(image,x,y,polygon):
    polygon_x = abdomen_coordinates[abdomen_coordinates['Image']==image]['Centroid_X'].iloc[0]
    polygon_y = abdomen_coordinates[abdomen_coordinates['Image']==image]['Centroid_Y'].iloc[0]
    centroid_distance = distance.euclidean([polygon_x, polygon_y], [x,y])

    return centroid_distance
    

all_detection_measurement_files = glob.glob(path.join(dm_path , "*.txt"))
dm_list = []

for dm in all_detection_measurement_files:
    detection_measurements = pd.read_csv(dm,sep="\t",header=0)
    detection_measurements.columns = detection_measurements.columns.str.replace('Centroid X µm', 'Centroid_X')
    detection_measurements.columns = detection_measurements.columns.str.replace('Centroid Y µm', 'Centroid_Y')
    detection_measurements["centroid_x_zeroscaled"] = detection_measurements.apply(
            lambda x: zero_scale_x(x.Image, x.Centroid_X,polygon=abdomen_coordinates), axis=1) 
    detection_measurements["centroid_y_zeroscaled"] = detection_measurements.apply(
            lambda x: zero_scale_y(x.Image, x.Centroid_Y,polygon=abdomen_coordinates), axis=1)
    detection_measurements["centroid_distances"] = detection_measurements.apply(
            lambda x: get_centroid_distance(x.Image, x.Centroid_X,x.Centroid_Y,polygon=abdomen_coordinates), axis=1)
    
    dm_list.append(detection_measurements)
    
    
#print(dm_list)
hemocyte_coordinates = pd.concat(dm_list, axis=0, ignore_index=True)
hemocyte_coordinates[['fly_section_ID','fly_ID','section_ID']] = hemocyte_coordinates.apply(lambda x: map_samples(x.Image, sample_mapping_lookup=sample_mappings), axis=1) 

hemocyte_coordinates[:5]

Unnamed: 0,Image,Name,Class,Parent,ROI,Centroid_X,Centroid_Y,Nucleus: Area,Nucleus: Perimeter,Nucleus: Circularity,...,Delaunay: Max distance,Delaunay: Min distance,Delaunay: Mean triangle area,Delaunay: Max triangle area,centroid_x_zeroscaled,centroid_y_zeroscaled,centroid_distances,fly_section_ID,fly_ID,section_ID
0,24E_F3_2_FITC.tif,Positive,Positive,PathAnnotationObject,Polygon,600.78,196.9,17.6465,18.1972,0.6697,...,436.341,94.0672,6727.6167,11890.46,217.13,-163.94,272.069404,1_2,1,2
1,24E_F3_2_FITC.tif,Positive,Positive,PathAnnotationObject,Polygon,164.63,200.02,5.8822,9.6863,0.7878,...,436.341,146.6031,5286.6216,22187.0,-219.02,-160.82,271.721977,1_2,1,2
2,24E_F3_2_FITC.tif,Positive,Positive,PathAnnotationObject,Polygon,462.34,238.35,5.8822,8.7413,0.9674,...,300.4919,4.1613,2609.145,9307.5098,78.69,-122.49,145.588173,1_2,1,2
3,24E_F3_2_FITC.tif,Positive,Positive,PathAnnotationObject,Polygon,466.39,245.4,8.235,10.9225,0.8674,...,182.6596,4.1613,4488.1665,13898.832,82.74,-115.44,142.029227,1_2,1,2
4,24E_F3_2_FITC.tif,Positive,Positive,PathAnnotationObject,Polygon,457.45,247.26,16.8622,18.784,0.6005,...,297.7702,5.39,6737.3823,22187.0,73.8,-113.58,135.450568,1_2,1,2


## Write abdomen and hemocyte features to csv files for downstream analysis 

In [12]:
hemocyte_coordinates.to_csv(outputs.joinpath("hemocyte_coordinates_midsections.csv"), sep="\t",index=False)
abdomen_coordinates.to_csv(outputs.joinpath("abdomen_coordinates_midsections.csv"), sep="\t",index=False)