## Summed OS volume

Sometimes several outer segments (OS) are combined into a single label. One way to work around this without filtering is to calculate the summed OS volume per image and afterwards average it per biological replicate.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Directory where the measurement tables are stored
path = "../../measurements/cpfl/"

# Read the unfiltered, rescaled measurements table and display it
measurements_file = path + "04-measurements-rescaled.csv"
measurements = pd.read_csv(measurements_file)
measurements

Unnamed: 0,label,age,image_id,genotype,maximum,mean,median,minimum,sigma,sum,...,equivalent_spherical_perimeter_rescaled,equivalent_spherical_radius_rescaled,feret_diameter_rescaled,perimeter_2d_rescaled,major_axis_length_2d_rescaled,minor_axis_length_2d_rescaled,surface_area_rescaled,bbox_volume_rescaled,convex_volume_rescaled,volume_rescaled
0,1,8,0,cpfl,250.0,189.793103,184.144531,157.0,20.305487,5504.0,...,14.744113,0.615610,1.480172,2.957153,0.976660,0.904212,3.881768,1.516422,1.078345,0.977250
1,2,8,0,cpfl,334.0,207.273684,202.871094,133.0,42.706679,19691.0,...,32.521386,0.914283,2.624064,4.973873,2.127172,1.027014,7.095704,4.852550,3.572016,3.201335
2,4,8,0,cpfl,262.0,214.904762,215.355469,175.0,27.285353,4513.0,...,11.889571,0.552814,1.370373,2.957153,0.976660,0.904212,5.174290,1.516422,0.943551,0.707664
3,5,8,0,cpfl,311.0,195.171429,190.386719,118.0,36.997206,6831.0,...,16.713421,0.655434,1.678357,3.390218,1.255098,0.902928,7.379101,2.426275,1.381629,1.179439
4,6,8,0,cpfl,355.0,219.354167,215.355469,147.0,47.335750,21058.0,...,32.749208,0.917480,2.068209,5.481241,1.910113,1.514661,11.022045,6.065688,3.774206,3.235034
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5845,176,70,39,wt,437.0,228.476298,219.855469,118.0,59.914239,101215.0,...,90.772814,1.527475,5.732681,13.307188,6.005873,1.731845,45.719796,55.197761,20.994020,14.928332
5846,177,70,39,wt,1542.0,485.858939,412.605469,137.0,265.029227,1043625.0,...,260.041601,2.585340,9.158631,32.454379,8.011704,7.033195,160.768595,300.992921,132.366793,72.383878
5847,178,70,39,wt,416.0,224.548673,213.832031,127.0,62.839505,25374.0,...,36.509477,0.968722,2.261000,5.734924,1.901773,1.715924,15.687029,8.491963,4.414473,3.807904
5848,180,70,39,wt,481.0,251.469466,237.925781,145.0,64.019970,65885.0,...,63.956915,1.282154,4.579315,13.771030,4.629315,2.203200,35.317111,35.383180,15.063125,8.828946


### Calculate the summed volume per image_id

Here, the summed volume per image id is calculated.

In [3]:
# Sum the rescaled volume of all rows sharing an image_id, then give the
# resulting column a name that identifies it as a per-image total
summed_volume = (
    measurements
    .groupby("image_id", as_index=False)["volume_rescaled"]
    .sum()
    .rename(columns={"volume_rescaled": "summed_volume"})
)
summed_volume

Unnamed: 0,image_id,summed_volume
0,0,563.030645
1,1,648.556847
2,2,300.083068
3,3,3659.193719
4,4,3710.954257
5,5,2371.852523
6,6,2115.03803
7,7,1804.643293
8,8,2869.171547
9,9,953.964241


### Open average dataframe and add summed volume

In [8]:
# Load the dataframe of averaged values
measurements_means = pd.read_csv(path + "07-average-values-max-number-of-labels-added.csv")

# Add the summed volume to the averages. The join key is stated explicitly
# so the merge cannot silently pick up other columns that happen to share
# a name in both frames; `how="inner"` keeps only image_ids present in both.
measurements_processed = pd.merge(
    measurements_means, summed_volume, on="image_id", how="inner"
).astype({"age": int})  # store age as an integer column
measurements_processed

Unnamed: 0,image_id,age,maximum,mean,median,minimum,sigma,sum,variance,flatness,...,perimeter_2d_rescaled,major_axis_length_2d_rescaled,minor_axis_length_2d_rescaled,surface_area_rescaled,bbox_volume_rescaled,convex_volume_rescaled,volume_rescaled,label,genotype,summed_volume
0,0,8,435.323232,231.815654,215.544626,136.808081,67.478992,50966.181818,8476.60195,1.525459,...,6.42257,2.481628,1.372441,19.03757,17.155822,8.109028,5.67084,99,cpfl,563.030645
1,1,8,460.709677,224.380055,206.129242,126.634409,70.985109,56809.064516,8270.087836,1.629387,...,7.325543,2.841319,1.473626,24.355526,21.933224,10.160933,6.959598,93,cpfl,648.556847
2,2,8,510.370968,252.925858,232.679688,141.645161,81.942129,46226.919355,12177.344482,1.478168,...,5.670415,2.152691,1.295106,16.577307,13.719543,6.730957,4.8232,62,cpfl,300.083068
3,3,14,608.186207,293.062064,266.616379,157.948276,99.985899,94890.186207,16773.811155,1.415236,...,8.747193,3.531725,1.585332,27.055333,25.996121,12.82161,8.65755,290,cpfl,3659.193719
4,4,14,648.180556,298.264152,268.251157,154.018519,107.243126,104264.393519,18925.976599,1.42442,...,9.041931,3.52464,1.744031,27.524135,29.736536,13.43422,9.092604,216,cpfl,3710.954257
5,5,14,546.748148,272.073887,250.41428,153.17037,86.006519,84179.6,11148.498631,1.464712,...,8.9495,3.56804,1.559667,27.394193,29.199424,13.371972,8.43555,135,cpfl,2371.852523
6,6,20,487.565517,243.963166,224.548761,130.993103,77.417826,106942.248276,9129.420382,1.488183,...,10.161918,3.974969,1.837445,36.029479,39.587794,18.379267,11.898974,145,cpfl,2115.03803
7,7,20,442.237805,228.048594,211.738281,126.243902,69.436005,77319.286585,6960.528745,1.612874,...,8.76723,3.694451,1.541159,28.744134,31.292334,13.436362,9.488528,164,cpfl,1804.643293
8,8,20,359.148387,191.621903,179.647253,108.348387,52.7696,78788.929032,4414.103336,1.477569,...,9.842299,3.811786,1.790636,33.458424,35.488406,17.097196,10.85019,155,cpfl,2869.171547
9,9,245,865.575758,368.635598,323.865057,173.484848,155.070912,157599.121212,41263.537517,1.235098,...,9.257627,3.302101,1.937421,27.561774,26.845265,14.834386,10.540409,33,cpfl,953.964241


In [9]:
# Write the merged table to disk, leaving out the dataframe index
processed_file = path + "08-features-processed.csv"
measurements_processed.to_csv(processed_file, index=False)