In [1]:
import os

import bmdrc
import pandas as pd

## Module 1: Input Data Modules 

#### Binary Class

In [2]:
# Load the wide-format example data. The data directory can be overridden with
# the BMDRC_DATA_DIR environment variable; when it is unset the original local
# checkout path is used, so the default behavior is unchanged.
data_dir = os.environ.get("BMDRC_DATA_DIR", "/Users/degn400/Git_Repos/bmdrc/data")
morpho_example_wide = pd.read_csv(os.path.join(data_dir, "Binary_Morphology_Wide.csv"))

# Build the object from wide-format input. No endpoint/value column names are
# passed here; the printed output below shows that the stored frame nevertheless
# has `endpoint` and `value` columns.
Wide = bmdrc.input_data_classes.BinaryClass(
    df = morpho_example_wide,
    chemical = "chemical.id",
    plate = "plate.id",
    well = "well",
    concentration = "conc",
    format = "wide"
)

# Print the stored data frame, the declared format, and each column-name attribute.
print(Wide.df)
print(Wide.format)
print(Wide.chemical)
print(Wide.plate)
print(Wide.well)
print(Wide.concentration)
print(Wide.endpoint)
print(Wide.value)

       chemical.id  conc  plate.id well endpoint  value
0             3757   0.0     19877  H01     MO24    0.0
1             3757   0.0     19877  H02     MO24    0.0
2             3757   0.0     19877  H03     MO24    0.0
3             3757   0.0     19877  H04     MO24    0.0
4             3757   0.0     19877  H05     MO24    0.0
...            ...   ...       ...  ...      ...    ...
19867         3863  10.0     20637  H08     DNC_    0.0
19868         3863  10.0     20637  H09     DNC_    0.0
19869         3863  10.0     20637  H10     DNC_    0.0
19870         3863  10.0     20637  H11     DNC_    0.0
19871         3863  10.0     20637  H12     DNC_    0.0

[19872 rows x 6 columns]
wide
chemical.id
plate.id
well
conc
endpoint
value


In [3]:
# Load the long-format example data. The data directory can be overridden with
# the BMDRC_DATA_DIR environment variable; when it is unset the original local
# checkout path is used, so the default behavior is unchanged.
data_dir = os.environ.get("BMDRC_DATA_DIR", "/Users/degn400/Git_Repos/bmdrc/data")
morpho_example_long = pd.read_csv(os.path.join(data_dir, "Binary_Morphology_Long.csv"))

# Build the object from long-format input: the endpoint and value column names
# are given explicitly and `format` is not passed (the output below prints "long").
Long = bmdrc.input_data_classes.BinaryClass(
    df = morpho_example_long,
    chemical = "chemical.id",
    plate = "plate.id",
    well = "well",
    concentration = "conc",
    endpoint = "endpoint",
    value = "value"
)

# Print the stored data frame, the format, and each column-name attribute.
print(Long.df)
print(Long.format)
print(Long.chemical)
print(Long.plate)
print(Long.well)
print(Long.concentration)
print(Long.endpoint)
print(Long.value)

       chemical.id  conc  plate.id well endpoint  value
0             3757   0.0     19877  H01     MO24    0.0
1             3757   0.0     19877  H02     MO24    0.0
2             3757   0.0     19877  H03     MO24    0.0
3             3757   0.0     19877  H04     MO24    0.0
4             3757   0.0     19877  H05     MO24    0.0
...            ...   ...       ...  ...      ...    ...
19867         3863  10.0     20637  H08     DNC_    0.0
19868         3863  10.0     20637  H09     DNC_    0.0
19869         3863  10.0     20637  H10     DNC_    0.0
19870         3863  10.0     20637  H11     DNC_    0.0
19871         3863  10.0     20637  H12     DNC_    0.0

[19872 rows x 6 columns]
long
chemical.id
plate.id
well
conc
endpoint
value


## Module 2: Pre-Processing Modules

#### Binary Class: Combine Endpoints

In [4]:
# New endpoint name -> list of existing endpoints it is built from.
endpoint_dict = {
    "ANY24": ["MO24", "DP24", "SM24", "NC24"],
    "DAVID": ["MO24", "DP24"],
}

# Combine endpoints twice on purpose: the second call reuses the name "DAVID",
# which produces the "already an existing endpoint" message in the output.
Long.combine_and_create_new_endpoints(endpoint_dict)
Long.combine_and_create_new_endpoints({
    "Test": ["MO24"],
    "DAVID": ["DP24"],
})

Long.df

DAVID is already an existing endpoint


Unnamed: 0,chemical.id,conc,plate.id,well,endpoint,value
0,3757,0.0,19877,H01,MO24,0.0
1,3757,0.0,19877,H02,MO24,0.0
2,3757,0.0,19877,H03,MO24,0.0
3,3757,0.0,19877,H04,MO24,0.0
4,3757,0.0,19877,H05,MO24,0.0
...,...,...,...,...,...,...
859,3863,10.0,20637,H08,Test,1.0
860,3863,10.0,20637,H09,Test,1.0
861,3863,10.0,20637,H10,Test,0.0
862,3863,10.0,20637,H11,Test,0.0


In [5]:
# Show the record of combined endpoints. In the output, "DAVID" still maps to
# ['MO24', 'DP24'], i.e. the definition from the first call.
Long.report_combination

{'Test': ['MO24'],
 'DAVID': ['MO24', 'DP24'],
 'ANY24': ['MO24', 'DP24', 'SM24', 'NC24']}

In [6]:
# Keep only the rows whose endpoint column equals the combined "DAVID" endpoint.
Long.df.loc[Long.df[Long.endpoint].eq("DAVID")]

Unnamed: 0,chemical.id,conc,plate.id,well,endpoint,value
0,3757,0.0,19877,H01,DAVID,0.0
1,3757,0.0,19877,H02,DAVID,0.0
2,3757,0.0,19877,H03,DAVID,0.0
3,3757,0.0,19877,H04,DAVID,0.0
4,3757,0.0,19877,H05,DAVID,0.0
...,...,...,...,...,...,...
859,3863,10.0,20637,H08,DAVID,1.0
860,3863,10.0,20637,H09,DAVID,1.0
861,3863,10.0,20637,H10,DAVID,0.0
862,3863,10.0,20637,H11,DAVID,0.0


In [7]:
# Add the combined endpoints to the wide-format object, reusing the
# `endpoint_dict` mapping defined in the earlier combine-endpoints cell.
Wide.combine_and_create_new_endpoints(endpoint_dict)

Wide.df

Unnamed: 0,chemical.id,conc,plate.id,well,endpoint,value
0,3757,0.0,19877,H01,MO24,0.0
1,3757,0.0,19877,H02,MO24,0.0
2,3757,0.0,19877,H03,MO24,0.0
3,3757,0.0,19877,H04,MO24,0.0
4,3757,0.0,19877,H05,MO24,0.0
...,...,...,...,...,...,...
859,3863,10.0,20637,H08,DAVID,1.0
860,3863,10.0,20637,H09,DAVID,1.0
861,3863,10.0,20637,H10,DAVID,0.0
862,3863,10.0,20637,H11,DAVID,0.0


#### Binary Class: Set wells to NA

In [8]:
# Set wells to NA based on the "DNC_" endpoint having the value 1.
Long.set_well_to_na(
    endpoint_name = "DNC_",
    endpoint_value = 1,
)

# Every row shown for this well ID is expected to have a missing value.
Long.df.loc[Long.df["bmdrc.Well.ID"].eq("3757 21.9 19877 B01")]


Unnamed: 0,chemical.id,conc,plate.id,well,endpoint,value,bmdrc.Well.ID
216,3757,21.9,19877,B01,MO24,,3757 21.9 19877 B01
1080,3757,21.9,19877,B01,DP24,,3757 21.9 19877 B01
1944,3757,21.9,19877,B01,SM24,,3757 21.9 19877 B01
2808,3757,21.9,19877,B01,NC24,,3757 21.9 19877 B01
3672,3757,21.9,19877,B01,MORT,,3757 21.9 19877 B01
4536,3757,21.9,19877,B01,YSE_,,3757 21.9 19877 B01
5400,3757,21.9,19877,B01,AXIS,,3757 21.9 19877 B01
6264,3757,21.9,19877,B01,EYE_,,3757 21.9 19877 B01
7128,3757,21.9,19877,B01,SNOU,,3757 21.9 19877 B01
7992,3757,21.9,19877,B01,JAW_,,3757 21.9 19877 B01


In [9]:
# Set wells to NA based on the "MORT" endpoint having the value 1, while
# exempting the endpoints listed in `except_endpoint`.
Long.set_well_to_na(
    endpoint_name = "MORT",
    endpoint_value = 1,
    except_endpoint = ["DP24", "MO24", "SM24", "MORT"],
)

# For this well, only DP24, MO24, SM24, and MORT are expected to keep a value;
# every other endpoint should be np.nan.
Long.df.loc[Long.df["bmdrc.Well.ID"].eq("3757 2.65 19890 G04")]

Unnamed: 0,chemical.id,conc,plate.id,well,endpoint,value,bmdrc.Well.ID
63,3757,2.65,19890,G04,MO24,0.0,3757 2.65 19890 G04
927,3757,2.65,19890,G04,DP24,1.0,3757 2.65 19890 G04
1791,3757,2.65,19890,G04,SM24,0.0,3757 2.65 19890 G04
2655,3757,2.65,19890,G04,NC24,,3757 2.65 19890 G04
3519,3757,2.65,19890,G04,MORT,1.0,3757 2.65 19890 G04
4383,3757,2.65,19890,G04,YSE_,,3757 2.65 19890 G04
5247,3757,2.65,19890,G04,AXIS,,3757 2.65 19890 G04
6111,3757,2.65,19890,G04,EYE_,,3757 2.65 19890 G04
6975,3757,2.65,19890,G04,SNOU,,3757 2.65 19890 G04
7839,3757,2.65,19890,G04,JAW_,,3757 2.65 19890 G04


In [10]:
# Show the record of the set_well_to_na calls made above; the output has one
# entry per call: [endpoint names, endpoint values, excepted endpoints].
Long.report_well_removal

[[['DNC_'], [1], None], [['MORT'], [1], ['DP24', 'MO24', 'SM24', 'MORT']]]

#### Binary Class: Remove Endpoint