In [62]:
import tabulate
import numpy as np

In [63]:
import warnings
import pandas as pd
warnings.simplefilter(action="ignore", category=FutureWarning)
import pandas as pd
import utils
import plotly.express as px
import random

random.seed(12527)

### Calculating mean Average Precision (mAP) for CellProfiler features
In this notebook, we calculate the mAP for perturbation detection and perturbation matching tasks for the CellProfiler features.

All compound, ORF and CRISPR plates from the `2020_11_04_CPJUMP1` batch are read, except for those with cell seeding density other than 1000 (100%) and compound plates with antibiotics (Puromycin/Blasticidin) added. Additional annotations for the compounds are also read.

In [64]:
replicability_map_df = pd.DataFrame()
replicability_fp_df = pd.DataFrame()
matching_map_df = pd.DataFrame()
matching_fp_df = pd.DataFrame()
gene_compound_matching_map_df = pd.DataFrame()
gene_compound_matching_fp_df = pd.DataFrame()

replicate_feature = "Metadata_broad_sample"
batch = "2020_11_04_CPJUMP1"

experiment_df = (
    pd.read_csv("output/experiment-metadata.tsv", sep="\t")
    .query("Batch==@batch")
    .query("Density==100")
    .query('Antibiotics=="absent"')
)

experiment_df.drop(
    experiment_df[
        (experiment_df.Perturbation == "compound") & (experiment_df.Cell_line == "Cas9")
    ].index,
    inplace=True,
)
experiment_df=experiment_df.iloc[:32,:]##########
#experiment_df=experiment_df[experiment_df['Time']>48]

target1_metadata = pd.read_csv(
    "input/JUMP-Target-1_compound_metadata_additional_annotations.tsv",
    sep="\t",
    usecols=["broad_sample", "target_list"],
).rename(
    columns={
        "broad_sample": "Metadata_broad_sample",
        "target_list": "Metadata_target_list",
    }
)

Normalized, feature selected compound profiles for replicate plates at the same time point, using the same cell type are merged into a single data frame. Empty wells are removed before calculating mAP for perturbation detection (retrieve replicates of the query compound). mAP for the random baseline (10000 random list of size number of replicates + number of negcon) is subtracted.

For the perturbation matching task, compounds that not are replicable (mAP <= 0) are removed. Negative control wells are also removed and the additional target annotations are added before mAP for perturbation matching is calculated. Again, the mAP for the random baseline is subtracted.

Similarly, mAP for perturbation detection is computed for both CRISPR and ORF plates while mAP for perturbation matching is computed only for the CRISPR guides as there are no sister ORF perturbations.

Then, mAP for the chemical perturbation-genetic perturbation matching task is computed using the median consensus profiles and the baseline mAP is subtracted.

Along with mAP, fraction positive is also calculated. This is the fraction of queries that have mAP > 0.

The mAP and fraction positive values as saved to `output` directory and the results are plotted.

In [65]:
################################

In [66]:
df=pd.read_csv('/data/datacenter/H3C_GPU/projects/yuchen/SLPP_short_new.csv')
for i in range(len(df)):
    if df['Metadata_gene'][i] == 'empty':
        df['Metadata_gene'][i] = np.nan 
df=df.drop(columns='Metadata_experiment_type')


Columns (13,16) have mixed types. Specify dtype option on import or set low_memory=False.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See

In [68]:
sp=df
sp=df.iloc[:,3:]
sp

Unnamed: 0,Metadata_broad_sample,Metadata_solvent,Metadata_Plate,Metadata_Well,Metadata_InChIKey,Metadata_pert_iname,Metadata_pubchem_cid,Metadata_gene,Metadata_pert_type,Metadata_control_type,...,290,291,292,293,294,295,296,297,298,299
0,BRD-A86665761-001-01-1,DMSO,BR00117016,A01,TZDUHAJSIBHXDL-UHFFFAOYSA-N,gabapentin-enacarbil,9883900.0,CACNB4,trt,,...,-0.007036,-0.006612,0.009845,-0.011538,0.013160,-0.027354,-0.009749,0.005850,-0.000253,-0.000376
1,BRD-A22032524-074-09-9,DMSO,BR00117016,A03,HTIQEAQVCYTUBX-UHFFFAOYSA-N,amlodipine,2162.0,CACNA2D3,trt,,...,0.013385,0.010643,0.006958,-0.003733,0.024650,-0.011005,-0.005121,-0.008226,0.013140,-0.005600
2,BRD-A01078468-001-14-8,DMSO,BR00117016,A04,PBBGSZCBWVPOOL-UHFFFAOYSA-N,hexestrol,3606.0,AKR1C1,trt,,...,-0.012298,0.000568,0.019614,0.001198,0.015910,-0.008409,-0.005918,-0.003438,0.001664,-0.005679
3,BRD-K48278478-001-01-2,DMSO,BR00117016,A05,LOUPRKONTZGTKE-AFHBHXEDSA-N,quinine,94175.0,KCNN4,trt,,...,-0.006355,-0.002988,0.015451,0.003553,0.014991,-0.004201,-0.004814,0.000930,-0.009731,0.000749
4,BRD-K36574127-001-01-3,DMSO,BR00117016,A06,NYNZQNWKBKUAII-KBXCAEBGSA-N,LOXO-101,46189000.0,NTRK1,trt,,...,-0.008072,-0.002900,0.005472,-0.004218,0.009933,-0.011908,-0.015588,0.001549,0.002831,0.001376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13997,,DMSO,BR00117024,N22,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,-0.005675,-0.003106,-0.006203,0.007038,0.000982,0.001819,-0.004035,0.002704,-0.007409,-0.001977
13998,,DMSO,BR00117024,O07,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.013556,-0.001042,0.000969,0.003483,0.001995,-0.000063,0.002905,-0.003856,-0.004368,0.001815
13999,,DMSO,BR00117024,O18,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.008084,0.004270,-0.000594,-0.001484,0.002636,-0.004660,0.004217,0.000806,-0.007037,-0.000422
14000,,DMSO,BR00117024,P05,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,-0.002414,-0.004191,-0.007369,0.007975,0.002505,0.003243,0.002621,-0.003257,-0.009267,0.004359


In [69]:
cats = sp['Metadata_Plate'].unique()

In [70]:
for cat in cats: 
    sp.loc[sp['Metadata_Plate']==cat,:].to_csv('data/'+str(cat)+'_SLPP_short_new.csv',sep=',',index=False)

In [71]:
#################################

In [72]:
random_baseline_ap = pd.DataFrame(columns=["ap", "n_matches", "n_reference"])

for cell in experiment_df.Cell_type.unique():
    cell_df = experiment_df.query("Cell_type==@cell")
    modality_1_perturbation = "compound"
    modality_1_experiments_df = cell_df.query("Perturbation==@modality_1_perturbation")
    for modality_1_timepoint in modality_1_experiments_df.Time.unique():
        modality_1_timepoint_df = modality_1_experiments_df.query(
            "Time==@modality_1_timepoint"
        )
        modality_1_df = pd.DataFrame()
        for plate in modality_1_timepoint_df.Assay_Plate_Barcode.unique():
            # Read all the plates
            data_df = utils.load_data(
                plate, "SLPP_short_new.csv"
            ).assign(Metadata_modality=modality_1_perturbation)
            modality_1_df = utils.concat_profiles(modality_1_df, data_df)

        # Set Metadata_broad_sample value to "DMSO" for DMSO wells
        modality_1_df[replicate_feature].fillna("DMSO", inplace=True)

        # Remove empty wells
        modality_1_df = utils.remove_empty_wells(modality_1_df)

        # Description
        description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}"

        # Calculate replicability mAP
        print(f"Computing {description} replicability")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_broad_sample"],
        }

        reference_dict = {
            "filter": {
                "Metadata_pert_type": ["'trt'"],
                "Metadata_control_type": [
                    "'poscon_orf'",
                    "'poscon_diverse'",
                    "'poscon_cp'",
                ],
            },
            "matching": ["Metadata_Plate"],
        }

        metric = utils.AveragePrecision(
            modality_1_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=False,
        )

        #random_baseline_ap = metric.random_baseline_ap

        replicability_map_df, replicability_fp_df = utils.create_replicability_df(
            replicability_map_df,
            replicability_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        # Remove DMSO wells
        modality_1_df = utils.remove_negcon_and_empty_wells(modality_1_df)

        # Create consensus profiles
        modality_1_consensus_df = utils.consensus(modality_1_df, replicate_feature)

        # Filter out non-replicable compounds
        replicable_compounds = list(
            replicability_map_df[
                (replicability_map_df.Description == description)
                & (replicability_map_df.mAP > 0)
            ][replicate_feature]
        )
        modality_1_consensus_df = modality_1_consensus_df.query(
            "Metadata_broad_sample==@replicable_compounds"
        ).reset_index(drop=True)

        # Adding additional gene annotation metadata
        modality_1_consensus_df = modality_1_consensus_df.merge(
            target1_metadata, on="Metadata_broad_sample", how="left"
        )

        # Expand compound profiles and add new matching_target column
        modality_1_consensus_expanded_df = (
            modality_1_consensus_df.assign(
                Metadata_matching_target=lambda x: x.Metadata_target_list.str.split("|")
            )
            .explode("Metadata_matching_target")
            .reset_index(drop=True)
        )
        modality_1_consensus_df = modality_1_consensus_df.assign(
            Metadata_matching_target=lambda x: x.Metadata_target_list
        )

        # Calculate compound-compound matching
        print(f"Computing {description} matching")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_matching_target"],
        }

        reference_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "non_matching": ["Metadata_target_list"],
        }

        metric = utils.AveragePrecision(
            modality_1_consensus_expanded_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=True,
        )

        #random_baseline_ap = metric.random_baseline_ap

        matching_map_df, matching_fp_df = utils.create_matching_df(
            matching_map_df,
            matching_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        all_modality_2_experiments_df = cell_df.query(
            "Perturbation!=@modality_1_perturbation"
        )
        for (
            modality_2_perturbation
        ) in all_modality_2_experiments_df.Perturbation.unique():
            modality_2_experiments_df = all_modality_2_experiments_df.query(
                "Perturbation==@modality_2_perturbation"
            )
            for modality_2_timepoint in modality_2_experiments_df.Time.unique():
                modality_2_timepoint_df = modality_2_experiments_df.query(
                    "Time==@modality_2_timepoint"
                )

                modality_2_df = pd.DataFrame()
                for plate in modality_2_timepoint_df.Assay_Plate_Barcode.unique():
                    data_df = (
                        utils.load_data(
                            plate,
                            "SLPP_short_new.csv",
                        )
                        .assign(Metadata_modality=modality_2_perturbation)
                        .assign(Metadata_matching_target=lambda x: x.Metadata_gene)
                        .assign(Metadata_target_list=lambda x: x.Metadata_gene)
                    )
                    modality_2_df = utils.concat_profiles(modality_2_df, data_df)

                # Remove empty wells
                modality_2_df = utils.remove_empty_wells(modality_2_df)

                # Description
                description = f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"

                # Calculate replicability mAP

                if not replicability_map_df.Description.str.contains(description).any():
                    print(f"Computing {description} replicability")

                    match_dict = {
                        "filter": {"Metadata_control_type": ["'negcon'"]},
                        "matching": ["Metadata_broad_sample"],
                    }

                    reference_dict = {
                        "filter": {
                            "Metadata_pert_type": ["'trt'"],
                            "Metadata_control_type": [
                                "'poscon_orf'",
                                "'poscon_diverse'",
                                "'poscon_cp'",
                            ],
                        },
                        "matching": ["Metadata_Plate"],
                    }

                    metric = utils.AveragePrecision(
                        modality_2_df,
                        match_dict,
                        reference_dict,
                        100,
                        random_baseline_ap,
                        anti_match=False,
                    )

                    #random_baseline_ap = metric.random_baseline_ap

                    (
                        replicability_map_df,
                        replicability_fp_df,
                    ) = utils.create_replicability_df(
                        replicability_map_df,
                        replicability_fp_df,
                        metric,
                        modality_2_perturbation,
                        cell,
                        modality_2_timepoint,
                    )

                # Remove negcon wells
                modality_2_df = utils.remove_negcon_and_empty_wells(modality_2_df)

                # Create consensus profiles
                modality_2_consensus_df = utils.consensus(
                    modality_2_df, "Metadata_broad_sample"
                )

                # Filter out non-replicable genes
                replicable_genes = list(
                    replicability_map_df[
                        (
                            replicability_map_df.Description
                            == f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                        )
                        & (replicability_map_df.mAP > 0)
                    ][replicate_feature]
                )
                modality_2_consensus_df = modality_2_consensus_df.query(
                    "Metadata_broad_sample==@replicable_genes"
                ).reset_index(drop=True)

                # Calculate cripsr-crispr matching
                if modality_2_perturbation == "crispr":
                    if not matching_map_df.Description.str.contains(description).any():
                        print(f"Computing {description} matching")

                        match_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "matching": ["Metadata_gene"],
                        }

                        reference_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "non_matching": ["Metadata_gene"],
                        }

                        metric = utils.AveragePrecision(
                            modality_2_consensus_df,
                            match_dict,
                            reference_dict,
                            100,
                            random_baseline_ap,
                            anti_match=False,
                        )

                        #random_baseline_ap = metric.random_baseline_ap

                        matching_map_df, matching_fp_df = utils.create_matching_df(
                            matching_map_df,
                            matching_fp_df,
                            metric,
                            modality_2_perturbation,
                            cell,
                            modality_2_timepoint,
                        )

                # Filter out genes that are not perturbed by ORFs or CRISPRs
                perturbed_genes = list(
                    set(modality_2_consensus_df.Metadata_matching_target)
                )
                modality_1_consensus_expanded_filtered_df = (
                    modality_1_consensus_expanded_df.query(
                        "Metadata_matching_target==@perturbed_genes"
                    ).reset_index(drop=True)
                )

                # Calculate gene-compound matching mAP
                description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}-{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                print(f"Computing {description} matching")

                modality_1_modality_2_df = utils.concat_profiles(
                    modality_1_consensus_expanded_filtered_df, modality_2_consensus_df
                )

                match_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "matching": ["Metadata_matching_target"],
                    "non_matching": ["Metadata_modality"],
                }

                reference_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "non_matching": ["Metadata_target_list", "Metadata_modality"],
                }

                metric = utils.AveragePrecision(
                    modality_1_modality_2_df,
                    match_dict,
                    reference_dict,
                    100,
                    random_baseline_ap,
                    anti_match=True,
                )

                #random_baseline_ap = metric.random_baseline_ap

                (
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                ) = utils.create_gene_compound_matching_df(
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                    metric,
                    modality_1_perturbation,
                    modality_2_perturbation,
                    cell,
                    modality_1_timepoint,
                    modality_2_timepoint,
                )

Computing compound_A549_short replicability


100%|█████████████████████████████████████████| 260/260 [00:31<00:00,  8.19it/s]


260
_fp= 0.36153846153846153
Computing compound_A549_short matching


100%|█████████████████████████████████████████| 314/314 [01:00<00:00,  5.20it/s]


105
_fp= 0.41904761904761906
Computing crispr_A549_long replicability


100%|█████████████████████████████████████████| 247/247 [00:43<00:00,  5.63it/s]


247
_fp= 0.21052631578947367
Computing crispr_A549_long matching


100%|███████████████████████████████████████████| 43/43 [00:05<00:00,  7.28it/s]


9
_fp= 0.3333333333333333
Computing compound_A549_short-crispr_A549_long matching


100%|███████████████████████████████████████████| 43/43 [01:39<00:00,  2.32s/it]


25
_fp= 0.0
Computing crispr_A549_short replicability


100%|█████████████████████████████████████████| 247/247 [00:34<00:00,  7.15it/s]


247
_fp= 0.23481781376518218
Computing crispr_A549_short matching


100%|███████████████████████████████████████████| 46/46 [00:06<00:00,  7.54it/s]


12
_fp= 0.4166666666666667
Computing compound_A549_short-crispr_A549_short matching


100%|███████████████████████████████████████████| 46/46 [02:11<00:00,  2.86s/it]


29
_fp= 0.06896551724137931
Computing compound_A549_long replicability


100%|█████████████████████████████████████████| 260/260 [00:26<00:00,  9.66it/s]


260
_fp= 0.6230769230769231
Computing compound_A549_long matching


100%|█████████████████████████████████████████| 500/500 [01:15<00:00,  6.66it/s]


225
_fp= 0.39111111111111113
Computing compound_A549_long-crispr_A549_long matching


100%|███████████████████████████████████████████| 43/43 [02:25<00:00,  3.38s/it]


39
_fp= 0.05128205128205128
Computing compound_A549_long-crispr_A549_short matching


100%|███████████████████████████████████████████| 46/46 [02:34<00:00,  3.37s/it]


41
_fp= 0.0975609756097561
Computing compound_U2OS_short replicability


100%|█████████████████████████████████████████| 260/260 [00:41<00:00,  6.23it/s]


260
_fp= 1.0
Computing compound_U2OS_short matching


100%|█████████████████████████████████████████| 706/706 [01:48<00:00,  6.50it/s]


384
_fp= 0.7708333333333334
Computing crispr_U2OS_long replicability


100%|█████████████████████████████████████████| 247/247 [00:33<00:00,  7.29it/s]


247
_fp= 0.032388663967611336
Computing crispr_U2OS_long matching


100%|████████████████████████████████████████████| 8/8 [00:00<00:00, 564.09it/s]


Computing compound_U2OS_short-crispr_U2OS_long matching


100%|█████████████████████████████████████████████| 8/8 [00:17<00:00,  2.16s/it]


8
_fp= 0.0
Computing crispr_U2OS_short replicability


100%|█████████████████████████████████████████| 247/247 [00:34<00:00,  7.19it/s]


247
_fp= 0.048582995951417005
Computing crispr_U2OS_short matching


100%|███████████████████████████████████████████| 11/11 [00:05<00:00,  2.09it/s]


1
_fp= 0.0
Computing compound_U2OS_short-crispr_U2OS_short matching


100%|███████████████████████████████████████████| 11/11 [00:53<00:00,  4.82s/it]


11
_fp= 0.0
Computing compound_U2OS_long replicability


100%|█████████████████████████████████████████| 260/260 [00:25<00:00, 10.39it/s]


260
_fp= 1.0
Computing compound_U2OS_long matching


100%|█████████████████████████████████████████| 706/706 [01:48<00:00,  6.51it/s]


384
_fp= 0.78125
Computing crispr_U2OS_long matching


100%|████████████████████████████████████████████| 8/8 [00:00<00:00, 548.48it/s]


Computing compound_U2OS_long-crispr_U2OS_long matching


100%|█████████████████████████████████████████████| 8/8 [00:17<00:00,  2.18s/it]


8
_fp= 0.0
Computing compound_U2OS_long-crispr_U2OS_short matching


100%|███████████████████████████████████████████| 11/11 [00:51<00:00,  4.71s/it]

11
_fp= 0.0





Tables of mAP values and the fraction positive mAP (fp) are printed and bar plots are plotted.

Replicability fraction positive.

In [73]:
gene_compound_matching_map_df['mAP'].mean()

-0.29902234542781103

In [74]:
gene_compound_matching_fp_df['fp'].mean()

0.02725

In [75]:
replicability_fp_df[["Description", "timepoint", "fp"]]#.to_csv('11b.csv')
print(
    replicability_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False)
)

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.362 |
| crispr_A549_long    |         144 | 0.211 |
| crispr_A549_short   |          96 | 0.235 |
| compound_A549_long  |          48 | 0.623 |
| compound_U2OS_short |          24 | 1     |
| crispr_U2OS_long    |         144 | 0.032 |
| crispr_U2OS_short   |          96 | 0.049 |
| compound_U2OS_long  |          48 | 1     |


Within perturbation matching fractive positive.

In [76]:
print(matching_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False))

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.419 |
| crispr_A549_long    |         144 | 0.333 |
| crispr_A549_short   |          96 | 0.417 |
| compound_A549_long  |          48 | 0.391 |
| compound_U2OS_short |          24 | 0.771 |
| crispr_U2OS_long    |         144 | 0     |
| crispr_U2OS_short   |          96 | 0     |
| compound_U2OS_long  |          48 | 0.781 |
| crispr_U2OS_long    |         144 | 0     |


Gene-compound matching fraction positive.

In [77]:
gene_compound_matching_fp_df[["Description", "Cell", "fp"]]#.to_csv('22b.csv')
print(
    gene_compound_matching_fp_df[["Description", "Cell", "fp"]].to_markdown(
        index=False
    )
)

| Description                           | Cell   |    fp |
|:--------------------------------------|:-------|------:|
| compound_A549_short-crispr_A549_long  | A549   | 0     |
| compound_A549_short-crispr_A549_short | A549   | 0.069 |
| compound_A549_long-crispr_A549_long   | A549   | 0.051 |
| compound_A549_long-crispr_A549_short  | A549   | 0.098 |
| compound_U2OS_short-crispr_U2OS_long  | U2OS   | 0     |
| compound_U2OS_short-crispr_U2OS_short | U2OS   | 0     |
| compound_U2OS_long-crispr_U2OS_long   | U2OS   | 0     |
| compound_U2OS_long-crispr_U2OS_short  | U2OS   | 0     |


In [78]:
import plotly
import plotly.graph_objects as go

In [79]:
y_range=[0,1.0]
fig=go.Figure()
fig = px.bar(
    data_frame=replicability_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('11b.png')
fig.show()
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_replicability_fp.png", width=640, height=480, scale=2
#)

In [80]:
fig = px.bar(
    data_frame=matching_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_fp.png", width=640, height=480, scale=2
#)

In [81]:
y_range=[           0,0.25          ]
fig = px.bar(
    gene_compound_matching_fp_df,
    x="Modality2",
    y="fp",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Genetic perturbation"),
    xaxis2=dict(title="Genetic perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('22b.png')
fig.show()



#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_gene_compound_matching_fp.png",
#    width=640,
#    height=480,
#    scale=2,
#)

In [82]:
y_range=[    -0.5,0.8            ]


fig = px.box(
    data_frame=replicability_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('33b.png')
fig.show()


In [83]:
fig = px.box(
    data_frame=matching_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",title='after norm & demean'
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_map.png", width=640, height=480, scale=2
#)

In [84]:
y_range=[      -0.8,0.5            ]


fig = px.box(
    data_frame=gene_compound_matching_map_df,
    x="Modality2",
    y="mAP",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('44b.png')
fig.show()

Tables of mAP and fp values are written to file.