In [45]:
import tabulate
import numpy as np

In [46]:
import warnings
import pandas as pd
warnings.simplefilter(action="ignore", category=FutureWarning)
import pandas as pd
import utils
import plotly.express as px
import random

random.seed(12527)

### Calculating mean Average Precision (mAP) for CellProfiler features
In this notebook, we calculate the mAP for perturbation detection and perturbation matching tasks for the CellProfiler features.

All compound, ORF and CRISPR plates from the `2020_11_04_CPJUMP1` batch are read, except for those with cell seeding density other than 1000 (100%) and compound plates with antibiotics (Puromycin/Blasticidin) added. Additional annotations for the compounds are also read.

In [47]:
replicability_map_df = pd.DataFrame()
replicability_fp_df = pd.DataFrame()
matching_map_df = pd.DataFrame()
matching_fp_df = pd.DataFrame()
gene_compound_matching_map_df = pd.DataFrame()
gene_compound_matching_fp_df = pd.DataFrame()

replicate_feature = "Metadata_broad_sample"
batch = "2020_11_04_CPJUMP1"

experiment_df = (
    pd.read_csv("output/experiment-metadata.tsv", sep="\t")
    .query("Batch==@batch")
    .query("Density==100")
    .query('Antibiotics=="absent"')
)

experiment_df.drop(
    experiment_df[
        (experiment_df.Perturbation == "compound") & (experiment_df.Cell_line == "Cas9")
    ].index,
    inplace=True,
)
experiment_df=experiment_df.iloc[:32,:]##########
#experiment_df=experiment_df[experiment_df['Time']>48]

target1_metadata = pd.read_csv(
    "input/JUMP-Target-1_compound_metadata_additional_annotations.tsv",
    sep="\t",
    usecols=["broad_sample", "target_list"],
).rename(
    columns={
        "broad_sample": "Metadata_broad_sample",
        "target_list": "Metadata_target_list",
    }
)

Normalized, feature selected compound profiles for replicate plates at the same time point, using the same cell type are merged into a single data frame. Empty wells are removed before calculating mAP for perturbation detection (retrieve replicates of the query compound). mAP for the random baseline (10000 random list of size number of replicates + number of negcon) is subtracted.

For the perturbation matching task, compounds that not are replicable (mAP <= 0) are removed. Negative control wells are also removed and the additional target annotations are added before mAP for perturbation matching is calculated. Again, the mAP for the random baseline is subtracted.

Similarly, mAP for perturbation detection is computed for both CRISPR and ORF plates while mAP for perturbation matching is computed only for the CRISPR guides as there are no sister ORF perturbations.

Then, mAP for the chemical perturbation-genetic perturbation matching task is computed using the median consensus profiles and the baseline mAP is subtracted.

Along with mAP, fraction positive is also calculated. This is the fraction of queries that have mAP > 0.

The mAP and fraction positive values as saved to `output` directory and the results are plotted.

In [48]:
################################

In [49]:
df=pd.read_csv('/data/datacenter/H3C_GPU/projects/yuchen/MLP_short_old.csv')
for i in range(len(df)):
    if df['Metadata_gene'][i] == 'empty':
        df['Metadata_gene'][i] = np.nan 
df=df.drop(columns='Metadata_experiment_type')


Columns (13,16) have mixed types. Specify dtype option on import or set low_memory=False.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See

In [50]:
sp=df
sp=df.iloc[:,3:-1]
sp

Unnamed: 0,Metadata_broad_sample,Metadata_solvent,Metadata_Plate,Metadata_Well,Metadata_InChIKey,Metadata_pert_iname,Metadata_pubchem_cid,Metadata_gene,Metadata_pert_type,Metadata_control_type,...,Nuclei_Texture_InfoMeas2_RNA_3_02_256,Nuclei_Texture_InverseDifferenceMoment_AGP_10_03_256,Nuclei_Texture_InverseDifferenceMoment_DNA_5_01_256,Nuclei_Texture_InverseDifferenceMoment_ER_10_03_256,Nuclei_Texture_InverseDifferenceMoment_RNA_5_03_256,Nuclei_Texture_SumAverage_LowZBF_10_00_256,Nuclei_Texture_SumVariance_Brightfield_5_00_256,Nuclei_Texture_SumVariance_ER_10_03_256,Nuclei_Texture_SumVariance_HighZBF_3_00_256,Nuclei_Texture_SumVariance_Mito_10_01_256
0,BRD-A86665761-001-01-1,DMSO,BR00117016,A01,TZDUHAJSIBHXDL-UHFFFAOYSA-N,gabapentin-enacarbil,9883900.0,CACNB4,trt,,...,-1.506100,0.93330,1.21130,4.151900,2.903200,3.09630,2.32200,-2.688700,2.84840,1.320000
1,BRD-A22032524-074-09-9,DMSO,BR00117016,A03,HTIQEAQVCYTUBX-UHFFFAOYSA-N,amlodipine,2162.0,CACNA2D3,trt,,...,-3.884400,1.23730,1.24790,0.051965,4.282100,1.21730,1.05120,0.088298,0.09423,-2.008200
2,BRD-A01078468-001-14-8,DMSO,BR00117016,A04,PBBGSZCBWVPOOL-UHFFFAOYSA-N,hexestrol,3606.0,AKR1C1,trt,,...,-2.213100,3.27850,1.77080,2.979000,2.899900,3.68030,0.92525,-1.989700,2.55810,-1.128800
3,BRD-K48278478-001-01-2,DMSO,BR00117016,A05,LOUPRKONTZGTKE-AFHBHXEDSA-N,quinine,94175.0,KCNN4,trt,,...,-1.850100,0.68690,1.36260,2.217500,3.455700,1.28070,0.27294,-1.005500,1.24150,-0.824530
4,BRD-K36574127-001-01-3,DMSO,BR00117016,A06,NYNZQNWKBKUAII-KBXCAEBGSA-N,LOXO-101,46189000.0,NTRK1,trt,,...,-0.923700,1.52800,1.18750,2.569600,2.738900,2.75030,1.28200,-0.859520,1.71170,0.093898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13997,,DMSO,BR00117024,N22,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.297710,0.09902,-0.29071,0.282460,1.547800,-1.97880,-0.49864,0.800700,-0.71417,-0.373070
13998,,DMSO,BR00117024,O07,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.061839,-0.32857,0.92268,0.499020,-0.030485,-0.23714,-0.77677,-0.951560,0.68117,0.841050
13999,,DMSO,BR00117024,O18,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,-1.145800,1.32580,1.73160,1.230900,2.229900,-0.85915,0.94566,-0.565710,-1.10620,-0.061230
14000,,DMSO,BR00117024,P05,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.442590,-0.17811,3.58500,1.274500,-0.584940,6.14230,2.25800,-0.812900,0.58453,3.585800


In [51]:
cats = sp['Metadata_Plate'].unique()

In [52]:
for cat in cats: 
    sp.loc[sp['Metadata_Plate']==cat,:].to_csv('data/'+str(cat)+'_MLP_short_old.csv',sep=',',index=False)

In [53]:
#################################

In [54]:
random_baseline_ap = pd.DataFrame(columns=["ap", "n_matches", "n_reference"])

for cell in experiment_df.Cell_type.unique():
    cell_df = experiment_df.query("Cell_type==@cell")
    modality_1_perturbation = "compound"
    modality_1_experiments_df = cell_df.query("Perturbation==@modality_1_perturbation")
    for modality_1_timepoint in modality_1_experiments_df.Time.unique():
        modality_1_timepoint_df = modality_1_experiments_df.query(
            "Time==@modality_1_timepoint"
        )
        modality_1_df = pd.DataFrame()
        for plate in modality_1_timepoint_df.Assay_Plate_Barcode.unique():
            # Read all the plates
            data_df = utils.load_data(
                plate, "MLP_short_old.csv"
            ).assign(Metadata_modality=modality_1_perturbation)
            modality_1_df = utils.concat_profiles(modality_1_df, data_df)

        # Set Metadata_broad_sample value to "DMSO" for DMSO wells
        modality_1_df[replicate_feature].fillna("DMSO", inplace=True)

        # Remove empty wells
        modality_1_df = utils.remove_empty_wells(modality_1_df)

        # Description
        description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}"

        # Calculate replicability mAP
        print(f"Computing {description} replicability")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_broad_sample"],
        }

        reference_dict = {
            "filter": {
                "Metadata_pert_type": ["'trt'"],
                "Metadata_control_type": [
                    "'poscon_orf'",
                    "'poscon_diverse'",
                    "'poscon_cp'",
                ],
            },
            "matching": ["Metadata_Plate"],
        }

        metric = utils.AveragePrecision(
            modality_1_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=False,
        )

        #random_baseline_ap = metric.random_baseline_ap

        replicability_map_df, replicability_fp_df = utils.create_replicability_df(
            replicability_map_df,
            replicability_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        # Remove DMSO wells
        modality_1_df = utils.remove_negcon_and_empty_wells(modality_1_df)

        # Create consensus profiles
        modality_1_consensus_df = utils.consensus(modality_1_df, replicate_feature)

        # Filter out non-replicable compounds
        replicable_compounds = list(
            replicability_map_df[
                (replicability_map_df.Description == description)
                & (replicability_map_df.mAP > 0)
            ][replicate_feature]
        )
        modality_1_consensus_df = modality_1_consensus_df.query(
            "Metadata_broad_sample==@replicable_compounds"
        ).reset_index(drop=True)

        # Adding additional gene annotation metadata
        modality_1_consensus_df = modality_1_consensus_df.merge(
            target1_metadata, on="Metadata_broad_sample", how="left"
        )

        # Expand compound profiles and add new matching_target column
        modality_1_consensus_expanded_df = (
            modality_1_consensus_df.assign(
                Metadata_matching_target=lambda x: x.Metadata_target_list.str.split("|")
            )
            .explode("Metadata_matching_target")
            .reset_index(drop=True)
        )
        modality_1_consensus_df = modality_1_consensus_df.assign(
            Metadata_matching_target=lambda x: x.Metadata_target_list
        )

        # Calculate compound-compound matching
        print(f"Computing {description} matching")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_matching_target"],
        }

        reference_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "non_matching": ["Metadata_target_list"],
        }

        metric = utils.AveragePrecision(
            modality_1_consensus_expanded_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=True,
        )

        #random_baseline_ap = metric.random_baseline_ap

        matching_map_df, matching_fp_df = utils.create_matching_df(
            matching_map_df,
            matching_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        all_modality_2_experiments_df = cell_df.query(
            "Perturbation!=@modality_1_perturbation"
        )
        for (
            modality_2_perturbation
        ) in all_modality_2_experiments_df.Perturbation.unique():
            modality_2_experiments_df = all_modality_2_experiments_df.query(
                "Perturbation==@modality_2_perturbation"
            )
            for modality_2_timepoint in modality_2_experiments_df.Time.unique():
                modality_2_timepoint_df = modality_2_experiments_df.query(
                    "Time==@modality_2_timepoint"
                )

                modality_2_df = pd.DataFrame()
                for plate in modality_2_timepoint_df.Assay_Plate_Barcode.unique():
                    data_df = (
                        utils.load_data(
                            plate,
                            "MLP_short_old.csv",
                        )
                        .assign(Metadata_modality=modality_2_perturbation)
                        .assign(Metadata_matching_target=lambda x: x.Metadata_gene)
                        .assign(Metadata_target_list=lambda x: x.Metadata_gene)
                    )
                    modality_2_df = utils.concat_profiles(modality_2_df, data_df)

                # Remove empty wells
                modality_2_df = utils.remove_empty_wells(modality_2_df)

                # Description
                description = f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"

                # Calculate replicability mAP

                if not replicability_map_df.Description.str.contains(description).any():
                    print(f"Computing {description} replicability")

                    match_dict = {
                        "filter": {"Metadata_control_type": ["'negcon'"]},
                        "matching": ["Metadata_broad_sample"],
                    }

                    reference_dict = {
                        "filter": {
                            "Metadata_pert_type": ["'trt'"],
                            "Metadata_control_type": [
                                "'poscon_orf'",
                                "'poscon_diverse'",
                                "'poscon_cp'",
                            ],
                        },
                        "matching": ["Metadata_Plate"],
                    }

                    metric = utils.AveragePrecision(
                        modality_2_df,
                        match_dict,
                        reference_dict,
                        100,
                        random_baseline_ap,
                        anti_match=False,
                    )

                    #random_baseline_ap = metric.random_baseline_ap

                    (
                        replicability_map_df,
                        replicability_fp_df,
                    ) = utils.create_replicability_df(
                        replicability_map_df,
                        replicability_fp_df,
                        metric,
                        modality_2_perturbation,
                        cell,
                        modality_2_timepoint,
                    )

                # Remove negcon wells
                modality_2_df = utils.remove_negcon_and_empty_wells(modality_2_df)

                # Create consensus profiles
                modality_2_consensus_df = utils.consensus(
                    modality_2_df, "Metadata_broad_sample"
                )

                # Filter out non-replicable genes
                replicable_genes = list(
                    replicability_map_df[
                        (
                            replicability_map_df.Description
                            == f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                        )
                        & (replicability_map_df.mAP > 0)
                    ][replicate_feature]
                )
                modality_2_consensus_df = modality_2_consensus_df.query(
                    "Metadata_broad_sample==@replicable_genes"
                ).reset_index(drop=True)

                # Calculate cripsr-crispr matching
                if modality_2_perturbation == "crispr":
                    if not matching_map_df.Description.str.contains(description).any():
                        print(f"Computing {description} matching")

                        match_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "matching": ["Metadata_gene"],
                        }

                        reference_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "non_matching": ["Metadata_gene"],
                        }

                        metric = utils.AveragePrecision(
                            modality_2_consensus_df,
                            match_dict,
                            reference_dict,
                            100,
                            random_baseline_ap,
                            anti_match=False,
                        )

                        #random_baseline_ap = metric.random_baseline_ap

                        matching_map_df, matching_fp_df = utils.create_matching_df(
                            matching_map_df,
                            matching_fp_df,
                            metric,
                            modality_2_perturbation,
                            cell,
                            modality_2_timepoint,
                        )

                # Filter out genes that are not perturbed by ORFs or CRISPRs
                perturbed_genes = list(
                    set(modality_2_consensus_df.Metadata_matching_target)
                )
                modality_1_consensus_expanded_filtered_df = (
                    modality_1_consensus_expanded_df.query(
                        "Metadata_matching_target==@perturbed_genes"
                    ).reset_index(drop=True)
                )

                # Calculate gene-compound matching mAP
                description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}-{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                print(f"Computing {description} matching")

                modality_1_modality_2_df = utils.concat_profiles(
                    modality_1_consensus_expanded_filtered_df, modality_2_consensus_df
                )

                match_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "matching": ["Metadata_matching_target"],
                    "non_matching": ["Metadata_modality"],
                }

                reference_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "non_matching": ["Metadata_target_list", "Metadata_modality"],
                }

                metric = utils.AveragePrecision(
                    modality_1_modality_2_df,
                    match_dict,
                    reference_dict,
                    100,
                    random_baseline_ap,
                    anti_match=True,
                )

                #random_baseline_ap = metric.random_baseline_ap

                (
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                ) = utils.create_gene_compound_matching_df(
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                    metric,
                    modality_1_perturbation,
                    modality_2_perturbation,
                    cell,
                    modality_1_timepoint,
                    modality_2_timepoint,
                )

Computing compound_A549_short replicability


100%|█████████████████████████████████████████| 260/260 [00:37<00:00,  6.88it/s]


260
_fp= 0.6961538461538461
Computing compound_A549_short matching


100%|█████████████████████████████████████████| 578/578 [01:24<00:00,  6.82it/s]


262
_fp= 0.2748091603053435
Computing crispr_A549_long replicability


100%|█████████████████████████████████████████| 247/247 [00:46<00:00,  5.26it/s]


247
_fp= 0.6558704453441295
Computing crispr_A549_long matching


100%|█████████████████████████████████████████| 107/107 [00:07<00:00, 14.32it/s]


55
_fp= 0.18181818181818182
Computing compound_A549_short-crispr_A549_long matching


100%|█████████████████████████████████████████| 107/107 [01:01<00:00,  1.74it/s]


96
_fp= 0.0625
Computing crispr_A549_short replicability


100%|█████████████████████████████████████████| 247/247 [00:39<00:00,  6.29it/s]


247
_fp= 0.6923076923076923
Computing crispr_A549_short matching


100%|█████████████████████████████████████████| 109/109 [00:07<00:00, 14.40it/s]


62
_fp= 0.12903225806451613
Computing compound_A549_short-crispr_A549_short matching


100%|█████████████████████████████████████████| 109/109 [01:01<00:00,  1.78it/s]


99
_fp= 0.06060606060606061
Computing compound_A549_long replicability


100%|█████████████████████████████████████████| 260/260 [00:32<00:00,  8.12it/s]


260
_fp= 0.9346153846153846
Computing compound_A549_long matching


100%|█████████████████████████████████████████| 695/695 [01:46<00:00,  6.54it/s]


363
_fp= 0.26170798898071623
Computing compound_A549_long-crispr_A549_long matching


100%|█████████████████████████████████████████| 107/107 [01:14<00:00,  1.44it/s]


106
_fp= 0.09433962264150944
Computing compound_A549_long-crispr_A549_short matching


100%|█████████████████████████████████████████| 109/109 [01:16<00:00,  1.43it/s]


108
_fp= 0.07407407407407407
Computing compound_U2OS_short replicability


100%|█████████████████████████████████████████| 260/260 [00:47<00:00,  5.52it/s]


260
_fp= 0.7269230769230769
Computing compound_U2OS_short matching


100%|█████████████████████████████████████████| 582/582 [01:20<00:00,  7.23it/s]


239
_fp= 0.3682008368200837
Computing crispr_U2OS_long replicability


100%|█████████████████████████████████████████| 247/247 [00:38<00:00,  6.42it/s]


247
_fp= 0.5101214574898786
Computing crispr_U2OS_long matching


100%|███████████████████████████████████████████| 91/91 [00:07<00:00, 12.35it/s]


35
_fp= 0.17142857142857143
Computing compound_U2OS_short-crispr_U2OS_long matching


100%|███████████████████████████████████████████| 91/91 [00:58<00:00,  1.56it/s]


88
_fp= 0.09090909090909091
Computing crispr_U2OS_short replicability


100%|█████████████████████████████████████████| 247/247 [00:39<00:00,  6.22it/s]


247
_fp= 0.6720647773279352
Computing crispr_U2OS_short matching


100%|█████████████████████████████████████████| 108/108 [00:08<00:00, 13.41it/s]


58
_fp= 0.1724137931034483
Computing compound_U2OS_short-crispr_U2OS_short matching


100%|█████████████████████████████████████████| 108/108 [01:03<00:00,  1.70it/s]


104
_fp= 0.0673076923076923
Computing compound_U2OS_long replicability


100%|█████████████████████████████████████████| 260/260 [00:31<00:00,  8.36it/s]


260
_fp= 0.6192307692307693
Computing compound_U2OS_long matching


100%|█████████████████████████████████████████| 517/517 [01:20<00:00,  6.46it/s]


245
_fp= 0.4204081632653061
Computing compound_U2OS_long-crispr_U2OS_long matching


100%|███████████████████████████████████████████| 91/91 [00:49<00:00,  1.85it/s]


79
_fp= 0.1518987341772152
Computing compound_U2OS_long-crispr_U2OS_short matching


100%|█████████████████████████████████████████| 108/108 [00:58<00:00,  1.84it/s]

94
_fp= 0.0851063829787234





Tables of mAP values and the fraction positive mAP (fp) are printed and bar plots are plotted.

Replicability fraction positive.

In [55]:
gene_compound_matching_map_df['mAP'].mean()

-0.11689973719204338

In [56]:
gene_compound_matching_fp_df['fp'].mean()

0.08574999999999999

In [57]:
replicability_fp_df[["Description", "timepoint", "fp"]]#.to_csv('11b.csv')
print(
    replicability_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False)
)

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.696 |
| crispr_A549_long    |         144 | 0.656 |
| crispr_A549_short   |          96 | 0.692 |
| compound_A549_long  |          48 | 0.935 |
| compound_U2OS_short |          24 | 0.727 |
| crispr_U2OS_long    |         144 | 0.51  |
| crispr_U2OS_short   |          96 | 0.672 |
| compound_U2OS_long  |          48 | 0.619 |


Within perturbation matching fractive positive.

In [58]:
print(matching_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False))

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.275 |
| crispr_A549_long    |         144 | 0.182 |
| crispr_A549_short   |          96 | 0.129 |
| compound_A549_long  |          48 | 0.262 |
| compound_U2OS_short |          24 | 0.368 |
| crispr_U2OS_long    |         144 | 0.171 |
| crispr_U2OS_short   |          96 | 0.172 |
| compound_U2OS_long  |          48 | 0.42  |


Gene-compound matching fraction positive.

In [59]:
gene_compound_matching_fp_df[["Description", "Cell", "fp"]]#.to_csv('22b.csv')
print(
    gene_compound_matching_fp_df[["Description", "Cell", "fp"]].to_markdown(
        index=False
    )
)

| Description                           | Cell   |    fp |
|:--------------------------------------|:-------|------:|
| compound_A549_short-crispr_A549_long  | A549   | 0.062 |
| compound_A549_short-crispr_A549_short | A549   | 0.061 |
| compound_A549_long-crispr_A549_long   | A549   | 0.094 |
| compound_A549_long-crispr_A549_short  | A549   | 0.074 |
| compound_U2OS_short-crispr_U2OS_long  | U2OS   | 0.091 |
| compound_U2OS_short-crispr_U2OS_short | U2OS   | 0.067 |
| compound_U2OS_long-crispr_U2OS_long   | U2OS   | 0.152 |
| compound_U2OS_long-crispr_U2OS_short  | U2OS   | 0.085 |


In [60]:
import plotly
import plotly.graph_objects as go

In [61]:
y_range=[0,1.0]
fig=go.Figure()
fig = px.bar(
    data_frame=replicability_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('11b.png')
fig.show()
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_replicability_fp.png", width=640, height=480, scale=2
#)

In [62]:
fig = px.bar(
    data_frame=matching_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_fp.png", width=640, height=480, scale=2
#)

In [63]:
y_range=[           0,0.25          ]
fig = px.bar(
    gene_compound_matching_fp_df,
    x="Modality2",
    y="fp",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Genetic perturbation"),
    xaxis2=dict(title="Genetic perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('22b.png')
fig.show()



#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_gene_compound_matching_fp.png",
#    width=640,
#    height=480,
#    scale=2,
#)

In [64]:
y_range=[    -0.5,0.8            ]


fig = px.box(
    data_frame=replicability_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('33b.png')
fig.show()


In [65]:
fig = px.box(
    data_frame=matching_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",title='after norm & demean'
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_map.png", width=640, height=480, scale=2
#)

In [66]:
y_range=[      -0.8,0.5            ]


fig = px.box(
    data_frame=gene_compound_matching_map_df,
    x="Modality2",
    y="mAP",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('44b.png')
fig.show()

Tables of mAP and fp values are written to file.