In [50]:
import tabulate
import numpy as np

In [51]:
import warnings
import pandas as pd
warnings.simplefilter(action="ignore", category=FutureWarning)
import pandas as pd
import utils
import plotly.express as px
import random

random.seed(12527)

### Calculating mean Average Precision (mAP) for CellProfiler features
In this notebook, we calculate the mAP for perturbation detection and perturbation matching tasks for the CellProfiler features.

All compound, ORF and CRISPR plates from the `2020_11_04_CPJUMP1` batch are read, except for those with cell seeding density other than 1000 (100%) and compound plates with antibiotics (Puromycin/Blasticidin) added. Additional annotations for the compounds are also read.

In [52]:
replicability_map_df = pd.DataFrame()
replicability_fp_df = pd.DataFrame()
matching_map_df = pd.DataFrame()
matching_fp_df = pd.DataFrame()
gene_compound_matching_map_df = pd.DataFrame()
gene_compound_matching_fp_df = pd.DataFrame()

replicate_feature = "Metadata_broad_sample"
batch = "2020_11_04_CPJUMP1"

experiment_df = (
    pd.read_csv("output/experiment-metadata.tsv", sep="\t")
    .query("Batch==@batch")
    .query("Density==100")
    .query('Antibiotics=="absent"')
)

experiment_df.drop(
    experiment_df[
        (experiment_df.Perturbation == "compound") & (experiment_df.Cell_line == "Cas9")
    ].index,
    inplace=True,
)
experiment_df=experiment_df.iloc[:32,:]##########
#experiment_df=experiment_df[experiment_df['Time']>48]
target1_metadata = pd.read_csv(
    "input/JUMP-Target-1_compound_metadata_additional_annotations.tsv",
    sep="\t",
    usecols=["broad_sample", "target_list"],
).rename(
    columns={
        "broad_sample": "Metadata_broad_sample",
        "target_list": "Metadata_target_list",
    }
)

Normalized, feature selected compound profiles for replicate plates at the same time point, using the same cell type are merged into a single data frame. Empty wells are removed before calculating mAP for perturbation detection (retrieve replicates of the query compound). mAP for the random baseline (10000 random list of size number of replicates + number of negcon) is subtracted.

For the perturbation matching task, compounds that not are replicable (mAP <= 0) are removed. Negative control wells are also removed and the additional target annotations are added before mAP for perturbation matching is calculated. Again, the mAP for the random baseline is subtracted.

Similarly, mAP for perturbation detection is computed for both CRISPR and ORF plates while mAP for perturbation matching is computed only for the CRISPR guides as there are no sister ORF perturbations.

Then, mAP for the chemical perturbation-genetic perturbation matching task is computed using the median consensus profiles and the baseline mAP is subtracted.

Along with mAP, fraction positive is also calculated. This is the fraction of queries that have mAP > 0.

The mAP and fraction positive values as saved to `output` directory and the results are plotted.

In [53]:
################################

In [54]:
pd.read_csv('/data/datacenter/H3C_GPU/projects/yuchen/MLP_A549_new.csv')


Columns (13,16) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0.2,Unnamed: 0.1,index,Unnamed: 0,Metadata_broad_sample,Metadata_solvent,Metadata_experiment_type,Metadata_Plate,Metadata_Well,Metadata_InChIKey,Metadata_pert_iname,...,291,292,293,294,295,296,297,298,299,Metadata_experiment_type.1
0,0,768,0,BRDN0001480888,,CRISPR,BR00118046,A01,,,...,8.968628,0.0,27.649503,17.909319,0.000000,2.761963,3.589142,12.965521,7.233359,CRISPR
1,1,769,1,BRDN0001483495,,CRISPR,BR00118046,A02,,,...,3.346680,0.0,22.536591,13.813405,0.000000,11.167616,0.000000,12.858206,2.091142,CRISPR
2,2,770,2,BRDN0001147364,,CRISPR,BR00118046,A03,,,...,4.243326,0.0,13.472766,8.939457,0.000000,5.308858,6.101765,11.086233,1.757021,CRISPR
3,3,771,3,BRDN0001490272,,CRISPR,BR00118046,A04,,,...,6.150981,0.0,11.912103,8.161925,0.000000,8.464292,3.943527,11.449199,0.000000,CRISPR
4,4,772,4,BRDN0001480510,,CRISPR,BR00118046,A05,,,...,7.687547,0.0,2.291660,23.749684,0.350961,6.065820,1.336212,13.590516,10.231117,CRISPR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7240,3137,19447,333,,DMSO,Compound,BR00117024,N22,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,...,0.000000,0.0,0.000000,4.651270,11.437581,1.880494,0.000000,0.000000,2.737951,Compound
7241,3138,19456,342,,DMSO,Compound,BR00117024,O07,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,...,4.275188,0.0,0.000000,6.340729,3.903886,11.082582,2.497553,6.417137,7.174428,Compound
7242,3139,19467,353,,DMSO,Compound,BR00117024,O18,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,...,1.170106,0.0,0.000000,13.171538,9.162112,1.836388,0.000000,0.000000,8.335145,Compound
7243,3140,19478,364,,DMSO,Compound,BR00117024,P05,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,...,7.709268,0.0,23.506757,15.704053,0.000000,17.983968,0.000000,0.000000,20.321495,Compound


In [55]:
df=pd.read_csv('/data/datacenter/H3C_GPU/projects/yuchen/MLP_short_new.csv')
for i in range(len(df)):
    if df['Metadata_gene'][i] == 'empty':
        df['Metadata_gene'][i] = np.nan 
df=df.drop(columns='Metadata_experiment_type')


Columns (13,16) have mixed types. Specify dtype option on import or set low_memory=False.



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See

In [58]:
sp=df
sp=df.iloc[:,3:-1]
sp

Unnamed: 0,Metadata_broad_sample,Metadata_solvent,Metadata_Plate,Metadata_Well,Metadata_InChIKey,Metadata_pert_iname,Metadata_pubchem_cid,Metadata_gene,Metadata_pert_type,Metadata_control_type,...,290,291,292,293,294,295,296,297,298,299
0,BRD-A86665761-001-01-1,DMSO,BR00117016,A01,TZDUHAJSIBHXDL-UHFFFAOYSA-N,gabapentin-enacarbil,9883900.0,CACNB4,trt,,...,0.0,11.518283,1.683153,5.191323,3.301635,11.081951,0.0,0.000000,2.539623,6.112760
1,BRD-A22032524-074-09-9,DMSO,BR00117016,A03,HTIQEAQVCYTUBX-UHFFFAOYSA-N,amlodipine,2162.0,CACNA2D3,trt,,...,0.0,1.312874,10.275602,3.115033,0.000000,14.450455,0.0,22.283476,1.418894,0.000000
2,BRD-A01078468-001-14-8,DMSO,BR00117016,A04,PBBGSZCBWVPOOL-UHFFFAOYSA-N,hexestrol,3606.0,AKR1C1,trt,,...,0.0,9.814232,6.501790,0.000000,0.000000,12.617857,0.0,6.110269,5.401022,5.705215
3,BRD-K48278478-001-01-2,DMSO,BR00117016,A05,LOUPRKONTZGTKE-AFHBHXEDSA-N,quinine,94175.0,KCNN4,trt,,...,0.0,7.472938,6.407268,2.208799,5.620018,10.163329,0.0,1.344020,3.104664,2.387055
4,BRD-K36574127-001-01-3,DMSO,BR00117016,A06,NYNZQNWKBKUAII-KBXCAEBGSA-N,LOXO-101,46189000.0,NTRK1,trt,,...,0.0,8.887173,0.000000,3.644039,7.865713,8.347805,0.0,1.247775,6.743547,5.183995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13997,,DMSO,BR00117024,N22,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.0,8.267288,0.000000,8.439116,0.000000,2.463392,0.0,13.243841,3.373278,5.203211
13998,,DMSO,BR00117024,O07,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.0,6.741641,1.103867,2.655304,4.464654,5.094829,0.0,5.721407,4.856809,3.289021
13999,,DMSO,BR00117024,O18,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.0,5.214181,1.697345,4.019416,0.000000,3.877396,0.0,12.686780,1.477192,6.545932
14000,,DMSO,BR00117024,P05,IAZDPXIOMUYVGZ-UHFFFAOYSA-N,DMSO,679.0,,control,negcon,...,0.0,9.661363,0.000000,2.748788,0.000000,23.525183,0.0,0.000000,0.766751,14.732885


In [59]:
cats = sp['Metadata_Plate'].unique()

In [60]:
for cat in cats: 
    sp.loc[sp['Metadata_Plate']==cat,:].to_csv('data/'+str(cat)+'_MLP_short_new.csv',sep=',',index=False)

In [61]:
#################################

In [62]:
random_baseline_ap = pd.DataFrame(columns=["ap", "n_matches", "n_reference"])

for cell in experiment_df.Cell_type.unique():
    cell_df = experiment_df.query("Cell_type==@cell")
    modality_1_perturbation = "compound"
    modality_1_experiments_df = cell_df.query("Perturbation==@modality_1_perturbation")
    for modality_1_timepoint in modality_1_experiments_df.Time.unique():
        modality_1_timepoint_df = modality_1_experiments_df.query(
            "Time==@modality_1_timepoint"
        )
        modality_1_df = pd.DataFrame()
        for plate in modality_1_timepoint_df.Assay_Plate_Barcode.unique():
            # Read all the plates
            data_df = utils.load_data(
                plate, "MLP_short_new.csv"
            ).assign(Metadata_modality=modality_1_perturbation)
            modality_1_df = utils.concat_profiles(modality_1_df, data_df)

        # Set Metadata_broad_sample value to "DMSO" for DMSO wells
        modality_1_df[replicate_feature].fillna("DMSO", inplace=True)

        # Remove empty wells
        modality_1_df = utils.remove_empty_wells(modality_1_df)

        # Description
        description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}"

        # Calculate replicability mAP
        print(f"Computing {description} replicability")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_broad_sample"],
        }

        reference_dict = {
            "filter": {
                "Metadata_pert_type": ["'trt'"],
                "Metadata_control_type": [
                    "'poscon_orf'",
                    "'poscon_diverse'",
                    "'poscon_cp'",
                ],
            },
            "matching": ["Metadata_Plate"],
        }

        metric = utils.AveragePrecision(
            modality_1_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=False,
        )

        #random_baseline_ap = metric.random_baseline_ap

        replicability_map_df, replicability_fp_df = utils.create_replicability_df(
            replicability_map_df,
            replicability_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        # Remove DMSO wells
        modality_1_df = utils.remove_negcon_and_empty_wells(modality_1_df)

        # Create consensus profiles
        modality_1_consensus_df = utils.consensus(modality_1_df, replicate_feature)

        # Filter out non-replicable compounds
        replicable_compounds = list(
            replicability_map_df[
                (replicability_map_df.Description == description)
                & (replicability_map_df.mAP > 0)
            ][replicate_feature]
        )
        modality_1_consensus_df = modality_1_consensus_df.query(
            "Metadata_broad_sample==@replicable_compounds"
        ).reset_index(drop=True)

        # Adding additional gene annotation metadata
        modality_1_consensus_df = modality_1_consensus_df.merge(
            target1_metadata, on="Metadata_broad_sample", how="left"
        )

        # Expand compound profiles and add new matching_target column
        modality_1_consensus_expanded_df = (
            modality_1_consensus_df.assign(
                Metadata_matching_target=lambda x: x.Metadata_target_list.str.split("|")
            )
            .explode("Metadata_matching_target")
            .reset_index(drop=True)
        )
        modality_1_consensus_df = modality_1_consensus_df.assign(
            Metadata_matching_target=lambda x: x.Metadata_target_list
        )

        # Calculate compound-compound matching
        print(f"Computing {description} matching")

        match_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "matching": ["Metadata_matching_target"],
        }

        reference_dict = {
            "filter": {"Metadata_control_type": ["'negcon'"]},
            "non_matching": ["Metadata_target_list"],
        }

        metric = utils.AveragePrecision(
            modality_1_consensus_expanded_df,
            match_dict,
            reference_dict,
            100,
            random_baseline_ap,
            anti_match=True,
        )

        #random_baseline_ap = metric.random_baseline_ap

        matching_map_df, matching_fp_df = utils.create_matching_df(
            matching_map_df,
            matching_fp_df,
            metric,
            modality_1_perturbation,
            cell,
            modality_1_timepoint,
        )

        all_modality_2_experiments_df = cell_df.query(
            "Perturbation!=@modality_1_perturbation"
        )
        for (
            modality_2_perturbation
        ) in all_modality_2_experiments_df.Perturbation.unique():
            modality_2_experiments_df = all_modality_2_experiments_df.query(
                "Perturbation==@modality_2_perturbation"
            )
            for modality_2_timepoint in modality_2_experiments_df.Time.unique():
                modality_2_timepoint_df = modality_2_experiments_df.query(
                    "Time==@modality_2_timepoint"
                )

                modality_2_df = pd.DataFrame()
                for plate in modality_2_timepoint_df.Assay_Plate_Barcode.unique():
                    data_df = (
                        utils.load_data(
                            plate,
                            "MLP_short_new.csv",
                        )
                        .assign(Metadata_modality=modality_2_perturbation)
                        .assign(Metadata_matching_target=lambda x: x.Metadata_gene)
                        .assign(Metadata_target_list=lambda x: x.Metadata_gene)
                    )
                    modality_2_df = utils.concat_profiles(modality_2_df, data_df)

                # Remove empty wells
                modality_2_df = utils.remove_empty_wells(modality_2_df)

                # Description
                description = f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"

                # Calculate replicability mAP

                if not replicability_map_df.Description.str.contains(description).any():
                    print(f"Computing {description} replicability")

                    match_dict = {
                        "filter": {"Metadata_control_type": ["'negcon'"]},
                        "matching": ["Metadata_broad_sample"],
                    }

                    reference_dict = {
                        "filter": {
                            "Metadata_pert_type": ["'trt'"],
                            "Metadata_control_type": [
                                "'poscon_orf'",
                                "'poscon_diverse'",
                                "'poscon_cp'",
                            ],
                        },
                        "matching": ["Metadata_Plate"],
                    }

                    metric = utils.AveragePrecision(
                        modality_2_df,
                        match_dict,
                        reference_dict,
                        100,
                        random_baseline_ap,
                        anti_match=False,
                    )

                    #random_baseline_ap = metric.random_baseline_ap

                    (
                        replicability_map_df,
                        replicability_fp_df,
                    ) = utils.create_replicability_df(
                        replicability_map_df,
                        replicability_fp_df,
                        metric,
                        modality_2_perturbation,
                        cell,
                        modality_2_timepoint,
                    )

                # Remove negcon wells
                modality_2_df = utils.remove_negcon_and_empty_wells(modality_2_df)

                # Create consensus profiles
                modality_2_consensus_df = utils.consensus(
                    modality_2_df, "Metadata_broad_sample"
                )

                # Filter out non-replicable genes
                replicable_genes = list(
                    replicability_map_df[
                        (
                            replicability_map_df.Description
                            == f"{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                        )
                        & (replicability_map_df.mAP > 0)
                    ][replicate_feature]
                )
                modality_2_consensus_df = modality_2_consensus_df.query(
                    "Metadata_broad_sample==@replicable_genes"
                ).reset_index(drop=True)

                # Calculate cripsr-crispr matching
                if modality_2_perturbation == "crispr":
                    if not matching_map_df.Description.str.contains(description).any():
                        print(f"Computing {description} matching")

                        match_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "matching": ["Metadata_gene"],
                        }

                        reference_dict = {
                            "filter": {"Metadata_control_type": ["'negcon'"]},
                            "non_matching": ["Metadata_gene"],
                        }

                        metric = utils.AveragePrecision(
                            modality_2_consensus_df,
                            match_dict,
                            reference_dict,
                            100,
                            random_baseline_ap,
                            anti_match=False,
                        )

                        #random_baseline_ap = metric.random_baseline_ap

                        matching_map_df, matching_fp_df = utils.create_matching_df(
                            matching_map_df,
                            matching_fp_df,
                            metric,
                            modality_2_perturbation,
                            cell,
                            modality_2_timepoint,
                        )

                # Filter out genes that are not perturbed by ORFs or CRISPRs
                perturbed_genes = list(
                    set(modality_2_consensus_df.Metadata_matching_target)
                )
                modality_1_consensus_expanded_filtered_df = (
                    modality_1_consensus_expanded_df.query(
                        "Metadata_matching_target==@perturbed_genes"
                    ).reset_index(drop=True)
                )

                # Calculate gene-compound matching mAP
                description = f"{modality_1_perturbation}_{cell}_{utils.time_point(modality_1_perturbation, modality_1_timepoint)}-{modality_2_perturbation}_{cell}_{utils.time_point(modality_2_perturbation, modality_2_timepoint)}"
                print(f"Computing {description} matching")

                modality_1_modality_2_df = utils.concat_profiles(
                    modality_1_consensus_expanded_filtered_df, modality_2_consensus_df
                )

                match_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "matching": ["Metadata_matching_target"],
                    "non_matching": ["Metadata_modality"],
                }

                reference_dict = {
                    "filter": {"Metadata_control_type": ["'negcon'"]},
                    "non_matching": ["Metadata_target_list", "Metadata_modality"],
                }

                metric = utils.AveragePrecision(
                    modality_1_modality_2_df,
                    match_dict,
                    reference_dict,
                    100,
                    random_baseline_ap,
                    anti_match=True,
                )

                #random_baseline_ap = metric.random_baseline_ap

                (
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                ) = utils.create_gene_compound_matching_df(
                    gene_compound_matching_map_df,
                    gene_compound_matching_fp_df,
                    metric,
                    modality_1_perturbation,
                    modality_2_perturbation,
                    cell,
                    modality_1_timepoint,
                    modality_2_timepoint,
                )

Computing compound_A549_short replicability


100%|█████████████████████████████████████████| 260/260 [00:31<00:00,  8.15it/s]


260
_fp= 0.7576923076923077
Computing compound_A549_short matching


100%|█████████████████████████████████████████| 597/597 [01:26<00:00,  6.91it/s]


280
_fp= 0.2714285714285714
Computing crispr_A549_long replicability


100%|█████████████████████████████████████████| 247/247 [00:42<00:00,  5.80it/s]


247
_fp= 0.7408906882591093
Computing crispr_A549_long matching


100%|█████████████████████████████████████████| 115/115 [00:07<00:00, 14.86it/s]


68
_fp= 0.11764705882352941
Computing compound_A549_short-crispr_A549_long matching


100%|█████████████████████████████████████████| 115/115 [01:11<00:00,  1.60it/s]


110
_fp= 0.08181818181818182
Computing crispr_A549_short replicability


100%|█████████████████████████████████████████| 247/247 [00:34<00:00,  7.09it/s]


247
_fp= 0.7004048582995951
Computing crispr_A549_short matching


100%|█████████████████████████████████████████| 113/113 [00:07<00:00, 14.77it/s]


60
_fp= 0.15
Computing compound_A549_short-crispr_A549_short matching


100%|█████████████████████████████████████████| 113/113 [01:12<00:00,  1.56it/s]


109
_fp= 0.10091743119266056
Computing compound_A549_long replicability


100%|█████████████████████████████████████████| 260/260 [00:25<00:00, 10.19it/s]


260
_fp= 0.8884615384615384
Computing compound_A549_long matching


100%|█████████████████████████████████████████| 668/668 [01:46<00:00,  6.29it/s]


330
_fp= 0.2787878787878788
Computing compound_A549_long-crispr_A549_long matching


100%|█████████████████████████████████████████| 115/115 [01:14<00:00,  1.55it/s]


114
_fp= 0.14035087719298245
Computing compound_A549_long-crispr_A549_short matching


100%|█████████████████████████████████████████| 113/113 [01:12<00:00,  1.56it/s]


112
_fp= 0.09821428571428571
Computing compound_U2OS_short replicability


100%|█████████████████████████████████████████| 260/260 [00:44<00:00,  5.90it/s]


260
_fp= 0.9653846153846154
Computing compound_U2OS_short matching


100%|█████████████████████████████████████████| 699/699 [01:45<00:00,  6.62it/s]


368
_fp= 0.37228260869565216
Computing crispr_U2OS_long replicability


100%|█████████████████████████████████████████| 247/247 [00:34<00:00,  7.12it/s]


247
_fp= 0.582995951417004
Computing crispr_U2OS_long matching


100%|█████████████████████████████████████████| 101/101 [00:07<00:00, 14.29it/s]


43
_fp= 0.3023255813953488
Computing compound_U2OS_short-crispr_U2OS_long matching


100%|█████████████████████████████████████████| 101/101 [01:10<00:00,  1.42it/s]


101
_fp= 0.1188118811881188
Computing crispr_U2OS_short replicability


100%|█████████████████████████████████████████| 247/247 [00:33<00:00,  7.37it/s]


247
_fp= 0.6761133603238867
Computing crispr_U2OS_short matching


100%|█████████████████████████████████████████| 113/113 [00:07<00:00, 14.84it/s]


54
_fp= 0.16666666666666666
Computing compound_U2OS_short-crispr_U2OS_short matching


100%|█████████████████████████████████████████| 113/113 [01:13<00:00,  1.53it/s]


113
_fp= 0.07964601769911504
Computing compound_U2OS_long replicability


100%|█████████████████████████████████████████| 260/260 [00:24<00:00, 10.45it/s]


260
_fp= 0.9076923076923077
Computing compound_U2OS_long matching


100%|█████████████████████████████████████████| 682/682 [01:39<00:00,  6.84it/s]


331
_fp= 0.3685800604229607
Computing compound_U2OS_long-crispr_U2OS_long matching


100%|█████████████████████████████████████████| 101/101 [01:03<00:00,  1.60it/s]


100
_fp= 0.11
Computing compound_U2OS_long-crispr_U2OS_short matching


100%|█████████████████████████████████████████| 113/113 [01:04<00:00,  1.75it/s]

112
_fp= 0.05357142857142857





Tables of mAP values and the fraction positive mAP (fp) are printed and bar plots are plotted.

Replicability fraction positive.

In [63]:
gene_compound_matching_map_df['mAP'].mean()

-0.10123351841422171

In [64]:
gene_compound_matching_fp_df['fp'].mean()

0.098

In [65]:
replicability_fp_df[["Description", "timepoint", "fp"]]#.to_csv('11b.csv')
print(
    replicability_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False)
)

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.758 |
| crispr_A549_long    |         144 | 0.741 |
| crispr_A549_short   |          96 | 0.7   |
| compound_A549_long  |          48 | 0.888 |
| compound_U2OS_short |          24 | 0.965 |
| crispr_U2OS_long    |         144 | 0.583 |
| crispr_U2OS_short   |          96 | 0.676 |
| compound_U2OS_long  |          48 | 0.908 |


Within perturbation matching fractive positive.

In [66]:
print(matching_fp_df[["Description", "timepoint", "fp"]].to_markdown(index=False))

| Description         |   timepoint |    fp |
|:--------------------|------------:|------:|
| compound_A549_short |          24 | 0.271 |
| crispr_A549_long    |         144 | 0.118 |
| crispr_A549_short   |          96 | 0.15  |
| compound_A549_long  |          48 | 0.279 |
| compound_U2OS_short |          24 | 0.372 |
| crispr_U2OS_long    |         144 | 0.302 |
| crispr_U2OS_short   |          96 | 0.167 |
| compound_U2OS_long  |          48 | 0.369 |


Gene-compound matching fraction positive.

In [67]:
gene_compound_matching_fp_df[["Description", "Cell", "fp"]]#.to_csv('22b.csv')
print(
    gene_compound_matching_fp_df[["Description", "Cell", "fp"]].to_markdown(
        index=False
    )
)

| Description                           | Cell   |    fp |
|:--------------------------------------|:-------|------:|
| compound_A549_short-crispr_A549_long  | A549   | 0.082 |
| compound_A549_short-crispr_A549_short | A549   | 0.101 |
| compound_A549_long-crispr_A549_long   | A549   | 0.14  |
| compound_A549_long-crispr_A549_short  | A549   | 0.098 |
| compound_U2OS_short-crispr_U2OS_long  | U2OS   | 0.119 |
| compound_U2OS_short-crispr_U2OS_short | U2OS   | 0.08  |
| compound_U2OS_long-crispr_U2OS_long   | U2OS   | 0.11  |
| compound_U2OS_long-crispr_U2OS_short  | U2OS   | 0.054 |


In [68]:
import plotly
import plotly.graph_objects as go

In [69]:
y_range=[0,1.0]
fig=go.Figure()
fig = px.bar(
    data_frame=replicability_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('11b.png')
fig.show()
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_replicability_fp.png", width=640, height=480, scale=2
#)

In [70]:
fig = px.bar(
    data_frame=matching_fp_df,
    x="Modality",
    y="fp",
    facet_row="Cell",
    facet_col="time",
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_fp.png", width=640, height=480, scale=2
#)

In [71]:
y_range=[           0,0.25          ]
fig = px.bar(
    gene_compound_matching_fp_df,
    x="Modality2",
    y="fp",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Genetic perturbation"),
    xaxis2=dict(title="Genetic perturbation"),
    yaxis1=dict(title="Fraction Positive"),
    yaxis3=dict(title="Fraction Positive"),
)
#fig.write_image('22b.png')
fig.show()



#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_gene_compound_matching_fp.png",
#    width=640,
#    height=480,
#    scale=2,
#)

In [72]:
y_range=[    -0.5,0.8            ]


fig = px.box(
    data_frame=replicability_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('33b.png')
fig.show()


In [73]:
fig = px.box(
    data_frame=matching_map_df,
    x="Modality",
    y="mAP",
    facet_row="Cell",
    facet_col="time",title='after norm & demean'
)
fig.update_layout(
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)
#fig.show("png")
#fig.write_image(
#    f"figures/1.CellProfiler_matching_map.png", width=640, height=480, scale=2
#)

In [74]:
y_range=[      -0.8,0.5            ]


fig = px.box(
    data_frame=gene_compound_matching_map_df,
    x="Modality2",
    y="mAP",
    facet_col="Modality1",
    facet_row="Cell",
    title='after norm & demean'
)
fig.update_layout(
    yaxis_range=y_range,
    xaxis1=dict(title="Perturbation"),
    xaxis2=dict(title="Perturbation"),
    yaxis1=dict(title="mAP"),
    yaxis3=dict(title="mAP"),
)


#fig.write_image('44b.png')
fig.show()

Tables of mAP and fp values are written to file.