In [6]:
import pandas as pd
import requests

# Reading data:
disease = ''
filter_column = 'fdr'
threshold = 0.05
file = 'BRCA-1_ExactMatch_LogFC_ALL_gene.stats.annotated.txt'

# These maps needs to be finalized later:
CELL_MAP = {
    'SIDM00146': {'diseaseModel': 'NCIT_C9140', 'tissue': 'bone marrow'} # https://cellmodelpassports.sanger.ac.uk/passports/SIDM01076
}

In [10]:
# Reading and parsing data:
encore_df = (
    pd.read_csv(file, sep=' ')
    .assign(genes=lambda df: df.id.str.split('~'))
    .drop('id', axis=1)
)


encore_df.head()

Unnamed: 0,Note1,Note2,num,All.SIDM00146_CPID1310_p-value,All.SIDM00146_CPID1310_fdr,All.SIDM00146_CPID1310_goodsgrna,All.SIDM00146_CPID1310_lfc,All.SIDM00146_CPID1313_p-value,All.SIDM00146_CPID1313_fdr,All.SIDM00146_CPID1313_goodsgrna,All.SIDM00146_CPID1313_lfc,All.SIDM00146_CPID1316_p-value,All.SIDM00146_CPID1316_fdr,All.SIDM00146_CPID1316_goodsgrna,All.SIDM00146_CPID1316_lfc,genes
0,LibrarySingletons,LibrarySingletons,4,0.061033,0.895601,1,0.10993,0.13422,1.0,1,0.16987,0.98758,1.0,0,-0.002495,"[ABL1, ADAD1]"
1,LibraryCombinations,LibraryCombinations,4,0.94197,1.0,0,-0.014936,0.97306,1.0,0,-0.051022,0.98119,1.0,0,-0.066961,"[ABL1, AKT1]"
2,LibraryCombinations,LibraryCombinations,4,0.98975,1.0,0,-0.059942,0.96151,1.0,0,-0.11264,0.92061,1.0,1,-0.032323,"[ABL1, AR]"
3,LibraryCombinations,LibraryCombinations,4,0.95663,1.0,0,-0.19717,0.96759,1.0,0,-0.16062,0.94644,1.0,0,-0.26832,"[ABL1, ARID1A]"
4,LibraryCombinations,LibraryCombinations,4,0.9343,1.0,1,-0.039618,0.33419,1.0,0,0.09695,0.96665,1.0,0,-0.007884,"[ABL1, ARID1B]"


In [11]:
filter_columns = [
    'All.SIDM00146_CPID1310_fdr', 
    'All.SIDM00146_CPID1313_fdr', 
    'All.SIDM00146_CPID1316_fdr'
]

for filtc in filter_columns:
    hits = encore_df.loc[encore_df[filtc] < threshold]
    print(f'{filtc} -> {len(hits)}')



All.SIDM00146_CPID1310_fdr -> 834
All.SIDM00146_CPID1313_fdr -> 739
All.SIDM00146_CPID1316_fdr -> 782


In [12]:

{
    # constants for data source:
    "datatypeId": "ot_partner",
    "datasourceId": "encore",
    "projectId": "OTAR2062",

    # Disease:
    "diseaseFromSourceId": "NCIT_C9140",

    # Target:
    "targetFromSourceId": "ADAD1",  # interactor 1
    "geneticBackground": "AKT1",  # interactor 2

    # Information on study:
    "studyId": "BRCA-1 ExactMatch LogFC ALL_gene.stats",
    "studyOverview": "",
    
    # Library:
    "crisprScreenLibrary": "",
    
    # Information on cell:
    "biosamplesFromSource": [ # Might not be suitable here
        "UBERON_0002371"  # bone marrow
    ], 
    "cellModelId": "SIDM01076", # cell model passport identifier -> reference on Sanger
    
    # Experimental details:
    "replicates": [
        "CPID1310": {
            "logFoldChange": 0.37098,
            "pValue": 0.000528,
            "falseDiscoveryRate": 0.059907
        },
        "CPID1313": {
            "logFoldChange": 0.41678,
            "pValue": 0.000007,
            "falseDiscoveryRate": 0.004514 # this is significant
        },
        "CPID1316": {
            "logFoldChange": 0.013133,
            "pValue": 4.964300e-05,
            "falseDiscoveryRate": 0.40404
        }
    ]
    
    # Evidence summary:
    "confidence": "1" # Number of significant replicate -> can be inferred
    "effectDirection": "synergistic" # synergistic, antagonistic, mixed -> can be inferred
}

Unnamed: 0,Note1,Note2,num,All.SIDM00146_CPID1310_p-value,All.SIDM00146_CPID1310_fdr,All.SIDM00146_CPID1310_goodsgrna,All.SIDM00146_CPID1310_lfc,All.SIDM00146_CPID1313_p-value,All.SIDM00146_CPID1313_fdr,All.SIDM00146_CPID1313_goodsgrna,All.SIDM00146_CPID1313_lfc,All.SIDM00146_CPID1316_p-value,All.SIDM00146_CPID1316_fdr,All.SIDM00146_CPID1316_goodsgrna,All.SIDM00146_CPID1316_lfc,genes
131,AnchorSingletons,AnchorSingletons,8,0.000528,0.059907,4,0.37098,0.000007,0.004514,5,0.41678,4.964300e-05,0.013133,5,0.40404,"[ADAD1, AKT1]"
132,AnchorSingletons,AnchorSingletons,8,0.000732,0.067379,4,0.24102,0.007676,0.290819,3,0.30487,7.199500e-05,0.017483,5,0.48005,"[ADAD1, AR]"
134,AnchorSingletons,AnchorSingletons,8,0.000014,0.005582,5,0.26691,0.000762,0.070436,4,0.22985,2.781000e-05,0.008684,5,0.27052,"[ADAD1, ARID1B]"
161,AnchorSingletons,AnchorSingletons,8,0.000158,0.029275,4,0.33357,0.000285,0.043446,4,0.41423,2.599100e-07,0.000495,6,0.56185,"[ADAD1, EZH2]"
163,AnchorSingletons,AnchorSingletons,8,0.000355,0.049039,4,0.24416,0.005427,0.244956,3,0.32625,1.702400e-04,0.028952,4,0.38816,"[ADAD1, FGFR1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18908,AnchorCombinations,AnchorCombinations,16,0.000038,0.003435,15,-1.07640,0.000009,0.001472,15,-1.04850,1.169600e-05,0.001569,16,-1.07680,"[YAP1, MYC]"
18909,AnchorCombinations,AnchorCombinations,16,0.000273,0.041545,4,0.17392,0.000448,0.054995,4,0.19882,2.409400e-04,0.036950,5,0.18986,"[YAP1, PARP1]"
18912,AnchorCombinations,AnchorCombinations,16,0.000109,0.007408,16,-1.18800,0.001826,0.054170,15,-1.21110,3.144900e-05,0.003346,15,-1.27970,"[YAP1, PRMT1]"
18913,AnchorCombinations,AnchorCombinations,16,0.001445,0.043506,15,-0.81465,0.000062,0.005614,16,-1.03990,7.615300e-05,0.006225,16,-1.03660,"[YAP1, PRMT5]"


In [13]:
256 * 45

11520