__Aim:__
- [x] 
- [ ] 

___

### Load libraries and configure paths

In [1]:
import numpy as np 
import pandas as pd
import anndata as ad
import screenpro as scp

from screenpro.load import loadScreenProcessingData, read_screen_pkl

In [2]:
from scripts.util import *

In [3]:
import matplotlib.pyplot as plt

from matplotlib import font_manager as fm
from matplotlib import rcParams

# Collect the paths of the TrueType ('ttf') fonts that matplotlib finds on this
# system; fontpaths=None means no extra search directories are given.
font_files = fm.findSystemFonts(fontpaths=None, fontext='ttf')

# Register every discovered font file with matplotlib's global font manager
# (presumably so that 'Arial' below can be resolved by name — confirm).
for font_file in font_files:
    fm.fontManager.addfont(font_file)

# Debugging aid (kept commented out): list the registered font names with
# {f.name for f in fm.fontManager.ttflist}

# Use Arial as the font family for all figures drawn in this notebook.
rcParams['font.family'] = ['Arial']

___

In [4]:
import igraph as ig

In [5]:
from tdc.utils.knowledge_graph import KnowledgeGraph, build_KG

In [6]:
# Root directory of the DAC project data: passed to load_data() and used as the
# prefix of the CRISPRi-screen folders read further down.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
wd = '/data_gilbert/home/aarab/AML/Decitabine-treatment/DAC'

### 

In [7]:
# Load the comparison tables; `comps` is later handed as `data=` to the
# set_Top_Mtyl / set_Top_Exp / set_Top_Stbl helpers.
# NOTE(review): load_data is not defined in this notebook — presumably it comes
# from the `from scripts.util import *` star-import; confirm.
comps = load_data(comparisons=True, wd=wd)

### 

- **$\Delta$RNA methylation** (HL-60 cell line)

In [8]:
# Experiment key (the `cell_line` argument of the build_*_kg functions)
# -> display name used as x_id / x_name of the cell-line node.
cell_line_names = dict(
    hl60_72h='HL-60',
    molm13='MOLM-13',
    skm1='SKM-1',
)

In [15]:
def build_mtyl_kg(cell_line):
    """Build the RNA-methylation edge table for one cell line.

    Calls ``set_Top_Mtyl(1, 0.05, data=comps)`` and, for its 'up' and 'down'
    entries, creates one ``build_KG`` table each: every edge links the
    cell-line node (``cell_line_names[cell_line]``, source 'CCLE') to a gene
    node (taken from ``gene_name``, source 'NCBI') with relation
    'RNA_methylation' and display relation 'hypermethylated' (up) or
    'hypomethylated' (down).

    NOTE(review): ``cell_line`` is only used to label the cell-line node;
    ``set_Top_Mtyl`` is called without it — confirm the methylation data are
    specific to the cell line passed in.

    Parameters
    ----------
    cell_line : str
        Key of ``cell_line_names`` (e.g. 'hl60_72h').

    Returns
    -------
    pandas.DataFrame
        The 'up' table followed by the 'down' table; the row index is not
        reset, so each part keeps its own 0-based labels.
    """
    top_mtyl = set_Top_Mtyl(1, 0.05, data=comps)

    edge_tables = []
    for direction, shown_as in (('up', 'hypermethylated'), ('down', 'hypomethylated')):
        genes = top_mtyl[direction]
        kg = build_KG(
            indices=genes.reset_index(drop=True).index,  # row labels of the output frame
            relation='RNA_methylation',
            display_relation=shown_as,

            # x side: the cell line
            x_id=cell_line_names[cell_line],
            x_type='cell_line',
            x_name=cell_line_names[cell_line],
            x_source='CCLE',

            # y side: the affected genes
            y_id=genes.gene_name.to_list(),
            y_type='gene',
            y_name=genes.gene_name.to_list(),
            y_source='NCBI',
        )
        edge_tables.append(kg.df)

    return pd.concat(edge_tables)

In [17]:
# Methylation knowledge graph, built for the 'hl60_72h' experiment key only.
mtyl_kg = KnowledgeGraph(df=build_mtyl_kg('hl60_72h'))

Subset Top Mtyl data frame:
up:  1704
down: 1210
(fc_thr=1, pv_thr=0.05)


### 

- **$\Delta$RNA expression** (For all 6 AML cell lines; only MOLM-13, SKM-1, and HL-60 are used below)

In [18]:
# Experiment key -> display name of the cell-line node.
# NOTE(review): this repeats, entry for entry, the `cell_line_names` mapping
# already defined in the RNA-methylation section; keep a single definition so
# the two cannot drift apart.
cell_line_names = {
    'hl60_72h':'HL-60',
    'molm13':'MOLM-13',
    'skm1':'SKM-1'
}

In [19]:
def build_exp_kg(cell_line):
    """Build the RNA-expression edge table for one cell line.

    Calls ``set_Top_Exp(0.1, 0.05, data=comps, comp=cell_line)`` and, for its
    'up' and 'down' entries, creates one ``build_KG`` table each: every edge
    links the cell-line node (``cell_line_names[cell_line]``, source 'CCLE')
    to a gene node (taken from ``gene_name``, source 'NCBI') with relation
    'RNA_expression' and display relation 'up_regulated' or 'down_regulated'.

    Parameters
    ----------
    cell_line : str
        Key of ``cell_line_names``; also passed to ``set_Top_Exp`` as ``comp``.

    Returns
    -------
    pandas.DataFrame
        The 'up' table followed by the 'down' table; the row index is not
        reset, so each part keeps its own 0-based labels.
    """
    top_exp = set_Top_Exp(0.1, 0.05, data=comps, comp=cell_line)

    edge_tables = []
    for direction, shown_as in (('up', 'up_regulated'), ('down', 'down_regulated')):
        genes = top_exp[direction]
        kg = build_KG(
            indices=genes.reset_index(drop=True).index,  # row labels of the output frame
            relation='RNA_expression',
            display_relation=shown_as,

            # x side: the cell line
            x_id=cell_line_names[cell_line],
            x_type='cell_line',
            x_name=cell_line_names[cell_line],
            x_source='CCLE',

            # y side: the affected genes
            y_id=genes.gene_name.to_list(),
            y_type='gene',
            y_name=genes.gene_name.to_list(),
            y_source='NCBI',
        )
        edge_tables.append(kg.df)

    return pd.concat(edge_tables)

In [20]:
# Expression knowledge graph: stack the per-experiment edge tables in the order
# molm13, skm1, hl60_72h and renumber the rows 0..n-1.
exp_kg = KnowledgeGraph(
    df=pd.concat(
        [build_exp_kg(key) for key in ('molm13', 'skm1', 'hl60_72h')],
        ignore_index=True,
    )
)

Subset Top Exp data frame:
up:  2841
down: 4410
(molm13)
(fc_thr=0.1, pv_thr=0.05
Subset Top Exp data frame:
up:  5060
down: 5036
(skm1)
(fc_thr=0.1, pv_thr=0.05
Subset Top Exp data frame:
up:  3512
down: 2732
(hl60_72h)
(fc_thr=0.1, pv_thr=0.05


### 

- **$\Delta$RNA stability** (For all 6 AML cell lines; only MOLM-13, SKM-1, and HL-60 are used below)

In [21]:
def build_stbl_kg(cell_line):
    """Build the RNA-stability edge table for one cell line.

    Calls ``set_Top_Stbl(0.1, 0.05, data=comps, comp=cell_line)`` and, for its
    'up' and 'down' entries, creates one ``build_KG`` table each: every edge
    links the cell-line node (``cell_line_names[cell_line]``, source 'CCLE')
    to a gene node (taken from ``gene_name``, source 'NCBI') with relation
    'RNA_stability' and display relation 'up_regulated' or 'down_regulated'.

    Parameters
    ----------
    cell_line : str
        Key of ``cell_line_names``; also passed to ``set_Top_Stbl`` as ``comp``.

    Returns
    -------
    pandas.DataFrame
        The 'up' table followed by the 'down' table; the row index is not
        reset, so each part keeps its own 0-based labels.
    """
    top_stbl = set_Top_Stbl(0.1, 0.05, data=comps, comp=cell_line)

    edge_tables = []
    for direction, shown_as in (('up', 'up_regulated'), ('down', 'down_regulated')):
        genes = top_stbl[direction]
        kg = build_KG(
            indices=genes.reset_index(drop=True).index,  # row labels of the output frame
            relation='RNA_stability',
            display_relation=shown_as,

            # x side: the cell line
            x_id=cell_line_names[cell_line],
            x_type='cell_line',
            x_name=cell_line_names[cell_line],
            x_source='CCLE',

            # y side: the affected genes
            y_id=genes.gene_name.to_list(),
            y_type='gene',
            y_name=genes.gene_name.to_list(),
            y_source='NCBI',
        )
        edge_tables.append(kg.df)

    return pd.concat(edge_tables)

In [22]:
# Stability knowledge graph: stack the per-experiment edge tables in the order
# molm13, skm1, hl60_72h and renumber the rows 0..n-1.
stbl_kg = KnowledgeGraph(
    df=pd.concat(
        [build_stbl_kg(key) for key in ('molm13', 'skm1', 'hl60_72h')],
        ignore_index=True,
    )
)

Subset Top Stbl data frame:
up:  1708
down: 1978
(molm13)
(fc_thr=0.1, pv_thr=0.05
Subset Top Stbl data frame:
up:  1480
down: 1869
(skm1)
(fc_thr=0.1, pv_thr=0.05
Subset Top Stbl data frame:
up:  566
down: 494
(hl60_72h)
(fc_thr=0.1, pv_thr=0.05


### 

- **$\Delta$Phenotype, CRISPRi-screen $\rho$(rho) score** (hl60, skm1, and molm13 cell lines) 

In [23]:
def read_rho_phenotype_table(screen_path, threshold=5):
    """Read the rho phenotype scores of a CRISPRi screen and annotate them.

    Reads ``{screen_path}/genetable_collapsed.txt`` as a tab-separated table
    with a three-row column header and the first column as index, selects the
    ``['rho']['ave_Rep1_Rep2']`` sub-table, keeps the gene, 'average phenotype
    of strongest 3' and 'Mann-Whitney p-value' columns, and renames them to
    'target', 'rho score' and 'pvalue'. The result is passed through
    ``scp.utils.ann_score_df`` (presumably this adds the ``label`` column that
    ``get_top_hits`` queries — confirm).

    Parameters
    ----------
    screen_path : str
        Directory that contains ``genetable_collapsed.txt``.
    threshold : int, default 5
        Forwarded to ``scp.utils.ann_score_df``.

    Returns
    -------
    pandas.DataFrame
        The annotated score table, indexed by 'target'.
    """
    gene_table = pd.read_csv(
        f'{screen_path}/genetable_collapsed.txt',
        sep='\t',
        header=[0, 1, 2],
        index_col=[0],
    )

    # Drill down the three column levels to the replicate-averaged rho scores.
    rho_table = gene_table['rho']['ave_Rep1_Rep2'].reset_index()

    scores = rho_table[
        ['gene', 'average phenotype of strongest 3', 'Mann-Whitney p-value']
    ]
    scores.columns = ['target', 'rho score', 'pvalue']

    annotated = scp.utils.ann_score_df(scores, threshold=threshold, ctrl_label='pseudo')
    return annotated.set_index('target')

In [24]:
def get_top_hits(df):
    """Split an annotated score table into resistance and sensitivity hits.

    Parameters
    ----------
    df : pandas.DataFrame
        Table in which ``label`` can be queried (e.g. a ``label`` column).

    Returns
    -------
    dict
        ``{"up": rows labelled "resistance_hit",
        "down": rows labelled "sensitivity_hit"}``.
    """
    resistance_rows = df.query('label == "resistance_hit"')
    sensitivity_rows = df.query('label == "sensitivity_hit"')
    return {"up": resistance_rows, "down": sensitivity_rows}

def build_rho_kg(df, cell_line):
    """Build the CRISPRi-screen edge table for one cell line.

    Splits ``df`` with ``get_top_hits`` and, for its 'up' and 'down' entries,
    creates one ``build_KG`` table each: every edge links the cell-line node
    (``cell_line_names[cell_line]``, source 'CCLE') to a gene node (taken from
    the row index of the hits, source 'NCBI') with relation 'CRISPRi' and
    display relation 'resistance_hit' (up) or 'sensitivity_hit' (down).

    Parameters
    ----------
    df : pandas.DataFrame
        Annotated score table accepted by ``get_top_hits``; its index supplies
        the gene identifiers.
    cell_line : str
        Key of ``cell_line_names``.

    Returns
    -------
    pandas.DataFrame
        The 'up' table followed by the 'down' table; the row index is not
        reset, so each part keeps its own 0-based labels.
    """
    top_hits = get_top_hits(df)

    edge_tables = []
    for direction, shown_as in (('up', 'resistance_hit'), ('down', 'sensitivity_hit')):
        hits = top_hits[direction]
        kg = build_KG(
            indices=hits.reset_index(drop=True).index,  # row labels of the output frame
            relation='CRISPRi',
            display_relation=shown_as,

            # x side: the cell line
            x_id=cell_line_names[cell_line],
            x_type='cell_line',
            x_name=cell_line_names[cell_line],
            x_source='CCLE',

            # y side: the hit genes, identified by the table's index
            y_id=hits.index.to_list(),
            y_type='gene',
            y_name=hits.index.to_list(),
            y_source='NCBI',
        )
        edge_tables.append(kg.df)

    return pd.concat(edge_tables)

___

In [25]:
# CRISPRi-screen knowledge graph: read each screen folder, turn its hits into
# edges for the matching experiment key, stack the tables and renumber the rows.
rho_kg = KnowledgeGraph(
    df=pd.concat(
        [
            build_rho_kg(read_rho_phenotype_table(screen_dir), cell_line_key)
            for screen_dir, cell_line_key in [
                (f'{wd}/CRISPRi-screen/molm13_exp/', 'molm13'),
                (f'{wd}/CRISPRi-screen/skm1_exp/', 'skm1'),
                (f'{wd}/CRISPRi-screen/hl60_exp1/', 'hl60_72h'),
            ]
        ],
        ignore_index=True,
    )
)

# 

In [26]:
# Combined graph of expression, stability and CRISPRi edges for all three cell
# lines, with rows renumbered 0..n-1. The methylation graph is not part of it;
# it is added later to the HL-60 subset only.
DAC_kg = KnowledgeGraph(
    pd.concat([exp_kg.df, stbl_kg.df, rho_kg.df], ignore_index=True)
)

In [41]:
# DAC_kg.get_nodes_by_source('NCBI')

In [45]:
# Sanity check: number of edges per relation, direction and cell line in the
# combined graph.
DAC_kg.df.groupby(['relation','display_relation','x_name']).size()

relation        display_relation  x_name 
CRISPRi         resistance_hit    HL-60       251
                                  MOLM-13     117
                                  SKM-1       136
                sensitivity_hit   HL-60       306
                                  MOLM-13     128
                                  SKM-1       683
RNA_expression  down_regulated    HL-60      2732
                                  MOLM-13    4410
                                  SKM-1      5036
                up_regulated      HL-60      3512
                                  MOLM-13    2841
                                  SKM-1      5060
RNA_stability   down_regulated    HL-60       494
                                  MOLM-13    1978
                                  SKM-1      1869
                up_regulated      HL-60       566
                                  MOLM-13    1708
                                  SKM-1      1480
dtype: int64

# 

In [43]:
# HL-60-only graph. Start from a copy so that DAC_kg itself is left untouched.
# NOTE(review): this relies on KnowledgeGraph.run_query filtering the copy in
# place — confirm against the tdc implementation.
DAC_hl60_kg = DAC_kg.copy()

DAC_hl60_kg.run_query("x_name == 'HL-60'")

# Append the methylation edges and renumber the rows. Without the reset, the
# filtered rows and the methylation rows (themselves two stacked tables) would
# share row labels, unlike every other graph built in this notebook.
DAC_hl60_kg = KnowledgeGraph(
    df=pd.concat([DAC_hl60_kg.df, mtyl_kg.df]).reset_index(drop=True)
)

In [46]:
# Sanity check: number of edges per relation and direction in the HL-60 graph,
# which now also contains the RNA_methylation edges.
DAC_hl60_kg.df.groupby(['relation','display_relation','x_name']).size()

relation         display_relation  x_name
CRISPRi          resistance_hit    HL-60      251
                 sensitivity_hit   HL-60      306
RNA_expression   down_regulated    HL-60     2732
                 up_regulated      HL-60     3512
RNA_methylation  hypermethylated   HL-60     1704
                 hypomethylated    HL-60     1210
RNA_stability    down_regulated    HL-60      494
                 up_regulated      HL-60      566
dtype: int64

In [47]:
# Display the HL-60 graph object.
# NOTE(review): the recorded output is only the default object repr; showing
# `DAC_hl60_kg.df.head()` instead would be more informative.
DAC_hl60_kg

<tdc.utils.knowledge_graph.KnowledgeGraph at 0x7f896bd07a00>

# 

In [29]:
# Load (or reload) the watermark IPython extension used by the cells below.
%reload_ext watermark

In [45]:
# Record the run timestamp, Python/IPython versions and machine details.
%watermark

Last updated: 2023-12-10T07:37:04.264006-08:00

Python implementation: CPython
Python version       : 3.9.16
IPython version      : 8.14.0

Compiler    : GCC 11.3.0
OS          : Linux
Release     : 3.10.0-957.27.2.el7.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit



In [46]:
# Record the versions of the packages imported in this session.
%watermark --iversions

numpy     : 1.24.4
seaborn   : 0.12.2
anndata   : 0.9.1
screenpro : 0.2.5
matplotlib: 3.7.2
igraph    : 0.10.4
pandas    : 1.5.3



In [48]:
# Shell out to `date` to stamp the time of the run.
# NOTE(review): `%watermark` above already prints a "Last updated" timestamp.
!date

Sun Dec 10 07:37:13 PST 2023
