## Computational analysis of virus-host protein interactions
### A case study of the HPV16 E7 oncogene and the human POU5F1 transcription factor

### Load packages

In [1]:
import os
import warnings
from importlib import reload

import pandas as pd

import ChIPNet

# Silence every warning for the rest of the session to keep cell output clean.
# NOTE(review): with no category or module given, this filter hides all
# warnings, including deprecation notices -- consider narrowing it.
warnings.filterwarnings("ignore")

### Reload packages (if changes were made)

In [18]:
# Re-import ChIPNet so edits made to the module since the first import take
# effect without restarting the kernel. The trailing semicolon keeps the
# notebook from echoing the returned module object.
reload(ChIPNet);

### Load data

In [3]:
# Input tables kept directly under the data folder. The paths are assembled
# with os.path.join so the notebook also runs where the path separator is not
# a backslash; on Windows the resulting strings are unchanged.
oct4_data_filepath = os.path.join("data", "oct4_data.txt")
e7_data_filepath = os.path.join("data", "e7_data.txt")
# NOTE(review): the variable is named "hgnc" but points at complexes.txt --
# confirm this is the intended file.
hgnc_data_filepath = os.path.join("data", "complexes.txt")

In [4]:
# Directories holding the per-protein ChIP data and the TCGA data. They are
# built with os.path.join so the separators match the host operating system;
# on Windows the resulting strings are unchanged.
oct4_chip_dir = os.path.join("data", "chip_data", "oct4")
e7_chip_dir = os.path.join("data", "chip_data", "e7")
tcga_dir = os.path.join("data", "tcga")

In [5]:
# Read the raw inputs from the three data directories. The two ChIP results
# are later passed to ChIPNet.chip_df_constructor as `input_df_list`, and the
# TCGA result to ChIPNet.deg_df_constructor as `deg_data_list`.
# NOTE(review): presumably each reader returns one entry per file found in
# the directory -- confirm against the ChIPNet implementation.
oct4_chip_df_list = ChIPNet.chip_data_reader(directory=oct4_chip_dir)
e7_chip_df_list = ChIPNet.chip_data_reader(directory=e7_chip_dir)
tcga_data_list = ChIPNet.deg_reader(directory=tcga_dir)

### Merge data

In [6]:
# Build a single POU5F1 ChIP table from the list read above; the preview in
# the next cell shows the columns TF, Target Gene, macs2 and q-value.
oct4_chip_df = ChIPNet.chip_df_constructor(input_df_list=oct4_chip_df_list, protein="POU5F1")

In [7]:
# Preview the first rows of the merged POU5F1 ChIP table.
oct4_chip_df.head()

Unnamed: 0,TF,Target Gene,macs2,q-value
1,POU5F1,MTRNR2L8,3269.841667,0.0
2,POU5F1,REST,3205.466667,2.84e-321
3,POU5F1,STAT3,2967.2375,1.889078e-297
4,POU5F1,MTRNR2L2,2700.240278,9.461766e-271
5,POU5F1,OBI1,1922.151389,6.093419999999999e-193


In [8]:
# Build a single E7 ChIP table from the list read above, using the same
# constructor as for POU5F1.
e7_chip_df = ChIPNet.chip_df_constructor(input_df_list=e7_chip_df_list, protein="E7")

In [9]:
# Preview the first rows of the merged E7 ChIP table.
e7_chip_df.head()

Unnamed: 0,TF,Target Gene,macs2,q-value
1,E7,UBE2T,13732.237179,0.0
2,E7,PCNA,11435.904582,0.0
3,E7,CDKN2D,12014.038462,0.0
4,E7,MCM3,11472.170406,0.0
5,E7,TUBA1B,10221.5,0.0


In [10]:
# Build the TCGA table from the data read above; the preview in the next cell
# shows the columns DEGs and Expression.
tcga_df = ChIPNet.deg_df_constructor(deg_data_list=tcga_data_list)

In [11]:
# Preview the first rows of the TCGA table.
tcga_df.head()

Unnamed: 0,DEGs,Expression
0,FAM188B,"[1.0, 4]"
1,C12ORF75,"[1.0, 3]"
2,IFITM2,"[1.0, 5]"
3,SLC30A1,"[1.0, 2]"
4,SDCCAG3,"[1.0, 2]"


### Compare data to extract target genes

In [13]:
# Compare the POU5F1 ChIP table with the TCGA table to extract POU5F1 target
# genes; the preview in the next cell shows the columns TF, Genes, q-value and
# Expression.
oct4_gene_targets_df = ChIPNet.gene_targets(merged_chip_df=oct4_chip_df, merged_deg_df=tcga_df, protein="POU5F1")

In [15]:
# Preview the first rows of the POU5F1 target-gene table.
oct4_gene_targets_df.head()

Unnamed: 0,TF,Genes,q-value,Expression
0,POU5F1,OLIG3,2.681225e-14,High
1,POU5F1,LTBR,7.2861819999999995e-37,High
2,POU5F1,ARHGEF38,1.5925139999999998e-264,High
3,POU5F1,RNASET2,1.9232150000000002e-81,High
4,POU5F1,PLA2G2A,9.296796999999999e-58,High


In [16]:
# Compare the E7 ChIP table with the same TCGA table to extract E7 target
# genes.
e7_gene_targets_df = ChIPNet.gene_targets(merged_chip_df=e7_chip_df, merged_deg_df=tcga_df, protein="E7")

In [17]:
# Preview the first rows of the E7 target-gene table.
e7_gene_targets_df.head()

Unnamed: 0,TF,Genes,q-value,Expression
0,E7,CALR,0.0,High
1,E7,TFAP4,0.0,High
2,E7,RPLP0,0.0,High
3,E7,RDH5,0.0,High
4,E7,SETD6,0.0,High


In [19]:
# Combine the E7 and POU5F1 target-gene tables into one table of common
# targets; the output in the next cell shows the columns Source, Target,
# Regulation and Expression.
# NOTE(review): presumably only genes present in both inputs are kept --
# confirm against the ChIPNet implementation.
common_gene_targets_df = ChIPNet.common_gene_targets(gene_targets_a=e7_gene_targets_df, gene_targets_b=oct4_gene_targets_df)

In [20]:
# Display the full table of common targets.
common_gene_targets_df

Unnamed: 0,Source,Target,Regulation,Expression
0,POU5F1/E7,POU5F1,activation,High
1,POU5F1/E7,RGS5,activation,High
2,POU5F1/E7,NDUFS3,activation,High
3,POU5F1/E7,CKS1B,activation,High
4,POU5F1/E7,ZNF408,activation,High
5,POU5F1/E7,RAB4B,activation,High
6,POU5F1/E7,EIF3F,activation,High
7,POU5F1/E7,CHMP7,activation,High
8,POU5F1/E7,RNF220,activation,High
9,POU5F1/E7,IER5L,activation,High
