# Ectopy: Ectopic expression analysis in Python

The code below presents a typical pipeline using the `ectopy` package to calculate the activation frequencies of ectopically expressed genes and to find potential prognostic biomarkers.

## Import data

In [15]:
import pandas as pd

# Folder that holds the two semicolon-separated input tables.
data_dir = 'data/'

# Expression table: rows are indexed by sample id, columns are genes.
data = pd.read_csv(
    data_dir + 'data.csv',
    sep=';',
    index_col='id_sample',
)

# Sample annotations, indexed by the same `id_sample` key as the expression table.
expgroup = pd.read_csv(
    data_dir + 'expgroup.csv',
    sep=';',
    index_col='id_sample',
)

Display data.

In [12]:
# Report the table dimensions, then preview the first three samples.
data_shape = data.shape
print('Data', data_shape)
data.head(n=3)

Data (1144, 3)


Unnamed: 0_level_0,DNMT3B,EXO1,MCM10
id_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TCGA-3C-AAAU-01A,2.293107,1.851411,1.472214
TCGA-3C-AALI-01A,2.213523,2.89314,2.099413
TCGA-3C-AALJ-01A,3.160032,1.735396,1.655712


Display a list of available genes.

In [16]:
# Every column of the expression table is a gene; keep the names as a plain list.
genes = data.columns.tolist()
print('Available genes', genes)

Available genes ['DNMT3B', 'EXO1', 'MCM10']


Display expgroup with tissue status (column `group`) and survival data (columns `time` and `event`).

In [14]:
# Report the annotation table dimensions, then preview the first three samples.
expgroup_shape = expgroup.shape
print('Expgroup', expgroup_shape)
expgroup.head(n=3)

Expgroup (1144, 3)


Unnamed: 0_level_0,group,time,event
id_sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TCGA-3C-AAAU-01A,tumoral,134.9,0.0
TCGA-3C-AALI-01A,tumoral,133.5,0.0
TCGA-3C-AALJ-01A,tumoral,49.13,0.0


## Create normal and tumoral datasets

In [5]:
# Split the sample annotations on the tissue status stored in the `group` column.
expgroup_normal = expgroup.loc[lambda annotations: annotations['group'] == 'normal']
expgroup_tumoral = expgroup.loc[lambda annotations: annotations['group'] == 'tumoral']

# Select the expression rows whose sample ids belong to each annotation subset.
normal = data.loc[expgroup_normal.index]
tumoral = data.loc[expgroup_tumoral.index]

In [20]:
# One summary line per tissue group: expression-table shape next to the
# annotation-table shape, so a mismatch in sample counts is easy to spot.
for label, expression, annotations in (
    ('Tumoral:', tumoral, expgroup_tumoral),
    ('Normal:', normal, expgroup_normal),
):
    print(label, 'data', expression.shape, 'expgroup', annotations.shape)

Tumoral: data (1047, 3) expgroup (1047, 3)
Normal: data (97, 3) expgroup (97, 3)


## Calculate m2sd threshold

In [18]:
from analysis import threshold

# Build the threshold from the normal samples only: a mean-based threshold wrapped
# in the standard-deviation decorator with nb_std=2.
# NOTE(review): "m2sd" presumably stands for mean + 2 standard deviations — confirm
# against `analysis.threshold`.
# `MeanTreshold` (sic) is the class name as this notebook calls it; do not respell
# it without checking the package.
mean_threshold = threshold.MeanTreshold(normal)
m2sd_decorator = threshold.StdDecorator(mean_threshold, nb_std=2)
m2sd_threshold = m2sd_decorator.calculate_threshold()

print('Threshold m2sd')
print(m2sd_threshold)

Threshold m2sd
DNMT3B    1.396983
EXO1      1.089280
MCM10     0.898020
dtype: float64


## Calculate frequencies of expression above a threshold

In [9]:
from analysis import expression_analysis

# Compare the tumoral expression values with the m2sd thresholds computed from
# the normal samples.
frequency_calculator = expression_analysis.ExpressionFrequency()
frequency = frequency_calculator.calculate_expression_frequency(tumoral, m2sd_threshold)

print('Activation frequency obtained with m2sd threshold in percentage')
print(frequency.head())

Activation frequency obtained with m2sd threshold in percentage
DNMT3B    45.367717
EXO1      81.088825
MCM10     73.925501
dtype: float64


## Calculate adaptive threshold