# Analysis of differential expression across conditions

In [1]:
import logging

import pandas as pd
from diffexpr.py_deseq import py_DESeq2
from rpy2.rinterface_lib.callbacks import logger as rpy2_logger
from rpy2.robjects import Formula

rpy2_logger.setLevel(logging.ERROR)

# Load the count table; the first CSV column becomes the row index.
counts = pd.read_csv('Data/DokdoniaCounts.csv', index_col=0)

# Sample names are the column labels with everything from the first
# '.sam' onwards dropped.
conditions = [label.partition('.sam')[0] for label in counts.columns]
counts.columns = conditions
counts.head()

Unnamed: 0,D_10_R1,D_10_R2,D_10_R3,D_18_R1,D_18_R2,D_18_R3_T1,D_18_R3,D_18_R4,D_25_R1,D_25_R2,...,L_25_R1,L_25_R2,L_25_R3,L_34_R1,L_34_R2,L_34_R3,L_34_R4_T1,L_34_R4_T2,L_34_R4_T3,L_34_R4
MED134_07389,12973,11426,13251,30624,30618,25566,41014,22449,53840,32369,...,39444,41721,37020,39662,30983,27280,43105,44946,39311,57834
MED134_07384,1557,1555,1552,4160,4385,3383,5588,2940,7550,4509,...,5419,5536,5318,5396,4751,4285,5108,4924,4503,7818
MED134_07379,3400,3319,3273,7218,7903,6315,9539,5040,13268,7572,...,9089,9016,8529,10179,7617,7207,9484,8837,8743,13827
MED134_07374,1987,2124,1404,3509,3772,2846,4337,2602,4607,3316,...,3586,3594,3488,4158,3096,2623,3818,3792,3538,5323
MED134_07369,2087,2010,2111,3774,3809,2921,4486,2901,4668,3456,...,3705,4158,3760,5518,3563,2713,3679,3543,3356,5150


In [2]:
# Remove genes with low read counts: keep a gene only when its count is
# strictly greater than `min_count` in every sample.
min_count = 10

# After this cell has run once, `counts` carries the gene ids in a string
# 'index' column. Move that column back into the row index before comparing,
# so that re-running the cell does not compare gene-id strings with an integer.
sample_counts = counts.set_index('index') if 'index' in counts.columns else counts

# `axis=1` is passed by keyword: positional arguments to DataFrame.all are
# deprecated in recent pandas releases.
passes_filter = (sample_counts > min_count).all(axis=1)

# Expose the gene ids as a regular 'index' column again.
counts = sample_counts[passes_filter].reset_index()
counts.head()

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


Unnamed: 0,index,D_10_R1,D_10_R2,D_10_R3,D_18_R1,D_18_R2,D_18_R3_T1,D_18_R3,D_18_R4,D_25_R1,...,L_25_R1,L_25_R2,L_25_R3,L_34_R1,L_34_R2,L_34_R3,L_34_R4_T1,L_34_R4_T2,L_34_R4_T3,L_34_R4
0,MED134_07389,12973,11426,13251,30624,30618,25566,41014,22449,53840,...,39444,41721,37020,39662,30983,27280,43105,44946,39311,57834
1,MED134_07384,1557,1555,1552,4160,4385,3383,5588,2940,7550,...,5419,5536,5318,5396,4751,4285,5108,4924,4503,7818
2,MED134_07379,3400,3319,3273,7218,7903,6315,9539,5040,13268,...,9089,9016,8529,10179,7617,7207,9484,8837,8743,13827
3,MED134_07374,1987,2124,1404,3509,3772,2846,4337,2602,4607,...,3586,3594,3488,4158,3096,2623,3818,3792,3538,5323
4,MED134_07369,2087,2010,2111,3774,3809,2921,4486,2901,4668,...,3705,4158,3760,5518,3563,2713,3679,3543,3356,5150


In [3]:
# Build one metadata row per sample by slicing fixed character positions of
# the sample name: character 0 -> lighting, characters 2-3 -> temperature,
# characters 5 onwards -> replicate.
# NOTE(review): this assumes every name follows the layout seen in the
# displayed samples (e.g. 'D_10_R1') -- confirm for the full column set.
meta = pd.DataFrame(
    [(sample[0], sample[2:4], sample[5:]) for sample in conditions],
    columns=['lighting', 'temperature', 'replicate'],
    index=conditions,
)
meta.head()

Unnamed: 0,lighting,temperature,replicate
D_10_R1,D,10,R1
D_10_R2,D,10,R2
D_10_R3,D,10,R3
D_18_R1,D,18,R1
D_18_R2,D,18,R2


In [7]:
# Build the DESeq2 data set from the filtered counts and the sample metadata,
# using temperature as the only term of the full model. Gene ids are read
# from the 'index' column of `counts`.
dds = py_DESeq2(count_matrix=counts,
                design_matrix=meta,
                design_formula='~ temperature',
                gene_column='index')

# Likelihood-ratio test of the full model against an intercept-only model.
# `reduced` is passed as an R formula object rather than a Python string: the
# previous run of this cell, which passed the plain string '~ 1', failed with
# "$ operator is invalid for atomic vectors" -- presumably because the string
# reached R as a character vector instead of a formula.
dds.run_deseq(test="LRT", reduced=Formula('~ 1'))
dds.get_deseq_result()
res = dds.deseq_result
res.head()

RRuntimeError: Error: $ operator is invalid for atomic vectors
