## Prepare count matrix for `DESeq2`

In [None]:
import pandas as pd

In [None]:
# Sample identifiers 'ig01' through 'ig12' (numbers zero-padded to two digits).
samples = [f'ig{number:02d}' for number in range(1, 13)]
samples

In [None]:
# Twelve labels alternating 'co', 'aci', 'co', 'aci', ... (six of each).
conditions = ['co', 'aci'] * 6
conditions

In [None]:
# Experiment table: one row per sample, paired with its condition label.
exp_df = pd.DataFrame(dict(sample=samples, condition=conditions))
exp_df

In [None]:
def deseq_df(exp_df, cond, cond_names, res_dir='../results'):
    """Assemble a gene-by-sample count matrix from per-sample count files.

    For each label in ``cond`` (in the given order), the rows of ``exp_df``
    whose ``condition`` equals that label are selected, and for every such
    row the file ``{res_dir}/{sample}/{sample}.htseq.counts`` is read.
    The resulting columns are named ``<cond_name><k>``, where ``k`` counts
    from 1 within each condition.

    Counts from different files are aligned on the gene identifier, not on
    line position, so files that list genes in a different order are still
    combined correctly. A gene missing from some file yields NaN in that
    file's column.

    Parameters
    ----------
    exp_df : pandas.DataFrame
        Table with at least the columns ``sample`` and ``condition``.
    cond : sequence
        Condition labels to look up in ``exp_df['condition']``.
    cond_names : sequence of str
        Column-name prefix to use for the matching entry of ``cond``.
    res_dir : str
        Directory that holds one sub-directory per sample.

    Returns
    -------
    pandas.DataFrame
        Counts indexed by gene (index name cleared), one column per sample.
        The frame is also printed before being returned.

    Raises
    ------
    ValueError
        If ``cond`` and ``cond_names`` differ in length, or if no row of
        ``exp_df`` matches any label in ``cond``.
    """
    if len(cond) != len(cond_names):
        raise ValueError('cond and cond_names must be same length')

    per_sample = []
    for cnd, name in zip(cond, cond_names):
        sample_ids = exp_df.loc[exp_df['condition'] == cnd, 'sample']
        for i, sample in enumerate(sample_ids, start=1):
            filename = f'{res_dir}/{sample}/{sample}.htseq.counts'
            # skipfooter=5 drops the last five lines of every file
            # (presumably htseq-count's trailing summary rows -- TODO confirm).
            # skipfooter is only supported by the python parser engine.
            ct = pd.read_csv(filename, header=None, names=['gene', 'counts'],
                             sep='\t', skipfooter=5, engine='python')
            # Index by gene so that concat aligns on gene id, not row number.
            per_sample.append(
                ct.set_index('gene')['counts'].rename(f'{name}{i}'))

    if not per_sample:
        raise ValueError(
            'no samples in exp_df match any of the conditions %r' % (tuple(cond),))

    df = pd.concat(per_sample, axis=1)
    df.index.names = [None]
    print(df)
    return df

In [None]:
# Build one count matrix per batch: rows 0-5 of exp_df are read from the
# default results directory, the remaining rows from the 2018-07-25 run.
df1 = deseq_df(exp_df.iloc[:6], cond=('co', 'aci'), cond_names=('control', 'Aci'))
df2 = deseq_df(exp_df.iloc[6:], cond=('co', 'aci'), cond_names=('control', 'Aci'),
               res_dir='../data/2018-07-25/results')
# Inner join on the gene index. Both frames carry the same column names, so
# pandas disambiguates the overlapping columns with its default _x / _y suffixes.
df = pd.merge(df1, df2, left_index=True, right_index=True)
#df.to_csv('../results/CvsAci.combined.csv', index_label=False)
df

In [None]:
# NOTE(review): the exp_df constructed in this notebook only contains the
# labels 'co' and 'aci', so 'K' / 'NAC' select no rows here -- presumably this
# cell belongs to a different experiment table; confirm before running.
# It also rebinds `df`, replacing the merged frame from the previous cell.
df = deseq_df(exp_df, cond=('K', 'NAC'), cond_names=('kontrol', 'NAC'))
df.to_csv(path_or_buf='../results/KvsNAC.csv', index_label=False)

In [None]:
# Display the current column labels of df.
df.columns

In [None]:
# Map each existing column label to a sequentially numbered one:
# labels containing 'control' become control1, control2, ... and labels
# containing 'Aci' become Aci1, Aci2, ...; any other label is left unmapped.
new_columns = {}
xc, yc = 1, 1
for c in df.columns:
    if 'control' in c:
        label = f'control{xc}'
        xc += 1
    elif 'Aci' in c:
        label = f'Aci{yc}'
        yc += 1
    else:
        continue
    new_columns[c] = label

new_columns

In [None]:
# Apply the new_columns mapping to df in place, then display the result.
df.rename(columns=new_columns, inplace=True)
df

In [None]:
# Write the frame to CSV. index_label=False leaves out the header field for
# the index column (the pandas docs recommend this for easier import in R).
df.to_csv('../results/CvsAci.combined.csv', index_label=False)