#### Notebook to compare the significant QTS signals by day

In [1]:
# record when this notebook was run (shell `date` invoked through the IPython `!` escape)
!date

Wed Aug 18 17:04:23 EDT 2021


In [2]:
import pandas as pd

In [3]:
# parameters
# cohort name; used as a directory name and as the file-name prefix in the paths built below
cohort = 'foundin'
# day labels; one per-day QTS results file is loaded for each entry
days = ['da0', 'da25', 'da65']

In [4]:
# naming

# directories
# NOTE(review): home_dir is a hard-coded absolute path for one user account;
# it has to be edited to run this notebook anywhere else
home_dir = '/home/gibbsr'
wrk_dir = f'{home_dir}/{cohort}/caqtl'
# directory holding both the per-day QTS inputs and the combined output
qts_dir = f'{wrk_dir}/qts'

# input files
# (none named here; the per-day input paths are built inside the load loop)

# out files
# combined significant results, written by the save cell
results_file = f'{qts_dir}/{cohort}.qts.csv'

# constants
# (none defined)

#### load the results

In [5]:
# Read each day's QTS results, tag every row with its day label, and stack the
# days into one frame.
# The per-day frames are collected in a list and concatenated once after the
# loop: calling pd.concat inside the loop re-copies all previously loaded rows
# on every iteration and relies on concat silently dropping a None seed.
day_dfs = []
total_rows = 0
for day in days:
    cohort_set = f'{cohort}.{day}'
    qts_file = f'{qts_dir}/{cohort_set}.qts.csv'
    this_df = pd.read_csv(qts_file)
    feature_cnt = len(this_df['Geneid'].unique())
    print(f'day {day} shape is {this_df.shape} for {feature_cnt} features')
    this_df['day'] = day
    day_dfs.append(this_df)
    # running size of the combined frame; the column count is taken from the
    # current day's frame, which matches the combined frame as long as every
    # per-day file has the same columns
    total_rows += this_df.shape[0]
    print(f'after day {day} total shape {(total_rows, this_df.shape[1])}')

# single concatenation; the per-day row indices are kept as-is (not reset),
# so index values repeat across days
qts_df = pd.concat(day_dfs)

day da0 shape is (201560, 7) for 201560 features
after day da0 total shape (201560, 8)
day da25 shape is (201531, 7) for 201531 features
after day da25 total shape (403091, 8)
day da65 shape is (201264, 7) for 201264 features
after day da65 total shape (604355, 8)


In [6]:
# dimensions of the combined frame, then a random five-row peek
# (no random_state is set, so the sampled rows differ from run to run)
print(qts_df.shape)
display(qts_df.sample(5))

(604355, 8)


Unnamed: 0,Geneid,coef,stderr,r2_adj,term_cnt,p-value,bh_fdr,day
198591,chr18_3702793_3705219,-0.009413,0.050246,-0.010837,2.0,0.851816,0.970229,da65
78044,chr4_8624160_8628514,-0.03388,0.065396,-0.008196,2.0,0.605695,0.904797,da65
107286,chr1_81138763_81139383,0.029437,0.057886,-0.008306,2.0,0.612343,0.907001,da65
85740,chr17_76584542_76587351,-0.077863,0.064779,0.00466,2.0,0.232392,0.995827,da0
127290,chr6_131255920_131256370,0.029866,0.071883,-0.008976,2.0,0.678758,0.999971,da25


In [7]:
# distribution of how many rows each Geneid has in the combined frame;
# when a day's file has one row per Geneid (as the load output reports for the
# recorded run) this is the number of days the feature appears in
qts_df['Geneid'].value_counts().value_counts()

3    201097
2       472
1       120
Name: Geneid, dtype: int64

#### subset to significant based on FDR

In [8]:
# Keep only the rows whose `bh_fdr` value is below the significance cut-off.
max_fdr = 0.05
sig_df = qts_df.loc[qts_df['bh_fdr'] < max_fdr]
print(sig_df.shape)
# Preview up to five random rows whenever anything passed the cut-off. The
# sample size is capped at the row count so that a small result set (1-5 rows)
# is still shown; sample() raises if asked for more rows than exist.
if len(sig_df) > 0:
    display(sig_df.sample(min(5, len(sig_df))))

(0, 8)


In [9]:
# number of significant rows per day
sig_df['day'].value_counts()

Series([], Name: day, dtype: int64)

In [10]:
# distribution of how many significant rows each Geneid has
sig_df['Geneid'].value_counts().value_counts()

Series([], Name: Geneid, dtype: int64)

In [11]:
# the Geneid values with the most significant rows (value_counts sorts by count, descending)
sig_df['Geneid'].value_counts().head()

Series([], Name: Geneid, dtype: int64)

In [12]:
# number of significant rows per day
# NOTE(review): this repeats the per-day tally already shown right after the FDR subset
sig_df['day'].value_counts()

Series([], Name: day, dtype: int64)

#### save the combined significant QTS results

In [13]:
# write the significant rows to the combined results file, without the row index
# NOTE(review): the file is written even when sig_df is empty (as in the recorded
# run), which yields a header-only CSV
sig_df.to_csv(results_file, index=False)