In [1]:
import pandas as pd
import numpy as np
import pickle

In [7]:
# Sample naming: each sample ID is a condition name followed by a replicate letter.
Conds = ['cold_iWAT']   # experimental conditions
reps = ['a', 'b']       # replicate suffixes
# Condition-major order: every replicate of the first condition, then the next, ...
SPs = [Cond + rep for Cond in Conds for rep in reps]

In [5]:
# Load the per-sample codon-count tables and combine them into one wide table
# keyed by (transcript_id, reltostart-asite), with one `<sample>_n` column per sample.
merge_keys = ['transcript_id', 'reltostart-asite']

per_sample_counts = []
for SP in SPs:
    count_path = f'/Data_2/Daehwa/Adipocyte/Other_papers/Xie_etal.2023.Diabetes/Analysis/Ribosome_stalling/v20230902/Xie_{SP}.codon-count.tsv'
    sample_counts = pd.read_csv(count_path, sep='\t', usecols=['transcript_id', 'reltostart-asite', 'count'])
    # Rename the generic `count` column so each sample keeps its own column after merging.
    per_sample_counts.append(sample_counts.rename(columns={'count': f'{SP}_n'}))

# Start from a copy of the first sample and outer-merge the rest, so a position
# observed in any sample is retained.
total_codon_count = per_sample_counts[0].copy()
for sample_counts in per_sample_counts[1:]:
    total_codon_count = pd.merge(total_codon_count, sample_counts, on=merge_keys, how='outer')

# Positions missing from a sample come out of the outer merge as NaN; treat them as 0.
total_codon_count = total_codon_count.sort_values(merge_keys)
total_codon_count = total_codon_count.reset_index(drop=True)
total_codon_count = total_codon_count.replace(np.nan, 0)
display(total_codon_count)

Unnamed: 0,reltostart-asite,transcript_id,cold_iWATa_n,cold_iWATb_n
0,33,ENSMUST00000000001.5,0.0,0.0
1,90,ENSMUST00000000001.5,0.0,1.0
2,120,ENSMUST00000000001.5,1.0,0.0
3,123,ENSMUST00000000001.5,0.0,1.0
4,126,ENSMUST00000000001.5,11.0,8.0
...,...,...,...,...
209436,1344,ENSMUST00000239497.2,9.0,8.0
209437,1347,ENSMUST00000239497.2,20.0,16.0
209438,1353,ENSMUST00000239497.2,0.0,1.0
209439,1359,ENSMUST00000239497.2,1.0,0.0


In [8]:
# Build the per-codon count table for transcripts that pass the coverage filter.
#
# For each transcript:
#   1. expand its rows onto a regular 3-nt grid between the first and last
#      observed position, filling unobserved positions with 0;
#   2. for every condition, average the total count over replicates and drop the
#      transcript if that average is below MIN_MEAN_TOTAL_READS, or below
#      MIN_MEAN_READS_PER_CODON per grid position;
#   3. add `<sample>_N` (sum of `<sample>_n` over the transcript) and
#      `codon_num` (number of grid positions).
# The result is written to 'Xie_codon-count.tsv' and displayed.
MIN_MEAN_READS_PER_CODON = 0.5
MIN_MEAN_TOTAL_READS = 100

count_cols = [f'{SP}_n' for SP in SPs]
total_cols = [f'{SP}_N' for SP in SPs]
out_cols = ['transcript_id', 'reltostart-asite', 'codon_num'] + count_cols + total_cols

# Collect the surviving per-transcript tables and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop is quadratic and relies on
# concatenating with an empty frame, which pandas has deprecated.
kept_tables = []
for T_ID, table in total_codon_count.groupby('transcript_id'):
    table = table.set_index('reltostart-asite')
    # NOTE: only positions on the 3-nt grid anchored at the first observed
    # position survive this reindex; this assumes all positions of a transcript
    # share one frame.
    table = table.reindex(range(table.index.min(), table.index.max() + 1, 3), fill_value=0).reset_index()
    # fill_value=0 also overwrote transcript_id on the inserted rows; restore it.
    table['transcript_id'] = T_ID

    low_coverage = False
    for Cond in Conds:
        # Mean over replicates of the transcript-wide read total for this condition.
        avg_sum = table[[f'{Cond}{rep}_n' for rep in reps]].sum().sum() / len(reps)
        if avg_sum / len(table) < MIN_MEAN_READS_PER_CODON or avg_sum < MIN_MEAN_TOTAL_READS:
            low_coverage = True
            break
    if low_coverage:
        continue

    for SP in SPs:
        table[f'{SP}_N'] = table[f'{SP}_n'].sum()
    table['codon_num'] = len(table)

    kept_tables.append(table)

if kept_tables:
    # Row labels stay the per-transcript 0..codon_num-1 positions, as before;
    # columns are put in the documented output order.
    slct_codon_table = pd.concat(kept_tables)[out_cols]
else:
    # No transcript passed the filter: keep an empty table with the expected columns.
    slct_codon_table = pd.DataFrame(columns=out_cols)

slct_codon_table.to_csv('Xie_codon-count.tsv', sep='\t', index=False)
display(slct_codon_table)

Unnamed: 0,transcript_id,reltostart-asite,codon_num,cold_iWATa_n,cold_iWATb_n,cold_iWATa_N,cold_iWATb_N
0,ENSMUST00000000058.7,33,142,0.0,0.0,237.0,132.0
1,ENSMUST00000000058.7,36,142,0.0,0.0,237.0,132.0
2,ENSMUST00000000058.7,39,142,0.0,0.0,237.0,132.0
3,ENSMUST00000000058.7,42,142,0.0,0.0,237.0,132.0
4,ENSMUST00000000058.7,45,142,0.0,0.0,237.0,132.0
...,...,...,...,...,...,...,...
450,ENSMUST00000239497.2,1383,455,0.0,0.0,259.0,201.0
451,ENSMUST00000239497.2,1386,455,0.0,0.0,259.0,201.0
452,ENSMUST00000239497.2,1389,455,0.0,0.0,259.0,201.0
453,ENSMUST00000239497.2,1392,455,0.0,0.0,259.0,201.0


# Stalling score

In [9]:
# Optional: reload a previously saved codon-count table instead of recomputing it.
# NOTE(review): this path points at a different analysis directory/file than the
# 'Xie_codon-count.tsv' written by this notebook -- confirm before enabling.
# slct_codon_table = pd.read_csv('/Data_2/Daehwa/Adipocyte/Analysis/Ribosome_stalling/v20230730/adi_codon-count.tsv', sep='\t')

# Stalling score of a codon in a sample:
#     <sample>_n / (<sample>_N / codon_num)
# i.e. the count at the codon divided by the transcript's mean count per codon.
# A zero transcript total is mapped to NaN first so the division cannot blow up;
# the resulting NaN scores are then reported as 0.
stall_score = slct_codon_table[['transcript_id', 'reltostart-asite']].copy()
for SP in SPs:
    mean_per_codon = slct_codon_table[f'{SP}_N'].replace(0, np.nan) / slct_codon_table['codon_num']
    stall_score[SP] = slct_codon_table[f'{SP}_n'] / mean_per_codon
stall_score = stall_score.fillna(0)

# Codon / amino-acid annotation: the union of the annotated positions found in
# any sample. Built with one concat + drop_duplicates rather than repeated outer
# merges against an empty seed frame; sorted by the key columns so the row order
# matches what the outer merges produced.
annot_cols = ['transcript_id', 'asite', 'reltostart-asite', 'codon-asite', 'aa-asite']
annot_frames = []
for SP in SPs:
    codon_data = pd.read_csv(
        f'/Data_2/Daehwa/Adipocyte/Other_papers/Xie_etal.2023.Diabetes/Alignment/rpf/RPF/Xie_{SP}.rep.codons.data.txt',
        sep='\t',
        usecols=annot_cols,
    )
    # usecols keeps the file's column order; select explicitly to fix the order.
    annot_frames.append(codon_data[annot_cols])

if annot_frames:
    codon_table = (
        pd.concat(annot_frames, ignore_index=True)
        .drop_duplicates()
        .sort_values(annot_cols)
        .reset_index(drop=True)
    )
else:
    codon_table = pd.DataFrame(columns=annot_cols)

# Inner merge: keep only annotated positions that also belong to a transcript
# that passed the coverage filter.
stall_score = pd.merge(codon_table, stall_score, on=['transcript_id', 'reltostart-asite'])

# Save both a pickle (keeps dtypes) and a TSV (portable), then show the result.
with open("Xie_stall-score.df.pickle", "wb") as fw:
    pickle.dump(stall_score, fw)
stall_score.to_csv('Xie_stall-score.tsv', sep='\t', index=False)
display(stall_score)

Unnamed: 0,transcript_id,asite,reltostart-asite,codon-asite,aa-asite,cold_iWATa,cold_iWATb
0,ENSMUST00000000058.7,254,78,UAC,Y,0.599156,0.000000
1,ENSMUST00000000058.7,257,81,GCA,A,0.599156,0.000000
2,ENSMUST00000000058.7,260,84,GAU,D,1.797468,0.000000
3,ENSMUST00000000058.7,269,93,AAG,K,13.181435,20.439394
4,ENSMUST00000000058.7,272,96,UAU,Y,1.797468,4.303030
...,...,...,...,...,...,...,...
111928,ENSMUST00000239497.2,1513,1239,CUG,L,0.000000,2.263682
111929,ENSMUST00000239497.2,1549,1275,GAG,E,0.000000,2.263682
111930,ENSMUST00000239497.2,1588,1314,AUC,I,0.000000,2.263682
111931,ENSMUST00000239497.2,1609,1335,GCU,A,0.000000,2.263682
