In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load IPython's autoreload extension and reload all imported modules
# before each cell execution, so edits to the local source packages are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import matplotlib as mpl
# Figure export settings: font type 42 embeds TrueType fonts in PDF output,
# and Arial is used as the font family for all text.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'font.family': 'Arial',
})

In [3]:
import glob
import os
import random
import sys

# Point the first module search path entry at the project's source directory
# so that the local packages imported below can be found. Note that this
# overwrites (rather than prepends to) the existing first entry of sys.path.
src_dir = sys.path[0] = './../src/'

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from copy import deepcopy
from scipy.stats import spearmanr

from access_biology_data import meta
from aging_tools import inout

In [4]:
# Stretch the notebook's main container to the full browser width by
# injecting a CSS rule into the page.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated and fails on
# recent IPython releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [5]:
# Make sure the project's source directory is on the module search path.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
if './../src/' not in sys.path:
    sys.path.append('./../src/')
from aging_tools import inout, export
from access_aging_data import chaperome, earlier_studies, companions, sequencing
from access_biology_data import annotation, relations

In [6]:
# Load the cached aging-map expression dataset. Judging by the keyword
# arguments, genes are restricted to those mapping unambiguously to Entrez
# (NCBI) gene ids and are reported by those ids -- TODO confirm against
# sequencing.load_cached_aging_map.
aging_map = sequencing.load_cached_aging_map(
    dataset_name='aging_map_tmm_180105',
    unambiguous_to_entrez=True,
    as_entrez=True,
)
# The loader's result unpacks into three objects: counts, sample metadata
# and gene annotation (naming as used throughout this notebook).
df_counts, df_meta, df_genes = aging_map

In [7]:
# Locate the differential-expression result tables (one CSV per file matched
# by the glob pattern below).
p = inout.get_internal_path(
    'dynamic/tstoeger/200129_inner_bootstrap/DE/Flu/*.csv')
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the order of the files (and of the rows pooled from them) reproducible.
d = sorted(glob.glob(p))

# Fail early with an explicit message instead of an opaque error further down
# when no input file is found (e.g. wrong or unmounted data directory).
if not d:
    raise FileNotFoundError('No DE result files found matching: {}'.format(p))

files_to_process = pd.DataFrame(columns=['path'], data=d)
# base_name: file name without directory and without the '.csv' extension.
# os.path handles the platform's path separators, unlike a regex on '/'.
files_to_process['base_name'] = [
    os.path.splitext(os.path.basename(path))[0] for path in d
]

In [8]:
# Metadata fields encoded in each file name, listed in the order of the
# capture groups of the regular expression below.
tags = ['tissue', 'pfu', 'age', 'animals']
# Raw string literal: '\-' is not a valid Python string escape and triggers a
# warning in a normal string; the regular expression itself is unchanged.
# The extracted values are strings; names that do not match yield missing values.
files_to_process[tags] = \
    files_to_process['base_name'].str.extract(
    r'^(.*)_pfu_([0-9]+)_age_([0-9]+)_first_([0-9\-]+)_DE', expand=False)

# verify_integrity raises if two files share the same base name.
files_to_process = files_to_process.set_index('base_name', verify_integrity=True)

# Read the needed columns of every table and annotate each row with the
# metadata parsed from the name of the file it came from.
agg = []
for j, v in files_to_process.iterrows():

    df = pd.read_csv(v['path'], usecols=['Symbol', 'log2FoldChange', 'pvalue', 'padj'])

    for tag in tags:
        df.loc[:, tag] = v[tag]

    agg.append(df)

In [9]:
# Stack the per-file tables row-wise into one long table and rename the
# 'Symbol' column to 'gene_ensembl'.
# NOTE(review): presumably 'Symbol' holds Ensembl gene ids -- confirm.
df = (
    pd.concat(agg, axis=0)
    .rename(columns={'Symbol': 'gene_ensembl'})
)


In [10]:
# Attach NCBI gene ids by looking up each row's 'gene_ensembl' value.
# Left join: rows without a matching id are kept, with 'gene_ncbi' missing.
ensembl_to_ncbi = df_genes[['gene_ensembl', 'gene_ncbi']]
df = (
    df
    .merge(ensembl_to_ncbi, how='left', on='gene_ensembl')
    # set_index followed by reset_index moves 'gene_ncbi' to the first column
    .set_index('gene_ncbi')
    .reset_index()
)

# Finally: export

In [11]:
# Rename the fold-change column to the name used in the exported table.
# NOTE(review): 'o_over_y' reads like "old over young"; confirm that this
# matches the contrast actually stored in 'log2FoldChange' of the input files.
df = df.rename({'log2FoldChange': 'o_over_y'}, axis='columns')

In [12]:
# Write the pooled table as a gzip-compressed CSV, without the row index.
p = inout.get_internal_path(
    'datasets/tstoeger/200129_pool_inner_bootstraps/age_groups.csv.gz')
# presumably creates the directory that will hold the file if it is missing
# -- confirm in aging_tools.inout
inout.ensure_presence_of_directory(p)
df.to_csv(path_or_buf=p, index=False, compression='gzip')