In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline 
# (Re)load IPython's autoreload extension and switch it to mode 2, so edits to
# the project modules imported below are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd

In [3]:
import sys
# Make the project's src/ directory importable. The path is relative, so it is
# resolved against the kernel's current working directory -- this assumes the
# notebook is started from its own folder (TODO confirm).
sys.path.append('./../src/')
# Project-local helpers; they can only be imported after the path tweak above.
from aging_tools import inout
from access_aging_data import sequencing

In [4]:
# Name of the cached data set to load (TMM normalized per condition).
set_name = '181028_inclusive_tmm'

In [5]:
# Fetch the three cached tables that belong to `set_name`: the count matrix,
# the per-sample metadata and the gene annotation.
# Both Entrez-related switches are turned off; presumably this keeps Ensembl
# gene ids (the counts shown further down are indexed by gene_ensembl).
(df_counts,
 df_meta,
 df_genes) = sequencing.load_cached_aging_map(
    set_name, unambiguous_to_entrez=False, as_entrez=False
)

In [6]:
# Show the distinct tissue labels present in the loaded metadata.
df_meta.loc[:, 'tissue'].unique()

array(['AM', 'AT2', 'Adrenal', 'BAT', 'Blood', 'Brain', 'Cerebellum',
       'Esophagus', 'GutEP', 'Heart', 'Kidney', 'LI', 'Liver', 'Lung',
       'MoDC', 'MuscSat', 'SI', 'Skin', 'Stomach', 'WAT'], dtype=object)

In [7]:
# Whitelist of (tissue, pfu) combinations that are kept for the export.
# Every listed tissue is allowed at pfu 0; Lung is additionally allowed at
# pfu 150. Tissue labels that occur in the metadata but are not listed here
# (AM, AT2, MoDC) are removed by the merge in the next cell.
allowed = pd.DataFrame(
    columns=['tissue', 'pfu'],
    data=[
        ('Adrenal', 0),
        ('BAT', 0),
        ('Blood', 0),
        ('Brain', 0),
        ('Cerebellum', 0),
        ('Esophagus', 0),
        ('GutEP', 0),
        ('Heart', 0),
        ('Kidney', 0),
        ('LI', 0),
        ('Liver', 0),
        ('Lung', 0),
        ('Lung', 150),
        ('MuscSat', 0),
        ('SI', 0),
        ('Skin', 0),
        ('Stomach', 0),
        ('WAT', 0),
    ],
)



In [8]:
# Keep only the whitelisted samples. No join keys are given, so pandas joins
# on the columns both frames share (tissue and pfu), and the default inner
# join drops every sample whose (tissue, pfu) pair is not in `allowed`.
# reset_index() first moves the sample ids into a regular column so that they
# survive the merge; the following cell restores them as the index.
df_meta = df_meta.reset_index().merge(allowed)

In [9]:
# Turn the 'index' column (the sample ids moved out by reset_index() before
# the merge) back into the row index of the metadata.
df_meta = df_meta.set_index(keys='index')

In [10]:
# Maps the short tissue labels used in the metadata to the names that are
# written to the export.
renamer = {
    # labels that are expanded or reworded
    'Adrenal': 'Adrenal Gland',
    'BAT': 'Brown adipose tissue',
    'Brain': 'Frontal cortex',
    'GutEP': 'Gut epithelium',
    'LI': 'Large intestine',
    'MuscSat': 'Muscle',
    'SI': 'Small intestine',
    'WAT': 'White adipose tissue',
    # labels that are exported unchanged
    'Blood': 'Blood',
    'Cerebellum': 'Cerebellum',
    'Esophagus': 'Esophagus',
    'Heart': 'Heart',
    'Kidney': 'Kidney',
    'Liver': 'Liver',
    'Lung': 'Lung',
    'Skin': 'Skin',
    'Stomach': 'Stomach',
}

In [11]:
# Swap the short tissue labels for their export names; values that are not a
# key of `renamer` are left as they are.
df_meta['tissue'] = df_meta['tissue'].replace(to_replace=renamer)

In [12]:
# Align the count matrix with the filtered metadata: keep only the retained
# samples and put the columns in the same order as the rows of df_meta.
# reindex() silently creates all-NaN columns for labels it cannot find, and
# those would be exported unnoticed, so fail loudly if any sample is missing.
if not df_meta.index.isin(df_counts.columns).all():
    raise KeyError(
        'df_meta contains samples that are absent from df_counts: {}'.format(
            list(df_meta.index.difference(df_counts.columns))
        )
    )
df_counts = df_counts.reindex(columns=df_meta.index)

In [13]:
# Set the name of the column axis (the sample ids) to an empty string.
# Presumably it would otherwise be 'index', taken over from df_meta.index by
# the reindex above -- TODO confirm.
df_counts.columns.name = ''

In [14]:
# Set the name of the sample index to an empty string (set_index('index')
# had named it 'index'); presumably so that the exported CSV gets an empty
# header for the sample-id column -- TODO confirm.
df_meta.index.name = ''

In [15]:
# Preview the first five genes of the aligned count matrix.
df_counts.head(n=5)

Unnamed: 0_level_0,M01_Adrenal_24M_F0_1,M02_Adrenal_24M_F0_2,M03_Adrenal_24M_F0_3,M04_Adrenal_18M_F0_1,M05_Adrenal_18M_F0_2,M06_Adrenal_18M_F0_3,M07_Adrenal_12M_F0_1,M08_Adrenal_12M_F0_2,M09_Adrenal_12M_F0_3,M10_Adrenal_09M_F0_1,...,M21_WAT_18M_F0_6,M22_WAT_12M_F0_4,M23_WAT_12M_F0_5,M24_WAT_12M_F0_6,M25_WAT_09M_F0_4,M26_WAT_09M_F0_5,M27_WAT_09M_F0_6,M28_WAT_04M_F0_4,M29_WAT_04M_F0_5,M30_WAT_04M_F0_6
gene_ensembl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENSMUSG00000000001,75.819859,6.553441,2.73376,19.688101,19.080494,9.460485,10.795543,14.272635,1.219717,22.480682,...,103.667386,79.019196,94.968025,93.155658,98.049049,102.217492,83.73135,104.392333,130.490416,39.147125
ENSMUSG00000000003,0.12553,0.145632,0.130179,0.131254,0.12553,0.139125,0.12553,0.137237,0.135524,0.145037,...,0.181237,0.181237,0.181237,0.181237,0.181237,0.181237,0.181237,0.181237,0.181237,0.181237
ENSMUSG00000000028,78.958099,35.825478,33.716374,23.231959,37.156752,37.28544,24.101677,59.011857,86.193353,34.663762,...,63.432841,52.921113,92.249475,53.283587,75.756936,30.266527,29.360344,95.874208,108.560777,61.258001
ENSMUSG00000000031,67.409378,64.951882,78.497967,89.77774,37.156752,89.735481,9.163658,36.093298,0.135524,34.663762,...,0.181237,67.601285,0.181237,77.569303,0.181237,0.181237,73.763332,76.844356,90.074634,0.181237
ENSMUSG00000000037,0.12553,9.029185,62.876481,8.137748,8.78707,9.043111,65.903023,0.137237,8.266972,0.145037,...,0.181237,0.181237,0.181237,11.055438,0.181237,0.181237,0.181237,76.844356,122.878475,0.181237


In [16]:
# Preview the first five samples of the filtered, renamed metadata.
df_meta.head(n=5)

Unnamed: 0,age,age_unit,mouse_id,pfu,replicate_id,tissue,is_specimen,run_name,harvest_date,experimental_batch,clotted,died_during_intubation,tumor,I7_Index_ID,I5_Index_ID,run_id,orig_sample_name
,,,,,,,,,,,,,,,,,
M01_Adrenal_24M_F0_1,24.0,M,1.0,0.0,1.0,Adrenal Gland,True,170329_NB501488_0082_AH2VV5BGX2,2015-01-19,1.0,False,False,False,D705,D503,82.0,M01_Adrenal_24M_F0_1
M02_Adrenal_24M_F0_2,24.0,M,2.0,0.0,2.0,Adrenal Gland,True,170329_NB501488_0082_AH2VV5BGX2,2015-01-19,1.0,False,False,False,D705,D504,82.0,M02_Adrenal_24M_F0_2
M03_Adrenal_24M_F0_3,24.0,M,3.0,0.0,3.0,Adrenal Gland,True,170329_NB501488_0082_AH2VV5BGX2,2015-01-19,1.0,False,False,False,D705,D505,82.0,M03_Adrenal_24M_F0_3
M04_Adrenal_18M_F0_1,18.0,M,4.0,0.0,1.0,Adrenal Gland,True,170329_NB501488_0082_AH2VV5BGX2,2015-01-19,1.0,False,False,False,D705,D506,82.0,M04_Adrenal_18M_F0_1
M05_Adrenal_18M_F0_2,18.0,M,5.0,0.0,2.0,Adrenal Gland,True,170329_NB501488_0082_AH2VV5BGX2,2015-01-19,1.0,False,False,False,D705,D507,82.0,M05_Adrenal_18M_F0_2


In [17]:
# Preview the first five rows of the gene annotation table (exported as loaded).
df_genes.head(n=5)

Unnamed: 0,gene_ensembl,Chr,Start,End,Length,Strand,gene_ncbi,symbol_ncbi,type_of_gene,nomenclature_status,genes_with_same_ensembl,genbank_validated_rna_length,genbank_validated_rna_cg,has_validated_protein_coding_RNA,has_publication,present_in_genbank_info
0,ENSMUSG00000000001,chr3,108107280,108146146,3262,-,14679.0,Gnai3,protein-coding,O,1.0,3294.0,0.414693,True,True,True
1,ENSMUSG00000000003,chrX,77837901,77853623,1599,-,54192.0,Pbsn,protein-coding,O,1.0,,,False,True,True
2,ENSMUSG00000000028,chr16,18780447,18811987,4722,-,12544.0,Cdc45,protein-coding,O,1.0,1972.5,0.495172,True,True,True
3,ENSMUSG00000000031,chr7,142575529,142578143,6343,-,14955.0,H19,ncRNA,O,1.0,,,False,True,True
4,ENSMUSG00000000037,chrX,161117193,161258213,23080,+,107815.0,Scml2,protein-coding,O,1.0,4791.0,0.415127,True,True,True


In [18]:
# Versioned target directory for the webpage export. Judging by the helper
# names, the relative path is resolved by the project and the directory is
# created if it does not exist yet -- verify against aging_tools.inout.
export_folder = inout.get_internal_path(
    'datasets/general/webpage/v200311'
)
inout.ensure_presence_of_directory(export_folder)

In [19]:
# Write the aligned count matrix; the row index (gene ids) is written as the
# first CSV column because to_csv keeps the index by default.
df_counts.to_csv(os.path.join(export_folder, 'df_counts.csv'))

In [20]:
# Write the filtered sample metadata; the row index (sample ids) is written
# as the first CSV column because to_csv keeps the index by default.
df_meta.to_csv(os.path.join(export_folder, 'df_meta.csv'))

In [21]:
# Write the gene annotation table without its row index (index=False); the
# gene ids are available in the gene_ensembl column.
df_genes.to_csv(os.path.join(export_folder, 'df_genes.csv'), index=False)