In [2]:
### header ###
__author__ = "Hunter Bennett"
__license__ = "BSD"
__email__ = "hunter.r.bennett@gmail.com"
%load_ext autoreload
%autoreload 2
%matplotlib inline
### imports ###
import sys
import os
import re
import glob
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns

# Global plotting / interpreter configuration for the notebook.
# Figures written with savefig use 200 dpi.
matplotlib.rcParams['savefig.dpi'] = 200
# Raise Python's recursion limit to 3000.
# NOTE(review): nothing in the visible cells needs this; presumably required by a later step -- confirm.
sys.setrecursionlimit(3000)
# Seaborn styling; the calls are applied in order, so later calls take precedence
# for any setting they share with an earlier one.
sns.set(font_scale=1)
sns.set_context('talk')
sns.set_style('white')

# import custom functions
sys.path.insert(0, '/home/h1bennet/code/')
from hbUtils import ngs_qc, quantile_normalize_df
from homer_preprocessing import read_homer_gene_exp, import_homer_diffgene, pull_comparisons_get_diff
from plotting_scripts import label_point, pca_rpkm_mat, get_diff_volcano, plot_exp_rpkm, gene_list_bar_plot

In [3]:
# Output location for this analysis and the directory holding the input samples.
workingDirectory = '/home/h1bennet/brain_aging/results/00_Microglia_RNA/'
dataDirectory = '/data/mm10/Brain_MPSIIIA/RNA/Microglia/WT/'

# Create the working directory, including any missing parent directories
# (os.mkdir would raise if e.g. 'results/' does not exist yet), then make it
# the current directory so the relative paths used below ('./sampleDef.txt',
# './makeHub.sh') resolve inside it.
if not os.path.isdir(workingDirectory):
    os.makedirs(workingDirectory)
os.chdir(workingDirectory)


# Run Oyoung's pipeline to process RNA-seq data consistently.

In [22]:
# Sample directories per age group. Each list is built by prefixing the bare
# sample directory names with dataDirectory.
p21days_paths = [dataDirectory + sample for sample in (
    'mouse_Bl6_WT_M_P21_Microglia_RNA_1_JOS_ATTCCT',
    'mouse_Bl6_WT_M_P21_Microglia_RNA_2_JOS_GTGGCC',
    'mouse_Bl6_WT_M_P21_Microglia_RNA_3_JOS_ACAGTG',
    'mouse_Bl6_WT_M_P21_Microglia_RNA_4_JOS_CTTGTA',
)]

# The 6-week group is disabled (kept here for reference only).
# p6weeks_paths = [dataDirectory+'mouse_Bl6_WT_M_6weeks_Microglia_RNA_1_JOS_BC',
#                  dataDirectory+'mouse_Bl6_WT_M_6weeks_Microglia_RNA_2_JOS_BC',
#                  dataDirectory+'mouse_Bl6_WT_M_6weeks_Microglia_RNA_3_JOS_BC']

# NOTE(review): the third entry is named 'MPSIIIAhet', not 'WT' -- confirm it belongs here.
p4months_paths = [dataDirectory + sample for sample in (
    'mouse_Bl6_WT_F_4months_Microglia_RNA_1_JOS_GTAGAG',
    'mouse_Bl6_WT_M_4months_Microglia_RNA_2_JOS_GTGGCC',
    'mouse_MPSIIIAhet_M_microglia_RNA_4month_19212_AL_l20190515_GTGGCC',
)]

p8months_paths = [dataDirectory + sample for sample in (
    'mouse_Bl6_WT_M_8months_Microglia_RNA_1_JOS_CACCGG',
    'mouse_Bl6_WT_M_8months_Microglia_RNA_2_JOS_CATGGC',
)]

# NOTE(review): the second directory name says '22Months'; it is grouped with 21 months here.
p21months_paths = [dataDirectory + sample for sample in (
    'C57_Microglia_RNA_PolyA_WT_21Months_DGO_150721_DGO_15_08_21',
    'C57_Microglia_RNA_PolyA_WT_22Months_DGO_15_04_06',
)]

# NOTE(review): two directory names say '26months'; they are grouped with 24 months here.
p24months_paths = [dataDirectory + sample for sample in (
    'mouse_Bl6_WT_M_24months_Microglia_RNA_1_JOS_AGTTCC',
    'mouse_Bl6_WT_M_26months_Microglia_RNA_1_JOS_CAGATC',
    'mouse_Bl6_WT_M_26months_Microglia_RNA_2_JOS_CTTGTA',
)]

In [23]:
# Replicate labels ('rep1', 'rep2', ...) for each age group; each list has one
# label per sample directory in the corresponding *_paths list.
p21days_ids = list(map('rep{}'.format, range(1, 5)))
# p6weeks_ids = ['rep1', 'rep2', 'rep3']
p4months_ids = list(map('rep{}'.format, range(1, 4)))
p8months_ids = list(map('rep{}'.format, range(1, 3)))
p21months_ids = list(map('rep{}'.format, range(1, 3)))
p24months_ids = list(map('rep{}'.format, range(1, 4)))

In [25]:
# Build the four parallel, per-group columns of the sample definition table:
# group id, hex color, ';'-joined sample paths and ';'-joined replicate ids.

# `reds` was used below without being defined in any visible cell, which raises
# NameError on a fresh kernel. Define it explicitly.
# NOTE(review): assumes matplotlib's 'Reds' colormap was intended (the makeHub
# gradient further down also uses red shades) -- confirm.
reds = plt.get_cmap('Reds')

ids = ['microglia_p21days',
       'microglia_p4months',
       'microglia_p8months',
       'microglia_p21months',
       'microglia_p24months']

# One shade per group, darker with each successive group.
colors = [matplotlib.colors.rgb2hex(reds(shade))
          for shade in (.4, .5, .6, .7, .8)]

# Order of the groups here must match the order of `ids`.
path_groups = (p21days_paths,
               p4months_paths,
               p8months_paths,
               p21months_paths,
               p24months_paths)
paths = [';'.join(group) for group in path_groups]

rep_groups = (p21days_ids,
              p4months_ids,
              p8months_ids,
              p21months_ids,
              p24months_ids)
# Each replicate label is prefixed with its group id, e.g. 'microglia_p21days_rep1'.
short_ids = [';'.join(group_id + '_' + rep for rep in reps)
             for group_id, reps in zip(ids, rep_groups)]

In [26]:
# Sanity check: every group must have as many replicate ids as sample paths.
# Count the ';' separators in each joined string, print both lists for
# inspection, and fail loudly if they disagree instead of relying on a visual
# comparison of the printed output.
path_separator_counts = [joined.count(';') for joined in paths]
id_separator_counts = [joined.count(';') for joined in short_ids]
print(path_separator_counts)
print(id_separator_counts)
assert path_separator_counts == id_separator_counts, \
    'paths and short_ids have a different number of entries for at least one group'

[3, 2, 1, 1, 2]
[3, 2, 1, 1, 2]


In [27]:
# Assemble the sample definition table (one row per group; columns are id,
# color, paths, short ids) and write it as a tab-separated file with neither
# header nor index.
sample_def = pd.DataFrame([ids, colors, paths, short_ids]).transpose()
sample_def.to_csv('./sampleDef.txt', sep='\t', header=False, index=False)

# Take a look at the qc

In [33]:
# Per-group lists of sample directories, in age order.
directories = [
    p21days_paths,
    p4months_paths,
    p8months_paths,
    p21months_paths,
    p24months_paths,
]

# Flatten the groups into a single list of sample directories, preserving order.
unlist_dir = [sample_dir for group in directories for sample_dir in group]

In [36]:
# Compose the makeMultiWigHub.pl command line: fixed arguments first, then the
# sample directories after '-d', one per line joined with shell line
# continuations. The command is written to ./makeHub.sh (it is not run here).
joined_dirs = ' \\\n'.join(unlist_dir)
hub_command = ' '.join(['makeMultiWigHub.pl hrb_BrainAging_QC_Microglia_RNA mm10',
                        '-gradient 252,146,114 153,0,13',
                        '-d \\\n',
                        joined_dirs])

with open('./makeHub.sh', 'w') as script:
    script.write(hub_command)
        

# Run O'young RNA pipeline

Run the code below in BASH

    source activate r-ouyangPipe
    
    mkdir ./expression/
    
    mkdir ./differential/
    
    rnaQuan.R ./sampleDef.txt -o ./expression/

### Make sure to check the PCA pdf file for outliers
Two of the 6-week samples are too clonal for use (clonality > 15).