In [1]:
### header ###
__author__ = "Hunter Bennett"
__license__ = "BSD"
__email__ = "hunter.r.bennett@gmail.com"
%load_ext autoreload
%autoreload 2
### imports ###
import sys
%matplotlib inline
import os
import re
import glob
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt 
import seaborn as sns
# Plot appearance for the whole notebook: saved figures use 200 dpi, and
# seaborn is configured with its defaults first, then the 'talk' context
# and 'white' style are applied on top (order matters: later calls override).
matplotlib.rcParams.update({'savefig.dpi': 200})
sns.set(font_scale=1)
sns.set_context(context='talk')
sns.set_style(style='white')

# import custom functions
import sys
sys.path.insert(0, '/home/h1bennet/code/')
from hbUtils import ngs_qc, quantile_normalize_df
from plotting_scripts import label_point, pca_rpkm_mat, get_diff_volcano
from homer_preprocessing import read_annotated_peaks, import_homer_diffpeak, pull_comparisons_get_diff
from gene_expression_plots import survey_strain_amln_gene_exp, survey_strain_ccl4_gene_exp

In [2]:
# Paths used by this notebook. dataDirectory is left empty here;
# workingDirectory is where results are written and becomes the current
# directory, so every relative path below ('./motif_calling/', ...) resolves
# against it.
dataDirectory = ''
workingDirectory = '/home/h1bennet/strains/results/01_Strains_AMLN_H3K27Ac/'
# makedirs(exist_ok=True) creates missing parent directories as well and is
# safe to re-run, unlike a separate isdir() check followed by mkdir().
os.makedirs(workingDirectory, exist_ok=True)
os.chdir(workingDirectory)

In [3]:
# Output directory for the motif scanning results (relative to the working
# directory). exist_ok=True makes the cell idempotent without a separate
# existence check.
os.makedirs('./motif_scanning/', exist_ok=True)

In [4]:
# Collect every path under ./motif_calling/ whose name contains "act",
# then display them in sorted order.
act_peak_paths = glob.glob('./motif_calling/*act*')
np.sort(act_peak_paths)

array(['./motif_calling/00_c57_intersection_act_distal_peaks.txt',
       './motif_calling/00_c57_intersection_act_peaks.txt',
       './motif_calling/00_c57_union_act_distal_peaks.txt',
       './motif_calling/00_c57_union_act_peaks.txt',
       './motif_calling/01_balbc_intersection_act_distal_peaks.txt',
       './motif_calling/01_balbc_intersection_act_peaks.txt',
       './motif_calling/01_balbc_union_act_distal_peaks.txt',
       './motif_calling/01_balbc_union_act_peaks.txt',
       './motif_calling/02_aj_intersection_act_distal_peaks.txt',
       './motif_calling/02_aj_intersection_act_peaks.txt',
       './motif_calling/02_aj_union_act_distal_peaks.txt',
       './motif_calling/02_aj_union_act_peaks.txt'], dtype='<U58')

# Homer motifs can be found here:
    /bioinformatics/homer/motifs   

### Look for instances of the TFE3 and MITF motifs in balb and c57 specific peaks

    findMotifsGenome.pl ./motif_calling/01_balbc_intersection_act_peaks.txt \
    mm10 ./motif_scanning/ -find /bioinformatics/homer/motifs/tfe3.motif \
    > ./motif_scanning/01_balbc_intersection_act_peaks_TFE3.txt
    
    findMotifsGenome.pl ./motif_calling/01_balbc_intersection_act_peaks.txt \
    mm10 ./motif_scanning/ -find /bioinformatics/homer/motifs/mitf.motif \
    > ./motif_scanning/01_balbc_intersection_act_peaks_MITF.txt
    
    findMotifsGenome.pl ./motif_calling/00_c57_intersection_act_peaks.txt \
    mm10 ./motif_scanning/ -find /bioinformatics/homer/motifs/tfe3.motif \
    > ./motif_scanning/00_c57_intersection_act_peaks_TFE3.txt

    findMotifsGenome.pl ./motif_calling/00_c57_intersection_act_peaks.txt \
    mm10 ./motif_scanning/ -find /bioinformatics/homer/motifs/mitf.motif \
    > ./motif_scanning/00_c57_intersection_act_peaks_MITF.txt
    
### Scan for the same motifs using annotatePeaks.pl

    annotatePeaks.pl ./motif_calling/01_balbc_intersection_act_peaks.txt \
    mm10 -m /bioinformatics/homer/motifs/tfe3.motif \
    > ./motif_scanning/01_balbc_intersection_act_peaks_annoTFE3.txt
    
    annotatePeaks.pl ./motif_calling/01_balbc_intersection_act_peaks.txt \
    mm10 -m /bioinformatics/homer/motifs/mitf.motif \
    > ./motif_scanning/01_balbc_intersection_act_peaks_annoMITF.txt
    
    annotatePeaks.pl ./motif_calling/00_c57_intersection_act_peaks.txt \
    mm10 -m /bioinformatics/homer/motifs/tfe3.motif \
    > ./motif_scanning/00_c57_intersection_act_peaks_annoTFE3.txt
    
    annotatePeaks.pl ./motif_calling/00_c57_intersection_act_peaks.txt \
    mm10 -m /bioinformatics/homer/motifs/mitf.motif \
    > ./motif_scanning/00_c57_intersection_act_peaks_annoMITF.txt

### Look for instances of strain mutations in balb and c57 specific peaks

    MMARGE.pl annotate_mutations -file ./motif_scanning/01_balbc_intersection_act_peaks_annoMITF.txt \
    -ind balbcj, aj
    
    MMARGE.pl annotate_mutations -file ./motif_scanning/00_c57_intersection_act_peaks_annoMITF.txt \
    -ind balbcj, aj

### Extract sequences from promising areas of genome

In [6]:
# Output directory for the extracted sequences (relative to the working
# directory). exist_ok=True makes the cell idempotent without a separate
# existence check.
os.makedirs('./motif_sequences/', exist_ok=True)

    MMARGE.pl extract_sequences -file ./motif_scanning/01_balbc_intersection_act_peaks_annoMITF.txt \
    -ind c57bl6j balbcj aj
    
    mv sequences.txt ./motif_sequences/01_balbc_intersection_act_peaks_sequences.txt