In [112]:
from igv import IGV, Reference, Track
import pandas as pd
from IPython.display import display, HTML

In [113]:
import sys, time
# Make the FimoClient module importable from a local checkout.
# NOTE(review): absolute, user-specific path -- this cell only works on a
# machine with the same directory layout.
sys.path.append("/users/pshannon/github/fimoService/client-python")
# NOTE(review): star import; the notebook uses FimoClient from this module.
from FimoClient import *
# Same arrangement for the GetDNAClient module.
sys.path.append("/users/pshannon/github/getDNAService/client-python")
# NOTE(review): star import; the notebook uses GetDNAClient from this module.
from GetDNAClient import *

In [114]:
# Client for the FIMO service on host "whovian", port 5558; the assert checks
# that the client reports the expected host:port string.
fimo = FimoClient("whovian", 5558)
assert fimo.getHost() == 'whovian:5558'

# Client for hg38 sequence lookups; smoke test: chr1 positions 1..5 should
# come back as five N's.
dnaService = GetDNAClient("hg38")
assert dnaService.getSequence("chr1", 1, 5) == 'NNNNN'

## The VRK2-associated [SNP](https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs13384219)

hg38 location: chr2:57907323

In [115]:
# SNP position (hg38) and the number of flanking bases kept on each side.
chrom, loc, shoulder = "chr2", 57907323, 7

# Locus string spanning `shoulder` bases either side of the SNP.
snpLocus = "%s:%d-%d" % (
    chrom,
    loc - shoulder,
    loc + shoulder,
)

# Wider region used as the initial view of the IGV widget.
viewRange = "chr2:57,907,026-57,907,985"

In [116]:
# Build the IGV widget on the hg38 reference, opened at `viewRange`, with a
# single gene-annotation track (gzipped GTF plus its .tbi index).
igv = IGV(
    locus=viewRange,
    reference=Reference(id="hg38"),
    tracks=[
        Track(
            name="Genes",
            url="//s3.amazonaws.com/igv.broadinstitute.org/annotations/hg38/genes/gencode.v24.annotation.sorted.gtf.gz",
            indexURL="//s3.amazonaws.com/igv.broadinstitute.org/annotations/hg38/genes/gencode.v24.annotation.sorted.gtf.gz.tbi",
            display_mode="EXPANDED",
        )
    ],
)
# Last expression of the cell: renders the widget.
igv

In [117]:
# Write a one-row BED file marking the SNP window and describe it as an IGV
# track.
#
# BED intervals are 0-based with an exclusive end, whereas the window used
# elsewhere in this notebook (loc-shoulder .. loc+shoulder) is 1-based and
# inclusive.  The start is therefore shifted down by one so the feature covers
# all 2*shoulder+1 bases instead of dropping the leftmost one.
trackTbl = pd.DataFrame([[chrom, loc - shoulder - 1, loc + shoulder, "snp"]])
trackTbl.to_csv("vrk2Snp.bed", sep="\t", header=False, index=False)

# NOTE(review): the file is written to the working directory but loaded over
# HTTP from /files/shared/ on whovian -- presumably the same directory; confirm.
newTrack = Track(name="rs13384219 ", format="bed", indexed=False,
                 url="http://whovian:10005/files/shared/vrk2Snp.bed",
                 display_mode='EXPANDED')

In [118]:
# Add the SNP track (newTrack) to the already-displayed IGV widget.
igv.load_track(newTrack)

Loading track into IGV.js


In [119]:
# Display the locus string for the window around the SNP.
snpLocus

'chr2:57907316-57907330'

In [120]:
# Fetch the reference sequence for the SNP window: `shoulder` bases on each
# side of `loc`, so the SNP base sits in the middle of the returned string.
baseSequence = dnaService.getSequence(chrom, loc-shoulder, loc+shoulder)
print(baseSequence)

AGCATGCAAATTAGA


In [121]:
# Sanity check: the window should be 2*shoulder + 1 bases long.
len(baseSequence)

15

In [122]:
# Slice boundaries around the SNP base, which sits at 0-based index `shoulder`
# of the window sequence.
end1, start2, end2 = shoulder, shoulder + 1, len(baseSequence)

# One variant per substituted base: both flanks are kept and only the centre
# base is replaced.
mutA, mutG, mutT = (
    baseSequence[0:end1] + altBase + baseSequence[start2:end2]
    for altBase in ('A', 'G', 'T')
)

In [123]:
# Display the reference window sequence.
baseSequence

'AGCATGCAAATTAGA'

In [124]:
# Show the reference sequence and the G-substituted variant, one per row.
pd.DataFrame([[baseSequence], [mutG]])

Unnamed: 0,0
0,AGCATGCAAATTAGA
1,AGCATGCGAATTAGA


In [125]:
# Send both sequences to the FIMO client. The dict keys ("wt", "mutG") are the
# labels that come back in the result's "sequence name" column.
tbl_fimo = fimo.request({"wt": baseSequence, "mutG": mutG})

The highest-scoring motif on the + strand of the wild-type sequence is [MA0784.1](http://jaspar.genereg.net/cgi-bin/jaspar_db.pl?rm=present&collection=CORE&ID=MA0784.1).

In [126]:
# Display the motif hits returned by the FIMO request.
tbl_fimo

Unnamed: 0,#pattern name,matched sequence,p-value,q-value,score,sequence name,start,stop,strand
0,MA0507.1,CTAATTTGCATGC,5e-06,6.2e-05,15.3793,wt,2,14,-
1,MA0784.1,AGCATGCAAATTAG,8e-06,6.2e-05,14.0862,wt,1,14,+
2,MA0785.1,AGCATGCAAATT,1.5e-05,0.000242,13.06,wt,1,12,+
3,MA0792.1,CATGCAAAT,2.4e-05,0.000676,12.3186,wt,3,11,+
4,MA0788.1,AGCATGCAAATTA,2.5e-05,0.000301,12.44,wt,1,13,+
5,MA0787.1,GCATGCAAATTA,3.3e-05,0.000527,12.0323,wt,2,13,+
6,MA0068.2,CGAATTAG,4e-05,0.00127,11.4179,mutG,7,14,+
7,MA0786.1,GCATGCAAATTA,5.1e-05,0.000821,11.5181,wt,2,13,+
8,MA0789.1,CATGCAAAT,7.9e-05,0.00222,10.8571,wt,3,11,+
9,MA0068.2,CTAATTCG,8.4e-05,0.00134,10.9851,mutG,7,14,-


In [135]:
# Save the motif hits as tab-separated text.
# NOTE(review): index=False is not passed here, so the row index is written as
# an unnamed first column; the target directory must already exist. This cell's
# execution count (135) is later than the cells that follow it.
tbl_fimo.to_csv("vrk2-publication-prep/fimoResults.tsv", sep="\t")

In [128]:
import psycopg2 as psql

In [129]:
# Open a psycopg2 connection to the "hg38" database as user "pshannon".
# NOTE(review): the connection is not closed in the cells shown here.
db  = psql.connect("dbname=hg38 user=pshannon")

In [130]:
# Look up the `tf` entries mapped to motif MA0068.2, the only motif in the
# FIMO hits that matched the mutG sequence.
pd.read_sql_query("select * from motifsgenes where motif in ('MA0068.2')", db)

Unnamed: 0,motif,tf
0,MA0068.2,PAX4
1,MA0068.2,PAX6
2,MA0068.2,PAX3
3,MA0068.2,PAX7


In [132]:
# Motif -> tf rows for all nine distinct motifs reported in the FIMO hits
# table.  The query text is split across adjacent literals for readability;
# they concatenate to a single SQL string.
motifTFmap = pd.read_sql_query(
    "select * from motifsgenes where motif in ("
    "'MA0068.2', 'MA0507.1', 'MA0784.1', "
    "'MA0785.1', 'MA0792.1', 'MA0788.1', "
    "'MA0787.1', 'MA0786.1', 'MA0789.1')",
    db,
)

In [134]:
# Save the motif -> tf table as tab-separated text.
# NOTE(review): index=False is not passed here, so the row index is written as
# an unnamed first column; the target directory must already exist.
motifTFmap.to_csv("vrk2-publication-prep/motifTF.tsv", sep="\t")

In [87]:
# Render the motif -> tf table as HTML.
# NOTE(review): this cell's execution count (87) predates the cell that builds
# motifTFmap, so the output shown below may be stale -- re-run to confirm.
HTML(motifTFmap.to_html())

Unnamed: 0,motif,tf
0,MA0507.1,POU2F2
1,MA0784.1,POU1F1
2,MA0785.1,POU2F1
3,MA0786.1,POU3F1
4,MA0787.1,POU3F2
5,MA0788.1,POU3F3
6,MA0789.1,POU3F4
7,MA0792.1,POU5F1B
8,MA0507.1,HDX
9,MA0507.1,HMBOX1
