Notebook that generates figures comparing all three datasets within a single plot

In [21]:
from universal import UniversalDS, ChrData
from topologicalFeatures import Cliques, Bases, BasesOfBases


In [22]:
# Load each dataset from its JSON file and store a human-readable label on it
# as `DS`; the three objects are bound to A, B and C in that order.
loadedDatasets = []
for jsonPath, dsLabel in (
    ("../sampleData/data-pvalue-0.7-fin-min.json", "Tissue pcHi-C"),
    ("../sampleData/data-pvalue-5-fin-min.json", "Blood cell pcHi-C"),
    ("../sampleData/data-pvalue-10-fin-min.json", "Tissue Hi-C"),
):
    loadedDs = UniversalDS(jsonPath)
    loadedDs.DS = dsLabel
    loadedDatasets.append(loadedDs)
A, B, C = loadedDatasets

In [23]:
# Chromosome whose data is extracted from every dataset.
ch = "chr6"
# Build one ChrData per dataset, evaluated in A, B, C order.
aChData, bChData, cChData = [ChrData(sourceDs, ch) for sourceDs in (A, B, C)]

In [24]:
import matplotlib.pyplot as plt

In [25]:
# For each dataset, collect the midpoint looked up for element 0 of every link,
# followed by the midpoint looked up for element 1 of every link. The outer loop
# over `end` keeps that "all first ends, then all second ends" ordering.
aLinks = aChData.allLinks
linkPointsA = [aChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in aLinks]

bLinks = bChData.allLinks
linkPointsB = [bChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in bLinks]

cLinks = cChData.allLinks
linkPointsC = [cChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in cLinks]

In [26]:
# Overlay the link-endpoint distributions of the three datasets in one figure
# and save it as PNG, SVG and PDF.
title = "Link loci in different datasets"
fig, ax = plt.subplots(figsize=(16., 10.))
print("start {ch}".format(ch=ch))
print(len(aLinks), len(bLinks), len(cLinks))

bins = 250
# One filled step histogram per dataset. Each call bins its own data, and
# density=True normalizes every histogram so the datasets can be overlaid
# despite very different link counts.
for histPoints, histDs, faceColor, hatchPattern, fillAlpha in (
    (linkPointsA, A, "red", '-----', .3),
    (linkPointsB, B, "black", '\\\\\\\\\\', .3),
    (linkPointsC, C, "blue", '//////', .4),
):
    ax.hist(x=histPoints, bins=bins, histtype='step', label=histDs.DS, fill=True,
            alpha=fillAlpha, density=True, facecolor=faceColor, hatch=hatchPattern,
            linewidth=0.1, edgecolor="black")

ax.set_xlim(left=0)
# Pin the x-axis scientific-notation exponent to 1e6 to match the "Mbp" label.
ax.ticklabel_format(axis='x', scilimits=(6, 6))

ax.set_title("{ch}".format(ch=ch))
ax.tick_params(labelsize=24)
ax.legend(loc="upper right")

ax.set_xlabel('Loci in Mbp', fontsize=28)
ax.set_ylabel('Normalized count of link endpoints', fontsize=28)
fig.suptitle(title, size=32)

# Save through the figure object so the output never depends on which figure
# pyplot currently considers "current".
for fileExt, fileDpi in (("png", 600), ("svg", 600), ("pdf", 800)):
    fig.savefig('IMG-{title}.{ext}'.format(title=title, ext=fileExt), dpi=fileDpi)

# Close the figure instead of clearing it: a cleared figure stays registered
# with pyplot (keeping its memory alive) and is rendered by the notebook as an
# empty "<Figure ... with 0 Axes>" placeholder.
plt.close(fig)

start chr6
255839 47834 266949


<Figure size 1600x1000 with 0 Axes>

In [27]:
# Build the clique structure of every dataset (minC3TissueCount=1), take its
# link list, and collect the midpoints looked up for element 0 of every link
# followed by those looked up for element 1.
A3 = Cliques(aChData, minC3TissueCount=1)
aLinks = A3.getLinksList()
linkPointsA = [aChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in aLinks]

B3 = Cliques(bChData, minC3TissueCount=1)
bLinks = B3.getLinksList()
linkPointsB = [bChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in bLinks]

C3 = Cliques(cChData, minC3TissueCount=1)
cLinks = C3.getLinksList()
linkPointsC = [cChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in cLinks]

In [28]:
# Overlay the C3 endpoint distributions of the three datasets in one figure
# and save it as PNG, SVG and PDF.
title = "C3 loci in different datasets"
fig, ax = plt.subplots(figsize=(16., 10.))
print("start {ch}".format(ch=ch))
print(len(aLinks), len(bLinks), len(cLinks))

bins = 250
# One filled step histogram per dataset. Each call bins its own data, and
# density=True normalizes every histogram so the datasets can be overlaid
# despite very different link counts.
for histPoints, histDs, faceColor, hatchPattern, fillAlpha in (
    (linkPointsA, A, "red", '-----', .3),
    (linkPointsB, B, "black", '\\\\\\\\\\', .3),
    (linkPointsC, C, "blue", '//////', .4),
):
    ax.hist(x=histPoints, bins=bins, histtype='step', label=histDs.DS, fill=True,
            alpha=fillAlpha, density=True, facecolor=faceColor, hatch=hatchPattern,
            linewidth=0.1, edgecolor="black")

ax.set_xlim(left=0)

ax.set_title("{ch}".format(ch=ch))
ax.tick_params(labelsize=24)
ax.legend(loc="upper right")
# Pin the x-axis scientific-notation exponent to 1e6 to match the "Mbp" label.
ax.ticklabel_format(axis='x', scilimits=(6, 6))

ax.set_xlabel('Loci in Mbp', fontsize=28)
ax.set_ylabel('Normalized count of C3 endpoints', fontsize=28)
fig.suptitle(title, size=32)

# Save through the figure object so the output never depends on which figure
# pyplot currently considers "current".
for fileExt, fileDpi in (("png", 600), ("svg", 600), ("pdf", 800)):
    fig.savefig('IMG-{title}.{ext}'.format(title=title, ext=fileExt), dpi=fileDpi)

# Close the figure instead of clearing it: a cleared figure stays registered
# with pyplot (keeping its memory alive) and is rendered by the notebook as an
# empty "<Figure ... with 0 Axes>" placeholder.
plt.close(fig)

start chr6
383748 173304 966645


<Figure size 1600x1000 with 0 Axes>

In [29]:
from numpy import log2

# Build the bases-of-bases structure for every dataset.
bobA = BasesOfBases(aChData)
bobB = BasesOfBases(bChData)
bobC = BasesOfBases(cChData)

# For each dataset: reduce with a degree equal to the integer part of
# log2(number of links), then collect the midpoints looked up for element 0 of
# every resulting link followed by those looked up for element 1.
# `baseDeg` is rebound per dataset and ends up holding the value used for C.
baseDeg = int(log2(len(bobA.links)))
aLinks = bobA.reduce(baseDeg)
linkPointsA = [aChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in aLinks]

baseDeg = int(log2(len(bobB.links)))
bLinks = bobB.reduce(baseDeg)
linkPointsB = [bChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in bLinks]

baseDeg = int(log2(len(bobC.links)))
cLinks = bobC.reduce(baseDeg)
linkPointsC = [cChData.segmentIndToMidpoint[link[end]] for end in (0, 1) for link in cLinks]


Bases 17 of bases calculated. 4771 bases found
Bases 15 of bases calculated. 3098 bases found
Bases 18 of bases calculated. 12952 bases found


In [30]:
# Overlay the S(k) endpoint distributions of the three datasets in one figure
# and save it as PNG, SVG and PDF.
title = "Support S(log2(link count)) loci in different datasets"
fig, ax = plt.subplots(figsize=(16., 10.))
print("start {ch}".format(ch=ch))
print(len(aLinks), len(bLinks), len(cLinks))

bins = 250
# One filled step histogram per dataset. Each call bins its own data, and
# density=True normalizes every histogram so the datasets can be overlaid
# despite very different link counts.
for histPoints, histDs, faceColor, hatchPattern, fillAlpha in (
    (linkPointsA, A, "red", '-----', .3),
    (linkPointsB, B, "black", '\\\\\\\\\\', .3),
    (linkPointsC, C, "blue", '//////', .4),
):
    ax.hist(x=histPoints, bins=bins, histtype='step', label=histDs.DS, fill=True,
            alpha=fillAlpha, density=True, facecolor=faceColor, hatch=hatchPattern,
            linewidth=0.1, edgecolor="black")

ax.set_xlim(left=0)

ax.set_title("{ch}".format(ch=ch))
ax.tick_params(labelsize=24)
ax.legend(loc="upper right")
# Pin the x-axis scientific-notation exponent to 1e6 to match the "Mbp" label.
ax.ticklabel_format(axis='x', scilimits=(6, 6))

ax.set_xlabel('Loci in Mbp', fontsize=28)
ax.set_ylabel('Normalized count of S(k) endpoints', fontsize=28)
fig.suptitle(title, size=32)

# Save through the figure object so the output never depends on which figure
# pyplot currently considers "current".
for fileExt, fileDpi in (("png", 600), ("svg", 600), ("pdf", 800)):
    fig.savefig('IMG-{title}.{ext}'.format(title=title, ext=fileExt), dpi=fileDpi)

# Close the figure instead of clearing it: a cleared figure stays registered
# with pyplot (keeping its memory alive) and is rendered by the notebook as an
# empty "<Figure ... with 0 Axes>" placeholder.
plt.close(fig)

start chr6
4771 3098 12952


<Figure size 1600x1000 with 0 Axes>