In [1]:
# Fetch the EBC sources into ./ebc; later cells put /content/ebc on sys.path and import `ebc` and `matrix` from this checkout.
!git clone https://github.com/blpercha/ebc.git

Cloning into 'ebc'...
remote: Enumerating objects: 324, done.[K
remote: Total 324 (delta 0), reused 0 (delta 0), pack-reused 324[K
Receiving objects: 100% (324/324), 1.42 MiB | 21.72 MiB/s, done.
Resolving deltas: 100% (183/183), done.


In [2]:
import sys
sys.path.insert(0,'/content/ebc')

In [3]:
from ebc import EBC
from matrix import SparseMatrix
import pandas as pd
import numpy as np

In [4]:
# Co-clustering counter: one row and one column for each of the 3514
# drug-gene pairs, with every entry starting at zero.
mat = np.zeros(shape=(3514, 3514), dtype=int)
# Spot-check the top-left entry.
print(mat[0, 0])

0


In [5]:
# Problem dimensions and run count, named once so the sparse-matrix shape and
# the co-clustering counter cannot drift apart.
N_PAIRS, N_PATHS = 3514, 1232  # drug-gene pairs x dependency paths
N_RUNS = 1000

# Load (drug-gene pair, dependency path, value) triples from the TSV.
# Lines with fewer than 5 tab-separated fields are treated as headers and skipped.
with open("/content/ebc/resources/matrix-ebc-paper-dense.tsv", "r") as f:
    data = []
    for line in f:
        sl = line.split("\t")
        if len(sl) < 5:  # headers
            continue
        data.append([sl[0], sl[2], float(sl[4])])

matrix = SparseMatrix([N_PAIRS, N_PATHS])
matrix.read_data(data)
matrix.normalize()
# [30, 125] is the number of clusters on each axis (as echoed in EBC's log output).
# NOTE(review): the remaining positional arguments are passed through unchanged;
# confirm their meaning against the ebc package.
ebc = EBC(matrix, [30, 125], 10, 1e-10, 0.01)

# Start from a fresh counter so that re-running this cell does not add on top
# of counts left over from a previous execution.
mat = np.zeros((N_PAIRS, N_PAIRS), dtype=int)

# Run EBC N_RUNS times and count, for every pair (i, j), how often both rows
# were assigned to the same cluster.
for k in range(N_RUNS):
    cXY, objective, it = ebc.run()
    # Only cXY[0] is used: it holds the drug-gene pair cluster assignments,
    # while cXY[1] holds the dependency path clusters.
    row_clusters = np.asarray(cXY[0])
    n = len(row_clusters)
    # Broadcasting builds the full n x n "same cluster?" boolean table in one
    # step; adding it increments exactly the cells the nested i/j loop would.
    mat[:n, :n] += row_clusters[:, None] == row_clusters[None, :]

print(mat)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
--> Running iteration 1 .. objective value = 3.571478
--> Running iteration 2 .. objective value = 3.522732
--> Running iteration 3 .. objective value = 3.508818
--> Running iteration 4 .. objective value = 3.502224
EBC finished in 4 iterations, with final objective value 3.5022
Running EBC on a 2-d sparse matrix with size [3514, 1232] ...
Randomly initializing clusters, with cluster number on each axis: [30, 125] ...
--> Running iteration 1 .. objective value = 3.473585
--> Running iteration 2 .. objective value = 3.433978
--> Running iteration 3 .. objective value = 3.424722
EBC finished in 3 iterations, with final objective value 3.4247
Running EBC on a 2-d sparse matrix with size [3514, 1232] ...
Randomly initializing clusters, with cluster number on each axis: [30, 125] ...
--> Running iteration 1 .. objective value = 3.492894
--> Running iteration 2 .. objective value = 3.430550
--> Running iteration 3 .. objective 

In [6]:
# Collect the drug-gene pair label (first tab-separated field) of every data
# row, in file order.
with open("/content/ebc/resources/matrix-ebc-paper-dense.tsv", "r") as f:
    label = []
    for line in f:
        sl = line.split("\t")
        # Apply the same filter as the data-loading cell: a header or short
        # line must not contribute a label, otherwise the label count would no
        # longer match the rows that were fed to the sparse matrix.
        if len(sl) < 5:  # headers
            continue
        label.append(sl[0])
# dict.fromkeys keeps the first occurrence of each label and preserves order.
# NOTE(review): this assumes SparseMatrix numbers rows in first-appearance
# order, so that position i here is row i of the counts -- confirm in matrix.py.
labels = list(dict.fromkeys(label))
print(labels)

['(flavopiridol,nf-kappab)', '(tnf-r2,tnf-r1)', '(il-2,il-5)', '(il-11,il-10)', '(fgf-7,fgf-2)', '(clopidogrel,p-selectin)', '(fgf-7,fgf-1)', '(il-11,il-13)', '(propranolol,beta2)', '(isoflurane,caspase-3)', '(indomethacin,phospholipase)', '(anti-il-2,cd25)', '(zuclopenthixol,cyp2d6)', '(doxycycline,smad7)', '(il-2,il-1)', '(corticosteroids,leukotriene)', '(gliclazide,niddm)', '(tolbutamide,niddm)', '(dipyridamole,pde5)', '(geldanamycin,hsp90)', '(melatonin,bcl-2)', '(carbamates,faah)', '(pge2,pgd2)', '(losartan,urat1)', '(aldosterone,bmp-6)', '(gm-csf,il-13)', '(prostacyclin,cox-1)', '(astemizole,cyp2j2)', '(prostacyclin,cox-2)', '(gm-csf,il-15)', '(amiloride,kallikrein)', '(diphenhydramine,cyp2d6)', '(gm-csf,il-10)', '(valsartan,adiponectin)', '(forskolin,sf-1)', '(gm-csf,il-11)', '(il-11,gm-csf)', '(capecitabine,dlts)', '(pranlukast,cyslt1)', '(cetuximab,egfr)', '(gm-csf,il-18)', '(cysteamine,gastrin)', '(nadh,nad1)', '(nadh,nad3)', '(telmisartan,abca1)', '(abciximab,p-selectin)', '

In [7]:
# Wrap the co-clustering counts in a DataFrame whose rows and columns are both
# labelled with the drug-gene pair names.
df = pd.DataFrame(data=mat, index=labels, columns=labels)
df



Unnamed: 0,"(flavopiridol,nf-kappab)","(tnf-r2,tnf-r1)","(il-2,il-5)","(il-11,il-10)","(fgf-7,fgf-2)","(clopidogrel,p-selectin)","(fgf-7,fgf-1)","(il-11,il-13)","(propranolol,beta2)","(isoflurane,caspase-3)","(indomethacin,phospholipase)","(anti-il-2,cd25)","(zuclopenthixol,cyp2d6)","(doxycycline,smad7)","(il-2,il-1)","(corticosteroids,leukotriene)","(gliclazide,niddm)","(tolbutamide,niddm)","(dipyridamole,pde5)","(geldanamycin,hsp90)","(melatonin,bcl-2)","(carbamates,faah)","(pge2,pgd2)","(losartan,urat1)","(aldosterone,bmp-6)","(gm-csf,il-13)","(prostacyclin,cox-1)","(astemizole,cyp2j2)","(prostacyclin,cox-2)","(gm-csf,il-15)","(amiloride,kallikrein)","(diphenhydramine,cyp2d6)","(gm-csf,il-10)","(valsartan,adiponectin)","(forskolin,sf-1)","(gm-csf,il-11)","(il-11,gm-csf)","(capecitabine,dlts)","(pranlukast,cyslt1)","(cetuximab,egfr)",...,"(zanamivir,neuraminidase)","(pravastatin,adiponectin)","(verapamil,calmodulin)","(enalapril,enos)","(abatacept,ctla-4)","(indomethacin,ppargamma)","(daunorubicin,prolidase)","(rosiglitazone,glut4)","(fluoxetine,cyp2d6)","(galanthamine,cholinesterase)","(atorvastatin,fasl)","(atorvastatin,mcm6)","(fibrates,pai-1)","(ritonavir,cd36)","(mitomycin,rad51)","(melatonin,sirt1)","(zidovudine,ifn-alpha)","(losartan,at1r)","(il-2,perlecan)","(ciclosporin,p-gp)","(atra,p-gp)","(disulfiram,5-lipoxygenase)","(verapamil,p-gp)","(cerivastatin,statin)","(carbachol,rgs2)","(doxycycline,vegf)","(collagenase,mmp1)","(ndga,15-lox)","(doxorubicin,mdr1)","(pge2,aromatase)","(erythromycin,p-gp)","(methoxsalen,cyp2a6)","(atra,nf-kappab)","(delavirdine,cyp3a)","(pge2,ifn-gamma)","(dexamethasone,nfkappab)","(propafenone,cyp1a2)","(cisplatin,brca1)","(ndga,nrf2)","(zolmitriptan,5-ht1b)"
"(flavopiridol,nf-kappab)",1000,17,6,3,6,10,8,11,15,258,8,39,3,14,5,9,8,13,5,0,18,22,6,10,12,11,14,3,27,4,10,8,17,11,18,4,9,13,13,12,...,3,9,3,17,20,379,32,9,4,1,16,10,14,13,81,102,6,16,14,9,13,15,4,12,15,26,35,11,1,72,9,2,25,3,8,3,3,9,105,170
"(tnf-r2,tnf-r1)",17,1000,290,185,166,13,167,138,34,50,10,100,5,21,285,18,40,39,7,8,105,6,242,3,14,198,58,4,134,175,10,8,215,50,103,132,217,20,29,51,...,5,17,5,42,55,12,16,88,4,2,43,7,11,6,51,8,119,38,24,8,5,9,8,53,30,154,73,5,20,106,9,5,29,1,183,5,1,29,48,54
"(il-2,il-5)",6,290,1000,333,206,11,207,188,9,79,15,19,7,17,407,7,20,46,0,9,141,13,387,1,14,371,30,1,77,283,11,8,410,22,106,192,181,32,12,14,...,2,22,1,17,43,17,17,123,4,0,55,6,31,16,78,11,120,8,7,17,3,4,11,34,40,99,16,2,22,160,16,1,63,2,274,0,2,56,64,21
"(il-11,il-10)",3,185,333,1000,497,9,425,257,4,18,4,29,1,13,355,3,57,24,0,5,60,4,447,1,6,555,23,0,48,537,5,7,340,25,50,431,272,9,5,9,...,0,20,0,21,39,11,8,61,2,0,26,7,18,7,20,6,319,3,14,5,4,1,3,30,14,80,17,0,5,59,3,0,21,0,243,0,0,21,31,7
"(fgf-7,fgf-2)",6,166,206,497,1000,9,488,214,6,8,0,28,0,19,301,4,64,13,0,2,34,2,355,1,4,469,14,1,31,566,2,1,271,24,43,492,300,9,5,7,...,0,25,1,10,33,10,5,32,0,1,25,5,10,10,9,7,562,4,14,2,10,0,1,28,13,77,17,1,3,22,0,0,11,0,255,2,0,13,13,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(dexamethasone,nfkappab)",3,5,0,0,2,0,1,1,17,0,74,0,11,7,1,23,4,6,416,100,1,72,0,173,21,0,13,746,15,0,106,41,0,3,0,2,0,6,19,62,...,236,5,697,14,5,13,43,2,182,586,1,70,4,4,4,18,3,29,15,31,9,126,109,74,5,6,0,297,2,0,28,321,4,313,0,1000,313,83,0,2
"(propafenone,cyp1a2)",3,1,2,0,0,1,3,1,10,7,54,3,179,9,0,10,7,11,261,67,1,92,1,188,15,0,18,387,8,0,97,51,1,3,1,2,0,11,10,27,...,89,8,316,18,12,12,54,3,291,226,3,65,6,5,8,12,1,16,23,97,14,146,132,52,11,26,3,230,52,8,96,273,2,1000,2,313,1000,69,2,2
"(cisplatin,brca1)",9,29,56,21,13,21,12,25,14,18,32,10,59,102,8,13,48,29,46,20,32,20,19,35,34,9,26,77,25,15,37,39,12,21,28,20,15,101,20,8,...,28,97,97,26,37,35,20,18,36,36,34,48,45,87,53,24,21,7,27,61,62,23,57,43,100,106,14,65,58,14,59,68,48,69,14,83,69,1000,25,24
"(ndga,nrf2)",105,48,64,31,13,22,28,21,3,93,5,22,4,9,45,1,17,18,0,6,88,24,43,3,27,40,19,0,42,20,11,21,78,110,118,15,26,12,4,8,...,2,11,0,31,12,85,12,232,2,0,46,14,73,22,33,79,16,1,21,11,14,4,7,15,15,21,13,2,9,111,7,0,66,2,72,0,2,25,1000,23


In [8]:
# Write the labelled co-clustering table to cluster.csv (relative path, so it
# lands in the kernel's current working directory).
df.to_csv(path_or_buf='cluster.csv')