In [1]:
import numpy as np
import pandas as pd
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import AllChem
from sklearn.metrics import jaccard_score

In [2]:
jsonObj = pd.read_json("final_train_set.jsonl", lines=True)
jsonObj2 = pd.read_json("final_test_set.jsonl", lines=True)

In [3]:
drug_pairs = []

for index, row in jsonObj[["spans"]].iterrows():
    this_row = row.tolist()[0]
    if len(this_row) == 2:
        drug_pairs.append([this_row[0]["text"].lower(), this_row[1]["text"].lower()])
        
for index, row in jsonObj2[["spans"]].iterrows():
    this_row = row.tolist()[0]
    if len(this_row) == 2:
        drug_pairs.append([this_row[0]["text"].lower(), this_row[1]["text"].lower()])

In [4]:
len(drug_pairs)

821

In [5]:
drug_pairs

[['d-cycloserine', 'lidocaine'],
 ['propranolol', 'amiodarone'],
 ['moxifloxacin', 'tobramycin'],
 ['chenodeoxycholic', 'simvastatin'],
 ['ifosfamide', 'etoposide'],
 ['paclitaxel', 'bevacizumab'],
 ['cyclosporine', 'infliximab'],
 ['curcumin', 'metformin'],
 ['brodalumab', 'ustekinumab'],
 ['flutamide', 'bicalutamide'],
 ['paclitaxel', 'trastuzumab'],
 ['aspirin', 'clopidogrel'],
 ['trastuzumab', 'lapatinib'],
 ['lidocaine', 'oxybutynin'],
 ['ozogamicin', 'blinatumomab'],
 ['bevacizumab', 'capecitabine'],
 ['paclitaxel', 'carboplatin'],
 ['erythropoietin', 'trastuzumab'],
 ['paclitaxel', 'ifosfamide'],
 ['sorafenib', 'erlotinib'],
 ['adalimumab', 'infliximab'],
 ['bendamustine', 'chlorambucil'],
 ['diclofenac', 'octreotide'],
 ['caspofungin', 'amphotericin'],
 ['haloperidol', 'carbamazepine'],
 ['quinidine', 'verapamil'],
 ['azithromycin', 'cefuroxime'],
 ['epirubicin', 'cyclophosphamide'],
 ['palbociclib', 'letrozole'],
 ['halothane', 'succinylcholine'],
 ['mitomycin', 'vinblastine']

In [6]:
def get_fingerprint(chem_name):
    compound = pcp.get_compounds(chem_name, 'name')[0]
    mol = Chem.MolFromSmiles(compound.isomeric_smiles)
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, useChirality=True, radius=2, nBits = 256, bitInfo={})
    vector = np.array(fp)
    return vector

In [7]:
drug_pair_to_fingerprints = dict()

for drug_pair in drug_pairs:
    try:        
        fp_1 = get_fingerprint(drug_pair[0]).tolist()
        fp_2 = get_fingerprint(drug_pair[1]).tolist()
        
        print(drug_pair[0], drug_pair[1])
    except:
        continue
    drug_pair_to_fingerprints[(drug_pair[0], drug_pair[1])] = (fp_1, fp_2)

d-cycloserine lidocaine
propranolol amiodarone
moxifloxacin tobramycin
ifosfamide etoposide
curcumin metformin
flutamide bicalutamide
aspirin clopidogrel
lidocaine oxybutynin
paclitaxel carboplatin
paclitaxel ifosfamide
sorafenib erlotinib
bendamustine chlorambucil
diclofenac octreotide
caspofungin amphotericin
haloperidol carbamazepine
quinidine verapamil
azithromycin cefuroxime
epirubicin cyclophosphamide
palbociclib letrozole
halothane succinylcholine
mitomycin vinblastine
olmesartan azelnidipine
succinylcholine pancuronium
vinorelbine carboplatin
dexamethasone paclitaxel
flunarizine diltiazem
moclobemide mianserin
epinephrine clonidine
enoxaparin heparin
ifosfamide etoposide
esomeprazole omeprazole
ifosfamide cyclophosphamide
piperacillin netilmicin
methotrexate cyclosporin
trametinib cobimetinib
thalidomide cyclosporine
gemcitabine epirubicin
nicotine haloperidol
tacrolimus ganciclovir
nifedipine hydralazine
imipramine fluoxetine
theophylline somatostatin
etoposide prednisone
gent

artemisinin lumefantrine
ketoconazole genistein
erlotinib docetaxel
ifosfamide etoposide
paclitaxel sorafenib
clindamycin gentamicin
doxorubicin paclitaxel
chloroquine auranofin
carboplatin etoposide
paclitaxel carboplatin
melatonin cyclophosphamide
metformin gliclazide
oxcarbazepine carbamazepine
prasugrel clopidogrel
ketorolac bupivacaine
paclitaxel carboplatin
diclofenac carbamazepine
ursodeoxycholic acid taurine
fluconazole itraconazole
paclitaxel carboplatin
imipenem ceftazidime
selinexor bortezomib
mycophenolate mycophenolic acid
tacrolimus prednisone
gentamicin rifampin
etoposide doxorubicin
levodopa carbidopa
erythromycin tetracycline
tamoxifen gefitinib
finasteride minoxidil
dasatinib imatinib
gefitinib erlotinib
paclitaxel carboplatin
ondansetron metoclopramide
paclitaxel carboplatin
isoprenaline salbutamol
artesunate amodiaquine
cefepime levofloxacin
everolimus octreotide
prednisone azathioprine
amphotericin rifabutin
gemcitabine doxorubicin
amphotericin voriconazole
entacap

In [8]:
len(drug_pair_to_fingerprints)

576

In [9]:
for key, value in drug_pair_to_fingerprints.items():
    print(key[0], key[1], jaccard_score(value[0], value[1]))

d-cycloserine lidocaine 0.09302325581395349
propranolol amiodarone 0.18309859154929578
moxifloxacin tobramycin 0.10989010989010989
ifosfamide etoposide 0.0963855421686747
curcumin metformin 0.0975609756097561
flutamide bicalutamide 0.40350877192982454
aspirin clopidogrel 0.23076923076923078
lidocaine oxybutynin 0.24561403508771928
paclitaxel carboplatin 0.0963855421686747
paclitaxel ifosfamide 0.1276595744680851
sorafenib erlotinib 0.24324324324324326
bendamustine chlorambucil 0.5306122448979592
diclofenac octreotide 0.1875
caspofungin amphotericin 0.2672413793103448
haloperidol carbamazepine 0.14285714285714285
quinidine verapamil 0.15
azithromycin cefuroxime 0.22115384615384615
epirubicin cyclophosphamide 0.13157894736842105
palbociclib letrozole 0.1232876712328767
halothane succinylcholine 0.037037037037037035
mitomycin vinblastine 0.17592592592592593
olmesartan azelnidipine 0.2
succinylcholine pancuronium 0.13559322033898305
vinorelbine carboplatin 0.09411764705882353
dexamethasone

methylergometrine oxytocin 0.2777777777777778
cytarabine etoposide 0.1686746987951807
famotidine bismuth 0.0
alprazolam imipramine 0.2222222222222222
pazopanib sunitinib 0.23809523809523808
amphotericin rifampin 0.30434782608695654
curcumin irinotecan 0.18823529411764706
sirolimus tacrolimus 0.4954128440366973
palbociclib fulvestrant 0.18478260869565216
daptomycin vancomycin 0.3673469387755102
tetracycline chlortetracycline 0.7931034482758621
propranolol primidone 0.23404255319148937
oxaliplatin fluorouracil 0.058823529411764705
pamidronate thalidomide 0.11904761904761904
iohexol iodixanol 0.9473684210526315
vildagliptin voglibose 0.16666666666666666
paroxetine fluvoxamine 0.14084507042253522
oxaliplatin gemcitabine 0.12
cyclophosphamide paclitaxel 0.08602150537634409
mefloquine azithromycin 0.1595744680851064
pirfenidone candesartan 0.1746031746031746
difluprednate prednisolone 0.3835616438356164
panobinostat vorinostat 0.2545454545454545
nitrendipine nicardipine 0.7090909090909091
br

capecitabine tegafur 0.234375
daclatasvir asunaprevir 0.29896907216494845
mefloquine artemether 0.10144927536231885
irinotecan oxaliplatin 0.12658227848101267
dapagliflozin furosemide 0.19696969696969696
goserelin tamoxifen 0.14912280701754385
atorvastatin amlodipine 0.175
clozapine risperidone 0.20512820512820512
warfarin miltefosine 0.10526315789473684
digoxin dronedarone 0.18446601941747573
brimonidine latanoprost 0.10666666666666667
propofol rocuronium 0.04054054054054054
ribociclib letrozole 0.14285714285714285
lapatinib gemcitabine 0.20930232558139536
xylose chondroitin 0.17307692307692307
nifedipine hydrochlorothiazide 0.10344827586206896
pantoprazole docetaxel 0.18181818181818182
clopidogrel aspirin 0.23076923076923078
zidovudine ciprofloxacin 0.1917808219178082
cyclophosphamide prednisolone 0.0625
foretinib lapatinib 0.3163265306122449
metronidazole vancomycin 0.1592920353982301
amiodarone carvedilol 0.17073170731707318
pemetrexed gemcitabine 0.1917808219178082
artesunate mefl

In [30]:
drugbank = pd.read_csv("drugbank.csv", header = None)

In [31]:
len(drugbank)

3958

In [32]:
drugbank = [drug.lower() for drug in drugbank.iloc[:, 0].tolist()]

In [13]:
drugbank_fingerprints = {}

In [16]:
for drug in drugbank:
    try:
        drugbank_fingerprints[drug] = get_fingerprint(drug).tolist()
        print(drug)
    except:
        continue

lepirudin
bivalirudin
leuprolide
sermorelin
urokinase
goserelin
erythropoietin
salmon calcitonin
pegfilgrastim
thyrotropin alfa
gramicidin d
insulin human
menotropins
desmopressin
glucagon
insulin lispro
insulin glargine
cetrorelix
pegaspargase
eptifibatide
daptomycin
capromab pendetide
cyclosporine
urofollitropin
agalsidase beta
octreotide
interferon alfa-2b
abarelix
oxytocin
enfuvirtide
pyridoxal phosphate
cyanocobalamin
ademetionine
pyruvic acid
phenylalanine
biotin
choline
l-lysine
ascorbic acid
aspartic acid
ornithine
l-glutamine
adenosine phosphate
alpha-linolenic acid
methionine
tyrosine
calcitriol
lutein
cystine
succinic acid
riboflavin
n-acetylglucosamine
glutamic acid
glutathione
glycine
calcifediol
creatine
tryptophan
cysteine
thiamine
ergocalciferol
citrulline
threonine
nadh
folic acid
icosapent
valine
vitamin a
vitamin e
pyridoxine
lipoic acid
cholecalciferol
menadione
adenine
asparagine
pravastatin
fluvoxamine
valsartan
ramipril
masoprocol
flunisolide
baclofen
amphetamine



gadobenic acid
zileuton
modafinil
deferoxamine
scopolamine
carbinoxamine
etodolac
prilocaine
epinastine
tranylcypromine
isoflurane
ethotoin
tretinoin
hexachlorophene
dolasetron
clopidogrel
tetracycline
meropenem
potassium chloride
irinotecan
methimazole
mometasone
metyrosine
clavulanic acid
olopatadine
hydrocortamate
alprostadil
clidinium
malathion
etoposide
hydroflumethiazide
tirofiban
oxcarbazepine
propiomazine
roxithromycin
nalidixic acid
phenelzine
propantheline
estradiol
mefenamic acid
acyclovir
naproxen
gadopentetic acid
perindopril
uracil mustard
tripelennamine
haloprogin
primidone
sulfasalazine
candesartan cilexetil
tolazoline
gentamicin
tazarotene
fenoldopam
halazepam
alfentanil
colistin
dicyclomine
minaprine
pentoxifylline
proparacaine
indapamide
tropicamide
biperiden
ribavirin
phenylbutazone
fentanyl
meloxicam
sodium lauryl sulfate
orciprenaline
rosoxacin
propofol
acetazolamide
tadalafil
carprofen
disulfiram
ethynodiol diacetate
enprofylline
levomenthol
natamycin
cinoxacin
f

thymol
gamma-aminobutyric acid
terlipressin
cholic acid
nicotinamide
fusidic acid
resveratrol
sucrose
pregnenolone
d-methionine
piretanide
oxitriptan
lauric acid
glycolic acid
pidolic acid
acetylcholine
flavin adenine dinucleotide
acetic acid
propyl alcohol
stearic acid
oteracil
flavin mononucleotide
phenol
glutathione disulfide
brivudine
hemin
oxyphenbutazone
methylcobalamin
ribostamycin
deoxycholic acid
tromethamine
doconexent
propanoic acid
benzoic acid
palmitic acid
formaldehyde
urea
d-serine
ergosterol
pyrophosphoric acid
fructose
dequalinium
didecyldimethylammonium
oleic acid
citric acid
carbocisteine
taurocholic acid
lactic acid
lactose
cholesterol
gluconolactone
latamoxef
trioxsalen
thiotepa
estriol
estrone sulfate
quinestrol
carboxin
hesperidin
iodipamide
nimesulide
bifonazole
benoxaprofen
bithionol
clioquinol
dantron
metamizole
nialamide
nomifensine
oxeladin
phenolphthalein
prenylamine
thenalidine
urethane
zimelidine
rapacuronium
maraviroc
clofedanol
cyclandelate
cyproterone 



mangafodipir
mebutamate
methenamine
methylnaltrexone
nepafenac
niclosamide
nonoxynol-9
phenyl aminosalicylate
plerixafor
plicamycin
polidocanol
povidone-iodine
pralatrexate
protokylol
pyrithione
pyrvinium
raltegravir
phenylbutyric acid
sulconazole
sulfameter
tinzaparin
tiopronin
triethylenetetramine
triptorelin
unoprostone
viomycin
chloramphenicol succinate
flavone
parecoxib
triclosan
salicylamide
antazoline
dimetindene
nandrolone decanoate
dichloroacetic acid
nadroparin
triflusal
lurasidone
ticagrelor
tafluprost
ivacaftor
azilsartan medoxomil
spinosad
ioflupane i-123
deferiprone
lomitapide
vismodegib
acetylcarnitine
pitavastatin
cholecystokinin
rilpivirine
crizotinib
ulipristal
fingolimod
tesamorelin
eribulin
gabapentin enacarbil
boceprevir
fidaxomicin
cabozantinib
ruxolitinib
teriflunomide
vemurafenib
linagliptin
perampanel




gadoxetic acid
icosapent ethyl
carfilzomib
linaclotide
mirabegron
tofacitinib
regorafenib
aclidinium
glucarpidase
enzalutamide
teduglutide
ponatinib
bedaquiline
formestane
fluticasone furoate
canagliflozin
dimethyl fumarate
glycerol phenylbutyrate
pomalidomide
trametinib
dabrafenib
afatinib
ferric carboxymaltose
levomilnacipran
dolutegravir
riociguat
macitentan
luliconazole
sofosbuvir
chlorcyclizine
magaldrate
isoxsuprine
isoxicam
isoconazole
isoaminile
iopanoic acid
iopamidol
inositol nicotinate
ifenprodil
hexetidine
gemeprost
fusafungine
fursultiamine
dimetotiazine
fluocortolone
fluclorolone acetonide
floctafenine
fenbufen
etidocaine
ethoheptazine
diosmin
articaine
canrenoic acid
butriptyline
bromhexine
bisacodyl
aliskiren
ledipasvir
vorapaxar
miltefosine
suvorexant
empagliflozin
eliglustat
efinaconazole
tavaborole
tedizolid phosphate
albiglutide
finafloxacin
netupitant
naloxegol
ceftolozane
ibrutinib
idelalisib
acipimox
amorolfine
atosiban
avibactam
cannabidiol
ceritinib
ciprofibrat



indium in-111 pentetate
krypton kr 81m
sodium phosphate
insulin beef
sodium carbonate
glycerin
sodium sulfate
indium in-111 oxyquinoline
enalaprilat
rubidium rb-82
iofetamine i-123
magnesium carbonate
potassium lactate
sodium fluorophosphate
iotrolan
acrivastine
indium in-111 chloride
cetyl alcohol
avobenzone
octinoxate
strontium chloride sr-89
thiosulfuric acid
ferric ammonium citrate
fludeoxyglucose (18f)
urea c-13
talc
simethicone
urea c-14
mequinol
hydroquinone
secretin porcine
secretin human
ecamsule
octocrylene
titanium dioxide
methyl salicylate
iobenguane sulfate i-123
thonzonium
dexchlorpheniramine maleate
sodium glycerophosphate
choline c-11
insulin degludec
ixazomib
levmetamfetamine
casein
egg white
egg yolk
oat
orange
black pepper
vanilla
fig
rhubarb
laurus nobilis
nutmeg
paprika
basil
stannous chloride
carboxymethylcellulose
povidone
octisalate
homosalate
selenic acid
phenyl salicylate
hydrogen fluoride
cetylpyridinium
dimethicone
hypromellose
trolamine salicylate
silver ni



ferric sulfate
zinc picolinate
zeaxanthin
exametazime
tetrofosmin
homatropine
rose bengal
light green sf yellowish
oftasceine
sulisobenzone
pentoxyverine
magnesium glycinate
pantethine
cobamamide
ferrous cysteine glycinate
aluminum zirconium octachlorohydrex gly
menthyl salicylate
panthenol
alcloxa
bemotrizinol
amiloxate
aluminum chlorohydrex propylene glycol
ferrous bisglycinate
arbutin
enzacamene
dioxybenzone
ethylhexyl methoxycrylene
methylcellulose
magnesium aluminum silicate
lycopene
thonzylamine
aluminum sulfate
cyclomethicone 5
copper gluconate
zinc gluconate
tocopherol
poloxamer 407
hexylresorcinol
chromium picolinate
levomefolic acid
fluoride ion
diacetyl benzoyl lathyrol
bisoctrizole
calcium glycerophosphate
sea salt
protocatechualdehyde
diethylamino hydroxybenzoyl hexyl benzoate
methyl undecenoyl leucinate
dihydroergocornine
dihydro-alpha-ergocryptine
epicriptine
aluminum zirconium pentachlorohydrex gly
dl-methylephedrine
brilliant green cation
diethyltoluamide
dl-dimyristoy



indium in-111 pentetreotide
angiotensin ii
revefenacin
brexanolone
apalutamide
valbenazine
deflazacort
selinexor
delafloxacin
lemborexant
duvelisib
dacomitinib
binimetinib
glasdegib
elagolix
entrectinib
benznidazole
diacerein
avatrombopag
abemaciclib
fostamatinib
alpelisib
tecovirimat
voxilaprevir
sarecycline
letermovir
gadolinium
telotristat ethyl
mannitol busulfan
vaborbactam
sultamicillin
lorlatinib
gilteritinib
erdafitinib
citicoline
deutetrabenazine
edaravone
triclabendazole
brigatinib
polihexanide
propiverine
nefopam
p-nitrobiphenyl
doravirine
dopexamine
cantharidin
eravacycline
rucaparib
temocillin
diaminopropanol tetraacetic acid
betrixaban
siponimod
relebactam
bromperidol
iobitridol
steviolbioside
omadacycline
taurolidine
lynestrenol
copanlisib
piritramide
fedratinib
mizolastine
nitrite
oxetacaine
benzodiazepine
mebeverine
pentetreotide
plazomicin
protionamide
perazine
fenpropidin
iodide
gaxilose
benserazide
dinoprost
boscalid
trifarotene
perflubutane
lefamulin
secnidazole
peg



ferric pyrophosphate citrate
magnesium acetate
baloxavir marboxil
alpha-tocopherol succinate
d-alpha-tocopherol acetate
alpha-tocopherol acetate
choline salicylate
pentetic acid
sodium bisulfite
bromotheophylline
fosnetupitant
benzoin
acetyl sulfisoxazole
dichlorobenzene
sodium zirconium cyclosilicate
imidurea
medronic acid
betiatide
bisphenol a diglycidyl ether
butylparaben
cianidanol
dimercaptosuccinic acid
linoleic acid
tetrakis(2-methoxyisobutylisocyanide)copper(i) tetrafluoroborate
alpha-arbutin
butylene glycol
silodrate
phenylethyl resorcinol
racementhol
p-phenylenediamine
distearyldimonium
chloric acid
phosphorus
synthetic camphor
microcrystalline cellulose




oxidronic acid
diazolidinylurea
dipentamethylenethiuram disulfide
alpha-amyl cinnamaldehyde
benzylparaben
propylparaben
tetramethylthiuram monosulfide
nickel sulfate
dichromate
geraniol
cinnamaldehyde
aripiprazole lauroxil
cinnamyl alcohol
hydroxycitronellal
isoeugenol
ethylenediamine
diphenylguanidine
n
ditiocarb zinc
zinc dibutyldithiocarbamate
4-(isopropylamino)diphenylamine
methylchloroisothiazolinone
quaternium-15
bromothalonil
thiohexam
morpholinylmercaptobenzothiazole
disperse blue 106
cobalt chloride
methylparaben
quinoline yellow ws
caviar
elm
wormwood
chamomile
dl-alpha-tocopherol
dl-alpha tocopheryl acetate
calcium phosphate dihydrate
sodium ascorbate
calcium ascorbate
magnesium ascorbate
zinc ascorbate
niacinamide ascorbate
zinc acetate
ferrous gluconate
ferrous succinate
ferrous ascorbate
ferrous fumarate
sodium molybdate
potassium acetate
potassium sulfate
potassium
sodium phosphate
sodium phosphate
sodium phosphate
sodium borate
lithium hydroxide
lithium citrate
lithium 

In [17]:
drugbank_fingerprints

{'lepirudin': [0,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  0,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  0,
  1,
  1,
  1,
  0,
  1,
  1,
  0,
  0,
  0,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  1,
  0,
  1,
  1,
  1,
  0,
  1,
  0,
  1,
  1,
  0,
  1,
  0,
  

In [42]:
drug_pair_to_drug_pairs = {}

edge_nodes = []

for key, value in drug_pair_to_fingerprints.items():
    drug_1 = key[0]
    drug_2 = key[1]
    drug_1_fp = value[0]
    drug_2_fp = value[1]
    
    best_drug_1_match = None
    best_drug_2_match = None
    
    max_jaccard_1 = 0
    max_jaccard_2 = 0
    
    for key, value in drugbank_fingerprints.items():
        drug_1_jaccard = jaccard_score(value, drug_1_fp)
        
        if drug_1_jaccard > max_jaccard_1 and key != drug_1 and key != drug_2:
            max_jaccard_1 = drug_1_jaccard
            best_drug_1_match = key
    
    for key, value in drugbank_fingerprints.items():
        drug_2_jaccard = jaccard_score(value, drug_2_fp)
        
        if drug_2_jaccard > max_jaccard_2 and key != drug_1 and key != drug_2:
            max_jaccard_2 = drug_1_jaccard
            best_drug_2_match = key
            
    jaccard = jaccard_score(drug_1_fp, drug_2_fp)
    
    if jaccard > 0.5 and jaccard != 1.0:    
        print([drug_1, drug_2, str(jaccard)])
        edge_nodes.append([drug_1, drug_2, str(jaccard)])
    
    if max_jaccard_1 > 0.5 and max_jaccard_1 != 1.0:    
        print([drug_1, best_drug_1_match, str(max_jaccard_1)])
        edge_nodes.append([drug_1, best_drug_1_match, str(max_jaccard_1)])
    
    if max_jaccard_2 > 0.5 and max_jaccard_2 != 1.0:
        print([drug_2, best_drug_2_match, str(max_jaccard_2)])
        edge_nodes.append([drug_2, best_drug_2_match, str(max_jaccard_2)])

['propranolol', 'pindolol', '0.6904761904761905']
['moxifloxacin', 'gatifloxacin', '0.7419354838709677']
['curcumin', 'ethyl ferulate', '0.6944444444444444']
['paclitaxel', 'docetaxel', '0.8255813953488372']
['paclitaxel', 'docetaxel', '0.8255813953488372']
['sorafenib', 'regorafenib', '0.8245614035087719']
['bendamustine', 'chlorambucil', '0.5306122448979592']
['diclofenac', 'aceclofenac', '0.75']
['caspofungin', 'anidulafungin', '0.5894736842105263']
['haloperidol', 'bromperidol', '0.8409090909090909']
['quinidine', 'quinine', '0.7636363636363637']
['azithromycin', 'erythromycin', '0.7972972972972973']
['epirubicin', 'doxorubicin', '0.9076923076923077']
['palbociclib', 'ribociclib', '0.6666666666666666']
['olmesartan', 'losartan', '0.6']
['succinylcholine', 'carbamoylcholine', '0.6363636363636364']
['vinorelbine', 'vinflunine', '0.8202247191011236']
['dexamethasone', 'betamethasone', '0.8545454545454545']
['flunarizine', 'cinnarizine', '0.8055555555555556']
['epinephrine', 'racepinep

['raloxifene', 'arzoxifene', '0.6909090909090909']
['desogestrel', 'etonogestrel', '0.75']
['ampicillin', 'amoxicillin', '0.8723404255319149']
['cefazolin', 'cefamandole', '0.581081081081081']
['vinorelbine', 'vinflunine', '0.8202247191011236']
['etoposide', 'teniposide', '0.7945205479452054']
['voriconazole', 'efinaconazole', '0.6440677966101694']
['sulpiride', 'sultopride', '0.8367346938775511']
['roxithromycin', 'clarithromycin', '0.725']
['roxithromycin', 'erythromycin', '0.7625']
['cytarabine', 'gemcitabine', '0.6444444444444445']
['estrone', 'estrone sulfate', '0.7555555555555555']
['abiraterone', 'prasterone', '0.5454545454545454']
['diclofenac', 'aceclofenac', '0.75']
['epinephrine', 'racepinephrine', '0.7586206896551724']
['paclitaxel', 'docetaxel', '0.8255813953488372']
['perindopril', 'ramipril', '0.7291666666666666']
['paromomycin', 'framycetin', '0.8653846153846154']
['ribociclib', 'palbociclib', '0.6666666666666666']
['nifedipine', 'nisoldipine', '0.8095238095238095']
['e

['rosiglitazone', 'pioglitazone', '0.5818181818181818']
['entecavir', 'penciclovir', '0.5192307692307693']
['paclitaxel', 'docetaxel', '0.8255813953488372']
['tamoxifen', 'toremifene', '0.775']
['sorafenib', 'regorafenib', '0.8245614035087719']
['sorafenib', 'regorafenib', '0.8245614035087719']
['nilotinib', 'imatinib', '0.5844155844155844']
['somatostatin', 'octreotide', '0.6666666666666666']
['paclitaxel', 'docetaxel', '0.8255813953488372']
['propranolol', 'pindolol', '0.6904761904761905']
['decitabine', 'azacitidine', '0.7317073170731707']
['docetaxel', 'paclitaxel', '0.8255813953488372']
['enalapril', 'captopril', '0.5098039215686274']
['enalapril', 'enalaprilat', '0.875']
['decitabine', 'azacitidine', '0.7317073170731707']
['raloxifene', 'arzoxifene', '0.6909090909090909']
['dolutegravir', 'bictegravir', '0.5342465753424658']
['abiraterone', 'prasterone', '0.5454545454545454']
['topotecan', 'irinotecan', '0.6024096385542169']
['pemetrexed', 'folic acid', '0.5873015873015873']
['at

In [46]:
edge_nodes = pd.DataFrame(edge_nodes)

In [50]:
edge_nodes = edge_nodes.drop_duplicates()

In [51]:
edge_nodes

Unnamed: 0,0,1,2
0,propranolol,pindolol,0.6904761904761905
1,moxifloxacin,gatifloxacin,0.7419354838709677
2,curcumin,ethyl ferulate,0.6944444444444444
3,paclitaxel,docetaxel,0.8255813953488372
5,sorafenib,regorafenib,0.8245614035087719
...,...,...,...
353,isepamicin,plazomicin,0.6219512195121951
357,aztreonam,ceftazidime,0.5540540540540541
358,pindolol,propranolol,0.6904761904761905
359,pindolol,oxprenolol,0.5652173913043478


In [52]:
edge_nodes.to_csv("edge_nodes.csv")

In [64]:
edge_nodes = pd.read_csv("edge_nodes.csv", index_col = 0)

In [65]:
edge_nodes

Unnamed: 0,0,1,2
0,propranolol,pindolol,0.690476
1,moxifloxacin,gatifloxacin,0.741935
2,curcumin,ethyl ferulate,0.694444
3,paclitaxel,docetaxel,0.825581
5,sorafenib,regorafenib,0.824561
...,...,...,...
353,isepamicin,plazomicin,0.621951
357,aztreonam,ceftazidime,0.554054
358,pindolol,propranolol,0.690476
359,pindolol,oxprenolol,0.565217


In [6]:
interactions = pd.read_csv("interactions.tsv", sep = "\t")

In [9]:
drug_targets = interactions[["drug_name", "gene_name"]]

In [14]:
drug_targets = drug_targets.dropna()

In [16]:
drug_targets["drug_name"] = drug_targets["drug_name"].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drug_targets["drug_name"] = drug_targets["drug_name"].str.lower()


In [17]:
drug_targets

Unnamed: 0,drug_name,gene_name
0,bms-387032,CDK7
1,nifekalant,VDR
2,bisacodyl,GPR55
3,pentachlorophenol,NR1H4
5,prednisone,APOE
...,...,...
85449,chembl541400,CSF1R
85452,chembl546797,CDK8
85455,infigratinib,PIK3C3
85458,resveratrol,PRKCA


In [51]:
drugbank_dataframe = pd.DataFrame(drugbank, columns = ["drug"])

In [52]:
mapped_drugbank = pd.merge(drugbank_dataframe, drug_targets, left_on = "drug", right_on = "drug_name", how = "left")[["drug", "gene_name"]]

In [58]:
mapped_drugbank = mapped_drugbank.dropna()

In [69]:
mapped_drugbank = mapped_drugbank.groupby('drug')['gene_name'].agg(list).dropna().reset_index()

In [70]:
mapped_drugbank_dictionary = mapped_drugbank.set_index('drug')['gene_name'].to_dict()

In [71]:
mapped_drugbank_dictionary

{'2-mercaptobenzothiazole': ['IDO1',
  'VDR',
  'KDM4E',
  'ALDH1A1',
  'EHMT2',
  'NFE2L2',
  'KAT2A',
  'HPGD'],
 'abacavir': ['LST1',
  'ABCC3',
  'HLA-B',
  'ADH1A',
  'ABCC2',
  'LTB',
  'ABCC1',
  'ADH1C',
  'ADH1B',
  'LTA',
  'ADK'],
 'abaloparatide': ['PTH1R', 'PTH2R', 'PTH1R'],
 'abarelix': ['GNRHR', 'KLK3', 'GNRHR', 'GNRHR', 'GNRHR', 'GNRHR'],
 'abatacept': ['CD80',
  'CD86',
  'CD80',
  'CD86',
  'CTLA4',
  'CD80',
  'CD80',
  'CD86',
  'CD80',
  'CD80',
  'CD86',
  'CD86',
  'CD86',
  'CD80'],
 'abciximab': ['ITGB3',
  'ITGAV',
  'FGB',
  'ITGA2B',
  'FGA',
  'ITGA2B',
  'ITGB3',
  'ITGA2B',
  'ITGA2B',
  'ITGB3',
  'FGG',
  'ITGB3'],
 'abemaciclib': ['CDKN2A',
  'KRAS',
  'CDK4',
  'CCND3',
  'CDK6',
  'KRAS',
  'SMARCA4',
  'CDK4',
  'PGR',
  'ESR1',
  'NRAS',
  'CDKN2A',
  'CDK4',
  'ESR1',
  'CDK4',
  'ERBB2',
  'CDK4',
  'ESR2',
  'CDK4',
  'CCND1',
  'CDK6',
  'TP53',
  'ERBB2',
  'CDK6',
  'CDKN2A',
  'CDK6',
  'CCND2'],
 'abiraterone': ['DSE',
  'PTEN',
  'CYP17A1'

In [66]:
edge_nodes

Unnamed: 0,0,1,2
0,propranolol,pindolol,0.690476
1,moxifloxacin,gatifloxacin,0.741935
2,curcumin,ethyl ferulate,0.694444
3,paclitaxel,docetaxel,0.825581
5,sorafenib,regorafenib,0.824561
...,...,...,...
353,isepamicin,plazomicin,0.621951
357,aztreonam,ceftazidime,0.554054
358,pindolol,propranolol,0.690476
359,pindolol,oxprenolol,0.565217


In [100]:
edges = []

for index in range(len(edge_nodes)):
    try:
        set_1 = set(mapped_drugbank_dictionary[edge_nodes.iloc[index, 0]])
        set_2 = set(mapped_drugbank_dictionary[edge_nodes.iloc[index, 1]])
        intersection = set_1.intersection(set_2)
        
        if len(intersection) > 0:
            edges.append([edge_nodes.iloc[index, 0], edge_nodes.iloc[index, 1], list(intersection)])        
    except:
        continue

In [101]:
edges = pd.DataFrame(edges, columns = ["edge_1", "edge_2", "geneset"])

In [102]:
edges

Unnamed: 0,edge_1,edge_2,geneset
0,propranolol,pindolol,"[ADRB1, ADRB2]"
1,paclitaxel,docetaxel,"[PIK3CA, GSTM1, TUBB3, ABCC1, KRAS, NOTCH1, SO..."
2,sorafenib,regorafenib,"[PIK3CA, KIT, FLT1, ABL1, YES1, KRAS, FLT4, FG..."
3,diclofenac,aceclofenac,[CYP2C9]
4,haloperidol,bromperidol,"[CYP2D6, ABCB1, CYP3A4, DRD2]"
...,...,...,...
138,atracurium,atracurium besylate,[CHRNA1]
139,lansoprazole,dexlansoprazole,"[ATP4A, CYP2C19, ATP4B]"
140,terbutaline,clenbuterol,[ADRB2]
141,pindolol,propranolol,"[ADRB1, ADRB2]"


In [117]:
edge_nodes.columns = ["edge_1", "edge_2", "similarity"]

In [118]:
edge_nodes

Unnamed: 0,edge_1,edge_2,similarity
0,propranolol,pindolol,0.690476
1,moxifloxacin,gatifloxacin,0.741935
2,curcumin,ethyl ferulate,0.694444
3,paclitaxel,docetaxel,0.825581
5,sorafenib,regorafenib,0.824561
...,...,...,...
353,isepamicin,plazomicin,0.621951
357,aztreonam,ceftazidime,0.554054
358,pindolol,propranolol,0.690476
359,pindolol,oxprenolol,0.565217


In [114]:
edges

Unnamed: 0,edge_1,edge_2,geneset
0,propranolol,pindolol,"[ADRB1, ADRB2]"
1,paclitaxel,docetaxel,"[PIK3CA, GSTM1, TUBB3, ABCC1, KRAS, NOTCH1, SO..."
2,sorafenib,regorafenib,"[PIK3CA, KIT, FLT1, ABL1, YES1, KRAS, FLT4, FG..."
3,diclofenac,aceclofenac,[CYP2C9]
4,haloperidol,bromperidol,"[CYP2D6, ABCB1, CYP3A4, DRD2]"
...,...,...,...
138,atracurium,atracurium besylate,[CHRNA1]
139,lansoprazole,dexlansoprazole,"[ATP4A, CYP2C19, ATP4B]"
140,terbutaline,clenbuterol,[ADRB2]
141,pindolol,propranolol,"[ADRB1, ADRB2]"


In [119]:
merged_df = pd.merge(edges, edge_nodes, how='inner', on=['edge_1', 'edge_2'])

In [120]:
merged_df

Unnamed: 0,edge_1,edge_2,geneset,similarity
0,propranolol,pindolol,"[ADRB1, ADRB2]",0.690476
1,paclitaxel,docetaxel,"[PIK3CA, GSTM1, TUBB3, ABCC1, KRAS, NOTCH1, SO...",0.825581
2,sorafenib,regorafenib,"[PIK3CA, KIT, FLT1, ABL1, YES1, KRAS, FLT4, FG...",0.824561
3,diclofenac,aceclofenac,[CYP2C9],0.750000
4,haloperidol,bromperidol,"[CYP2D6, ABCB1, CYP3A4, DRD2]",0.840909
...,...,...,...,...
138,atracurium,atracurium besylate,[CHRNA1],0.844828
139,lansoprazole,dexlansoprazole,"[ATP4A, CYP2C19, ATP4B]",0.840000
140,terbutaline,clenbuterol,[ADRB2],0.666667
141,pindolol,propranolol,"[ADRB1, ADRB2]",0.690476


In [121]:
merged_df.to_csv("output.csv")