In [26]:
puts `head -3 "./raw-data/Drug-Diseasetriples.tsv"`

﻿Drug	Drug_id	Disease	Disease_id
2,2-dichloro-1,1-difluoroethyl difluoromethyl ether	C0613621	Hereditary Diseases	C0019247
Valproic Acid	C0042291	Neurodegenerative Disorders	C0524851


# Drug_id is a UMLS term



# Map UMLS to PubChem CUI and formal name

In [51]:
require 'rest-client'

def map_umls_to_cid(cui)
  api_key = ENV["APIKEY"] # Replace with BioPortal API key
  begin
    # cui = "C0019247"
    url = "https://data.bioontology.org/search?q=#{cui}&ontologies=MESH&require_exact_match=true&apikey=#{api_key}"
    warn url
    response = RestClient.get(url)
  rescue
    warn "umls lookup failed #{response.inspect}"
    return false
  end
  mappings = []
  data = JSON.parse(response)

#  if hit = data.dig('collection', 0)
  if hit = data.dig('collection')
    hit.each do |h|
      compound_name = h&.dig('prefLabel')
      mesh = h&.dig('@id')
      linksurl = h&.dig('links', 'mappings')
      mappings << { cui: cui, mesh: mesh, compound_name: compound_name, linksurl: linksurl}
    end
    return mappings
  else
    warn "No data found for #{cui}\n"
    return false
  end

rescue StandardError => e
  warn "No data found for #{cui} Error: #{e.inspect}\n"
  return false
end

# Example
puts map_umls_to_cid('C0613621')
  

https://data.bioontology.org/search?q=C0613621&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243


{:cui=>"C0613621", :mesh=>"http://purl.bioontology.org/ontology/MESH/C030536", :compound_name=>"2,2-dichloro-1,1-difluoroethyl difluoromethyl ether", :linksurl=>"https://data.bioontology.org/ontologies/MESH/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FMESH%2FC030536/mappings"}


In [52]:
require 'rest-client'
def map_name_to_cid(name)
  # PubChem REST is stupid, and consumes names that are only partially URI encoded!  (spaces substituted) and rejects fully URI encoded strings!
  # so I a forced to roll my own URI escaper... so stupid!
  name = name.gsub(/\s/, "%20")
  url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/#{name}/cids/JSON"
  warn url
  begin
    response = RestClient.get(url)
  rescue
    warn "name lookup failed #{response.inspect}"
    return false
  end
  data = JSON.parse(response)
#   warn JSON.pretty_generate data
#   abort

  cids = JSON.parse(response.body).dig('IdentifierList', 'CID')
  
  { name: name, cid: cids&.first || 'No CID found' }
  
end

# Example
mappings = map_umls_to_cid('C0613621')
mappings.each do |map|
  warn map_name_to_cid(map[:compound_name])
end




https://data.bioontology.org/search?q=C0613621&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/2,2-dichloro-1,1-difluoroethyl%20difluoromethyl%20ether/cids/JSON
{:name=>"2,2-dichloro-1,1-difluoroethyl%20difluoromethyl%20ether", :cid=>152803}


[{:cui=>"C0613621", :mesh=>"http://purl.bioontology.org/ontology/MESH/C030536", :compound_name=>"2,2-dichloro-1,1-difluoroethyl difluoromethyl ether", :linksurl=>"https://data.bioontology.org/ontologies/MESH/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FMESH%2FC030536/mappings"}]

# Iteration over all UMLS terms

In [56]:
require 'json'
require 'faraday'
require 'csv'

CSVFILE = "./raw-data/Drug-Diseasetriples.tsv".freeze
OUTPUT = "./mappings/drug-mappings.tsv".freeze
ERRORFILE = "./raw-data/druf-mapping-errors.txt".freeze

error = File.open(ERRORFILE, "w")
out = File.open(OUTPUT, "w")
out.write CSV.generate_line(["demokratis_umlsid","demokratis_label","pubchem_cid","IUPACname"])


CSV.foreach(CSVFILE, headers: true, col_sep: "\t") do |row|

  # Drug	Drug_id	    Disease	   Disease_id
  name = row["Drug"]
  umls = row["Drug_id"]
  # first lookup cui
  mappings = map_umls_to_cid(umls)
  unless mappings
    warn "failed lookup for #{umls} #{name}"
    error.write "failed lookup for #{umls} #{name}"
    next
  end
  
  result = []
  mappings.each do |map|
    hash = map_name_to_cid(map[:compound_name])
    unless hash
      warn "failed name lookup for UMLS #{umls} #{map[:compound_name]}"
      error.write "failed name lookup for UMLS #{umls} #{map[:compound_name]}"
      next
    end
    cid = hash[:cid]
    cid_guid = "https://pubchem.ncbi.nlm.nih.gov/compound/#{cid}"
    out.write CSV.generate_line(["http://purl.bioontology.org/ontology/MESH/#{umls}",name,cid_guid,map[:name]])
  end
end

puts "DONE!"
  
  
  

https://data.bioontology.org/search?q=C0613621&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/2,2-dichloro-1,1-difluoroethyl%20difluoromethyl%20ether/cids/JSON
https://data.bioontology.org/search?q=C0042291&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0026457&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Monoamine%20Oxidase%20Inhibitors/cids/JSON
name lookup failed nil
failed name lookup for UMLS C0026457 Monoamine Oxidase Inhibitors
https://data.bioontology.org/search?q=C0059747&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/ethyl%20acetate/cids/JSON
https://data.bioontology.org/search?q=C1567285&ontologies=MESH&require_exact_mat

https://data.bioontology.org/search?q=C0041044&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0013030&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Dopamine/cids/JSON
https://data.bioontology.org/search?q=C0054201&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0009079&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0649350&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Rivastigmine/cids/JSON
https://data.bioontology.org/search?q=C0016967&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0527316&ontolog

https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/epalrestat/cids/JSON
https://data.bioontology.org/search?q=C1176309&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0299792&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Aripiprazole/cids/JSON
https://data.bioontology.org/search?q=C0772257&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0244821&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0012582&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0074710&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/n

https://data.bioontology.org/search?q=C0008845&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0085228&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C1533126&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0085208&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0245561&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Duloxetine%20Hydrochloride/cids/JSON
https://data.bioontology.org/search?q=C0074393&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0034263&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-b

https://data.bioontology.org/search?q=C0017725&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0074414&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Sevoflurane/cids/JSON
https://data.bioontology.org/search?q=C0071097&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0026549&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0242531&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0039601&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0246631&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f9724

https://data.bioontology.org/search?q=C0031412&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C2983812&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0126174&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0540623&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0075632&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Sumatriptan/cids/JSON
https://data.bioontology.org/search?q=C0008286&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0033229&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f9724

https://data.bioontology.org/search?q=C1996349&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C2348308&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C1995652&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0028351&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0699680&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0645102&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/oleyloxyethylphosphorylcholine/cids/JSON
https://data.bioontology.org/search?q=C0013030&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-43

https://data.bioontology.org/search?q=C0004147&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0076107&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0073096&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0053799&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0022860&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0070166&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0001927&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0014479&ontologies=MESH&require_exact

https://data.bioontology.org/search?q=C0107994&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0019590&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Histamine%20Antagonists/cids/JSON
name lookup failed nil
failed name lookup for UMLS C0019590 Histamine Antagonists
https://data.bioontology.org/search?q=C0282151&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0032594&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Polysaccharides/cids/JSON
name lookup failed nil
failed name lookup for UMLS C0032594 Polysaccharides
https://data.bioontology.org/search?q=C0070302&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://da

https://data.bioontology.org/search?q=C1306772&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0073992&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0032952&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0873137&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0027358&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0258259&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/3-(2,4-dimethoxybenzylidene)anabaseine/cids/JSON
https://data.bioontology.org/search?q=C0025859&ontologies=MESH&require_exact_match=true&apikey=74027bd8

https://data.bioontology.org/search?q=C0025677&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C1174893&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/levocetirizine/cids/JSON
https://data.bioontology.org/search?q=C2930696&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0003360&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C1870115&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0077857&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0257343&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f9

https://data.bioontology.org/search?q=C2983812&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C1870115&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0083701&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/naftopidil/cids/JSON
https://data.bioontology.org/search?q=C0025242&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0126174&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C2983812&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0024808&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243

https://data.bioontology.org/search?q=C0016365&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0003360&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0048996&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/5-(3-hydroxybenzoyl)-2-thiophenesulfonamide/cids/JSON
https://data.bioontology.org/search?q=C0003360&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0077768&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://data.bioontology.org/search?q=C0001617&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Adrenal%20Cortex%20Hormones/cids/JSON
name

https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Ivabradine/cids/JSON
https://data.bioontology.org/search?q=C0095278&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/epalrestat/cids/JSON
https://data.bioontology.org/search?q=C1101148&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/bilastine/cids/JSON
https://data.bioontology.org/search?q=C0541746&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Thiazides/cids/JSON
name lookup failed nil
failed name lookup for UMLS C0541746 Thiazides
https://data.bioontology.org/search?q=C0006982&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/Carbidopa/cids/JSON
https://data.bioonto

name lookup failed nil
failed name lookup for UMLS C0042890 Vitamins


DONE!
