In [26]:
# Preview the header and first two data rows of the raw drug/disease TSV by shelling out to `head`.
puts `head -3 "./raw-data/Drug-Diseasetriples.tsv"`

﻿Drug	Drug_id	Disease	Disease_id
2,2-dichloro-1,1-difluoroethyl difluoromethyl ether	C0613621	Hereditary Diseases	C0019247
Valproic Acid	C0042291	Neurodegenerative Disorders	C0524851


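# Drug_id is described as a MeSH Supplementary Concept Record (SCR) identifier from the Medical Subject Headings (MeSH) vocabulary; note that the BioPortal lookup below searches for it as a UMLS CUI (e.g. C0613621 resolves to the MeSH record C030536).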



# Map MeSH to PubChem CID and formal name

In [36]:
require 'rest-client'

# Search the MeSH ontology on BioPortal for an identifier (exact match only).
#
# The BioPortal API key is read from the APIKEY environment variable. Only the
# key-free part of the URL is logged, so the key does not end up on stderr or
# in saved notebook output.
#
# @param cui [String] identifier to search for, e.g. "C0613621"
# @return [Array<Hash>, false] one hash per search hit with the keys :cui,
#   :mesh (the hit's "@id"), :compound_name (its "prefLabel") and :linksurl
#   (its "links" -> "mappings" URL); false when the request fails, the
#   response cannot be parsed, or it has no "collection" entry
def map_umls_to_cid(cui)
  api_key = ENV["APIKEY"] # BioPortal API key
  search_url = "https://data.bioontology.org/search?q=#{cui}&ontologies=MESH&require_exact_match=true"
  warn search_url
  begin
    response = RestClient.get("#{search_url}&apikey=#{api_key}")
  rescue StandardError => e
    # `response` is still nil when the request raises, so report the exception itself.
    warn "lookup failed #{e.inspect}"
    return false
  end

  hits = JSON.parse(response.body).dig('collection')
  unless hits
    warn "No data found for #{cui}\n"
    return false
  end

  hits.map do |hit|
    {
      cui: cui,
      mesh: hit&.dig('@id'),
      compound_name: hit&.dig('prefLabel'),
      linksurl: hit&.dig('links', 'mappings')
    }
  end
rescue StandardError => e
  warn "No data found for #{cui} Error: #{e.inspect}\n"
  false
end

# Example: prints one hash per search hit, or "false" when the lookup failed
puts map_umls_to_cid('C0613621')
  

https://data.bioontology.org/search?q=C0613621&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243


{:cui=>"C0613621", :mesh=>"http://purl.bioontology.org/ontology/MESH/C030536", :compound_name=>"2,2-dichloro-1,1-difluoroethyl difluoromethyl ether", :linksurl=>"https://data.bioontology.org/ontologies/MESH/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FMESH%2FC030536/mappings"}


In [40]:
require 'rest-client'
# Resolve a compound name to a PubChem CID through the PUG REST name lookup.
#
# @param name [String] compound name, e.g. "Valproic Acid"
# @return [Hash, false]
#   - { name:, cid: } on a 200 response; :cid is the first CID listed, or the
#     string 'No CID found' when the response lists none
#   - { name:, error: } on a non-200 response or an unexpected error
#   - false when the HTTP request itself raises
#   :name is always the name as passed in, not its URL-escaped form.
def map_name_to_cid(name)
  # PUG REST accepts names in which only the whitespace is percent-encoded and
  # rejects fully URI-encoded strings, so just the whitespace is escaped by hand.
  escaped_name = name.gsub(/\s/, "%20")
  url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/#{escaped_name}/cids/JSON"
  warn url
  begin
    response = RestClient.get(url)
  rescue StandardError => e
    # `response` is still nil when the request raises, so report the exception itself.
    warn "lookup failed #{e.inspect}"
    return false
  end

  # RestClient responses expose the HTTP status as `code`.
  if response.code == 200
    cids = JSON.parse(response.body).dig('IdentifierList', 'CID')
    { name: name, cid: cids&.first || 'No CID found' }
  else
    { name: name, error: "No CID found (Status: #{response.code})" }
  end
rescue StandardError => e
  { name: name, error: "Error: #{e.message}" }
end

# Example: resolve a CUI to its MeSH hits, then look up each hit's label in PubChem.
# map_umls_to_cid returns false when the lookup fails, so fall back to an empty list.
mappings = map_umls_to_cid('C0613621') || []
mappings.each do |map|
  # inspect explicitly: a bare Hash argument to warn can be read as keyword arguments on older Rubies
  warn map_name_to_cid(map[:compound_name]).inspect
end




https://data.bioontology.org/search?q=C0613621&ontologies=MESH&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
{:name=>"2,2-dichloro-1,1-difluoroethyl difluoromethyl ether", :error=>"Error: undefined method `encode' for URI:Module"}


[{:cui=>"C0613621", :mesh=>"http://purl.bioontology.org/ontology/MESH/C030536", :compound_name=>"2,2-dichloro-1,1-difluoroethyl difluoromethyl ether", :linksurl=>"https://data.bioontology.org/ontologies/MESH/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FMESH%2FC030536/mappings"}]

In [28]:
# Fetch the preferred IUPAC name of a PubChem compound record.
#
# The compound JSON carries a list of "props"; the entries of interest look like
# the following (this method picks the one whose urn "name" is "Preferred"):
#
#         {
#           "urn": {
#             "label": "IUPAC Name",
#             "name": "Allowed",
#             "datatype": 1,
#             "version": "2.7.0",
#             "software": "Lexichem TK",
#             "source": "OpenEye Scientific Software",
#             "release": "2025.04.14"
#           },
#           "value": {
#             "sval": "4-aminobutanoic acid"
#           }
#         },
#
# @param cid [String, Integer] PubChem compound identifier, e.g. '119'
# @return [Hash, false] { name: <IUPAC name> } on success; false when the
#   response status is not 200, no preferred IUPAC name is present, or any
#   error is raised along the way
def map_cid_to_iupacname(cid)
  # https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/119/JSON
  conn = Faraday.new(url: 'https://pubchem.ncbi.nlm.nih.gov')
  # Faraday's get takes (url, params, headers): headers must be the third
  # argument, otherwise the hash is sent as query-string parameters.
  response = conn.get("/rest/pug/compound/cid/#{cid}/JSON", nil, { 'Accept' => 'application/json' })

  unless response.status == 200
    warn "CID #{cid} lookup failed (Status: #{response.status})"
    return false
  end

  props = JSON.parse(response.body).dig("PC_Compounds", 0, "props") || []
  preferred = props.select do |prop|
    prop.dig("urn", "label") == "IUPAC Name" && prop.dig("urn", "name") == "Preferred"
  end
  if preferred.empty?
    warn "CID #{cid} didnt map at all"
    return false
  end

  # If several preferred entries exist the last one wins, as in the original loop.
  { name: preferred.last.dig("value", "sval") }
rescue StandardError => e
  warn "Error: #{e.inspect}"
  false
end

# Example: CID 119 (the recorded output for this call is "4-aminobutanoic acid")
puts map_cid_to_iupacname('119')

{:name=>"4-aminobutanoic acid"}


# Iteration over all mesh terms

In [32]:
require 'json'
require 'faraday'
require 'csv'

CSVFILE = "./raw-data/Drug-Diseasetriples.tsv".freeze
OUTPUT = "./mappings/drug-mappings.tsv".freeze

# Resolve one Drug_id to a PubChem CID and IUPAC name by chaining the lookups:
# Drug_id -> MeSH search hits -> PubChem CID (by preferred label) -> IUPAC name.
# Returns { cid:, name: } or nil when any step fails.
def resolve_drug_id(drug_id)
  hits = map_umls_to_cid(drug_id) # Array of hit hashes, or false
  return nil unless hits

  # Use the first hit whose label PubChem can resolve to a numeric CID
  # (map_name_to_cid may return false, an error hash, or a 'No CID found' string).
  cid = nil
  hits.each do |hit|
    label = hit[:compound_name]
    next unless label

    lookup = map_name_to_cid(label)
    next unless lookup.is_a?(Hash) && lookup[:cid].is_a?(Integer)

    cid = lookup[:cid]
    break
  end
  return nil unless cid

  iupac = map_cid_to_iupacname(cid) # { name: ... } or false
  return nil unless iupac

  { cid: cid, name: iupac[:name] }
end

# Block form closes (and flushes) the output file even if a lookup raises.
# NOTE(review): rows are written with CSV's default comma separator although
# the file is named .tsv — confirm which format downstream readers expect.
File.open(OUTPUT, "w") do |out|
  out.write CSV.generate_line(["demokratis_meshid","demokratis_label","CID","IUPACname"])

  # The input lists drug/disease pairs, so the same drug can appear on many
  # rows; remember successful resolutions to avoid repeating remote lookups.
  resolved = {}

  CSV.foreach(CSVFILE, headers: true, col_sep: "\t") do |row|
    # Drug	Drug_id	    Disease	   Disease_id
    name = row["Drug"]
    mesh = row["Drug_id"]

    mapping = (resolved[mesh] ||= resolve_drug_id(mesh))
    next unless mapping

    cid_guid = "https://pubchem.ncbi.nlm.nih.gov/compound/#{mapping[:cid]}"
    # NOTE(review): this URI is built from the raw Drug_id; the BioPortal hit's
    # own "@id" may be the intended MeSH URI — confirm.
    out.write CSV.generate_line(["http://purl.bioontology.org/ontology/MESH/#{mesh}",name,cid_guid,mapping[:name]])
  end
end

puts "DONE!"
  
  
  

https://data.bioontology.org/search?q=C0613621&ontologies=RXNORM,SNOMEDCT,DRUGBANK&require_exact_match=true&apikey=74027bd8-6be0-4329-be22-aa3717f97243
