In [1]:
# Show what is available in the raw-data directory (long listing).
puts %x(ls -l raw-data)


total 484
-rw-rw-r-- 1 osboxes osboxes  34777 Aug 12 17:45 Demokritos-KG-information.xlsx
-rw-rw-r-- 1 osboxes osboxes 207331 Aug 12 17:45 Disease-Therapeutic_Area.tsv
-rw-rw-r-- 1 osboxes osboxes  36869 Aug 12 17:45 Drug-Diseasetriples.tsv
-rw-rw-r-- 1 osboxes osboxes 111643 Aug 12 17:45 Drug-Drug_type.tsv
-rw-rw-r-- 1 osboxes osboxes  32600 Aug 12 17:45 Drug-Genetriples.tsv
-rw-rw-r-- 1 osboxes osboxes  51800 Aug 12 17:45 Gene-Diseasetriples.tsv
-rw-rw-r-- 1 osboxes osboxes   7965 Aug 12 17:45 Gene-Pathwaytriples.tsv


In [6]:
# Print the header row and the first record of each table that carries a
# Disease_id column, in the same order as before.
puts(*%w[Drug-Diseasetriples Disease-Therapeutic_Area Gene-Diseasetriples].map do |table|
  `head -2 "raw-data/#{table}.tsv"`
end)


﻿Drug	Drug_id	Disease	Disease_id
2,2-dichloro-1,1-difluoroethyl difluoromethyl ether	C0613621	Hereditary Diseases	C0019247
Disease	Disease_id	Therapeutic_area	Therapeutic_area_id	
Third cranial nerve disorder	C0271353	Cranial nerve diseases	C0010266	
﻿Gene	Gene_id	Disease	Disease_id
WASF1 gene	C1421479	Hereditary Diseases	C0019247


In [7]:
require 'csv'

# Collect the distinct values of the "Disease_id" column (UMLS CUIs) across
# the three tab-separated tables that carry one.
#
# cuisall - Hash used as a set (CUI => 1); insertion order is first-seen order.
# cuis    - Array of the unique CUIs, consumed by the lookup loop further down.
cuisall = {}
["./raw-data/Drug-Diseasetriples.tsv", "raw-data/Disease-Therapeutic_Area.tsv", "raw-data/Gene-Diseasetriples.tsv"].each do |fn|
  CSV.foreach(fn, headers: true, col_sep: "\t") do |row|
    cui = row["Disease_id"]&.strip # this is a UMLS CUI
    # A short row yields nil here and a padded one yields blanks; neither is a
    # usable identifier, and keeping it would later trigger an empty API query.
    next if cui.nil? || cui.empty?

    cuisall[cui] = 1
  end
end
cuis = cuisall.keys
puts cuis.length

3431


In [8]:
require "rest-client"
require 'json'


# Search BioPortal's SNOMEDCT index for a UMLS CUI and collect every hit.
#
# @param cui [String] identifier sent as the `q` search term (an exact match
#   is requested from the service)
# @return [Array<Hash>, false] one hash per hit with the keys :cui (the input),
#   :snomed (the hit's '@id'), :disease_name (its 'prefLabel') and :linksurl
#   (its 'links' -> 'mappings' URL). Returns false when the CUI is blank, the
#   request or JSON parsing fails, or the search produced no hits; a warning
#   is printed in each of those cases.
def map_cui_to_disease(cui)
  if cui.to_s.strip.empty?
    warn "No data found for #{cui.inspect}: blank CUI\n"
    return false
  end

  api_key = ENV["APIKEY"] # BioPortal API key, read from the environment
  response = RestClient.get("https://data.bioontology.org/search?q=#{cui}&ontologies=SNOMEDCT&require_exact_match=true&apikey=#{api_key}")
  hits = JSON.parse(response).dig('collection')

  # An empty array is truthy in Ruby, so it has to be tested explicitly;
  # otherwise a CUI without any hit would be reported as a success with
  # zero rows and never show up as a failure for the caller.
  if hits.nil? || hits.empty?
    warn "No data found for #{cui}\n"
    return false
  end

  hits.map do |h|
    {
      cui: cui,
      snomed: h&.dig('@id'),
      disease_name: h&.dig('prefLabel'),
      linksurl: h&.dig('links', 'mappings')
    }
  end
rescue StandardError => e
  # Request failures and malformed responses both land here, with the cause
  # included in the message rather than being swallowed silently.
  warn "No data found for #{cui} Error: #{e.inspect}\n"
  false
end




# Fetch the list of mappings behind a BioPortal "mappings" URL and pull out
# the Orphanet (ORDO) class it points to.
#
# @param linksurl [String] mappings URL of a class, e.g. the :linksurl value
#   produced by map_cui_to_disease
# @return [Hash, false] { orpha_id: id } where id is the '@id' of the last
#   mapped class whose '@id' contains "ORDO", or "" when there is none.
#   Returns false (after printing a warning) when no URL is given or the
#   request / JSON parsing fails.
def orphanet_lookup(linksurl)
  if linksurl.to_s.empty?
    warn "error: no mappings URL given"
    return false
  end

  api_key = ENV["APIKEY"] # BioPortal API key, read from the environment
  # Keep the URL valid if it already carries a query string.
  separator = linksurl.include?('?') ? '&' : '?'
  response = RestClient.get("#{linksurl}#{separator}apikey=#{api_key}") # returns a list of individual mappings

  orpha = ""
  JSON.parse(response).each do |mapping|
    # Array() lets a mapping without 'classes' be skipped instead of raising
    # and throwing away the ORDO ids found in the other mappings.
    Array(mapping.dig('classes')).each do |cclass| # "@id": "http://www.orpha.net/ORDO/Orphanet_98755","@type": "http://www.w3.org/2002/07/owl#Class",
      id = cclass.dig('@id')
      orpha = id if id.to_s.include?('ORDO') # can also get MONDO and DOID here!
    end
  end
  { orpha_id: orpha }
rescue StandardError => e
  warn "error #{e.inspect}"
  false
end


:orphanet_lookup

In [11]:
require "rest-client"
require 'csv'

# Resolve every CUI in `cuis` to its SNOMED CT hit(s) and, for each hit, to an
# Orphanet id. One CSV row per hit goes to the map file; CUIs and hits whose
# lookup failed are recorded in the errors file instead.
Dir.mkdir('./mappings') unless Dir.exist?('./mappings')
f = File.open('./mappings/disease-mappings.map', 'w')
e = File.open('./mappings/disease-errors.txt', 'w')
begin
  f.sync = true # Ensure immediate writes
  e.sync = true # Ensure immediate writes
  f.write CSV.generate_line(["source", "snomedct", "orpha", "prefname"])

  cuis.each do |cui|
    # Either false, or a list of hashes such as
    #   {:cui=>"C0019247", :snomed=>"http://purl.bioontology.org/ontology/SNOMEDCT/32895009", :disease_name=>"Hereditary disease", :linksurl=>"https://data.bioontology.org/ontologies/SNOMEDCT/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FSNOMEDCT%2F32895009/mappings"}
    #   {:cui=>"C0019247", :snomed=>"http://purl.bioontology.org/ontology/SNOMEDCT/782964007", :disease_name=>"Genetic disease", :linksurl=>"https://data.bioontology.org/ontologies/SNOMEDCT/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FSNOMEDCT%2F782964007/mappings"}
    cui_result = map_cui_to_disease(cui)
    unless cui_result
      e.write "error getting #{cui}\n"
      next
    end

    cui_result.each do |res|
      orpha = orphanet_lookup(res[:linksurl])
      warn "orpha #{orpha}"

      # orphanet_lookup returns false on failure; indexing false would raise
      # and abort the whole run, so record the failure and move on.
      unless orpha
        e.write "error getting orpha mapping for #{cui} #{res[:snomed]}\n"
        next
      end

      orphacode = orpha[:orpha_id]
      # Placeholder written when the hit has no Orphanet mapping.
      orphacode = "https://fake.orpha.net/not-found" if orphacode.nil? || orphacode.empty?
      f.write CSV.generate_line([cui, res[:snomed], orphacode, res[:disease_name]])
    end
  end
ensure
  # Close both files even if a lookup raises part-way through.
  f.close
  e.close
end

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_95"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_69"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98808"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98473"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_619284"}
orpha {:orpha_id=>""}
orpha {:orpha

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_1560"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_251295"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98980"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_207"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_53271"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:or

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_83476"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_83593"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/

orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_648562"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_685"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_274"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98434"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_806"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha

orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_2133"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_90039"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_2132"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_353253"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_68356"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_955"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_i

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_3032"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_303"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_304"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_305"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_83597"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_137817"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>

orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_2909"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_352763"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_209"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_263432"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_542592"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_66646"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_66646"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_309144"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_760"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_476113"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_397596"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_504523"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_379"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_3385"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_108"}
orpha {:orpha_id=>""

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_180"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98979"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:or

orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_2670"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_33108"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_96061"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_66631"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_531"}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_93308"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>"http://www.orpha.net/ORDO/Orphanet_98955"}
orpha {:orpha_id=>""}
orpha {:orpha_id=>""}


["C0019247", "C0524851", "C0678236", "C0007760", "C0026650", "C0242422", "C0024408", "C0011168", "C0002395", "C0030567", "C0013384", "C0015397", "C0679225", "C0742034", "C0694564", "C0262405", "C0085078", "C0027765", "C0018889", "C0035258", "C0031117", "C0004096", "C0016719", "C0442874", "C0520679", "C0042769", "C0002726", "C0150055", "C0014544", "C0242567", "C0011860", "C0270764", "C2931688", "C0006111", "C1851920", "C0020459", "C0021167", "C0026850", "C0011847", "C0027404", "C0028754", "C0003123", "C1145670", "C0021831", "C0085584", "C0027126", "C0026848", "C0025517", "C3714844", "C0032285", "C1290884", "C0751651", "C0023264", "C0268237", "C1857355", "C0001125", "C0917796", "C2982732", "C0038454", "C0035078", "C0221505", "C0221355", "C0494475", "C0878544", "C0149931", "C0020615", "C0006112", "C0033377", "C0038220", "C0268595", "C0155320", "C0022658", "C0027868", "C0036690", "C0011849", "C0023890", "C0042345", "C0018799", "C0022661", "C0031485", "C0268465", "C0423092", "C0020538", "C0