In [1]:
puts `ls -l raw-data`


total 484
-rw-rw-r-- 1 osboxes osboxes  34777 May 29 07:33 Demokritos-KG-information.xlsx
-rw-rw-r-- 1 osboxes osboxes 207331 May 29 07:33 Disease-Therapeutic_Area.tsv
-rw-rw-r-- 1 osboxes osboxes  36869 May 29 07:33 Drug-Disease triples.tsv
-rw-rw-r-- 1 osboxes osboxes 111643 May 29 09:32 Drug-Drug_type.tsv
-rw-rw-r-- 1 osboxes osboxes  32600 May 29 07:33 Drug-Gene triples.tsv
-rw-rw-r-- 1 osboxes osboxes  51800 May 29 07:33 Gene-Disease triples.tsv
-rw-rw-r-- 1 osboxes osboxes   7965 May 29 07:33 Gene-Pathway triples.tsv


In [2]:
puts `head "raw-data/Disease-Therapeutic_Area.tsv"`

Disease	Disease_id	Therapeutic_area	Therapeutic_area_id	
Third cranial nerve disorder	C0271353	Cranial nerve diseases	C0010266	
Other specified trigeminal nerve disorders	C0029834	Cranial nerve diseases	C0010266	
Marcus Gunn phenomenon	C0266521	Cranial nerve diseases	C0010266	
Cranial nerve palsies	C0151311	Cranial nerve diseases	C0010266	
Disorders of other cranial nerves	C0154730	Cranial nerve diseases	C0010266	
Cranial Nerve Neoplasms, Malignant	C0153644	Cranial nerve diseases	C0010266	
Disorder of the optic nerve	C0029132	Cranial nerve diseases	C0010266	
Facial Nerve Diseases	C0015464	Cranial nerve diseases	C0010266	
Superior Oblique Palsy, Neurogenic	C0751939	Cranial nerve diseases	C0010266	


# Column 4 = MedGen CUI

UMLS requires an API key, that has onerous reporting requirements.  

Skip it and use OBO, where the ICD vocabularies use the same CUI

e.g. https://data.bioontology.org/search?q=C0010266&ontologies=ICD10CM&require_exact_match=true&apikey=...

In [13]:
# puts `curl -H "Accept: application/json" "https://mygene.info/v3/query?q=C1538301&fields=symbol,ensembl.gene,uniprot"`
require "rest-client"
require 'json'

apikey = ENV["APIKEY"] # Replace with BioPortal API key

puts JSON.pretty_generate(JSON.parse(RestClient.get("https://data.bioontology.org/search?q=C0010266&ontologies=ICD10CM&require_exact_match=true&apikey=#{apikey}").body))


{
  "page": 1,
  "pageCount": 1,
  "totalCount": 1,
  "prevPage": null,
  "nextPage": null,
  "links": {
    "nextPage": null,
    "prevPage": null
  },
  "collection": [
    {
      "prefLabel": "Cranial nerve disorder, unspecified",
      "cui": [
        "C0010266"
      ],
      "semanticType": [
        "T047"
      ],
      "obsolete": false,
      "matchType": "cui",
      "ontologyType": "ONTOLOGY",
      "provisional": false,
      "@id": "http://purl.bioontology.org/ontology/ICD10CM/G52.9",
      "@type": "http://www.w3.org/2002/07/owl#Class",
      "links": {
        "self": "https://data.bioontology.org/ontologies/ICD10CM/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FICD10CM%2FG52.9",
        "ontology": "https://data.bioontology.org/ontologies/ICD10CM",
        "children": "https://data.bioontology.org/ontologies/ICD10CM/classes/http%3A%2F%2Fpurl.bioontology.org%2Fontology%2FICD10CM%2FG52.9/children",
        "parents": "https://data.bioontology.org/ontologies/ICD

In [15]:

require "rest-client"
require 'json'


def map_cui_to_ICD(cui)
  api_key = ENV["APIKEY"] # Replace with BioPortal API key
  begin
    response = RestClient.get("https://data.bioontology.org/search?q=#{cui}&ontologies=ICD10&require_exact_match=true&apikey=#{api_key}")
  rescue
    return false
  end
  mappings = []
  data = JSON.parse(response)

  if hit = data.dig('collection')
    hit.each do |h|
      therapeuticarea = h&.dig('prefLabel')
      icd10 = h&.dig('@id')
      mappings << { cui: cui, therapeuticarea: therapeuticarea, icd10: icd10}
    end
    return mappings
  else
    warn "No data found for #{cui}\n"
    return false
  end

rescue StandardError => e
  warn "No data found for #{cui} Error: #{e.inspect}\n"
  return false
end




:map_cui_to_ICD

In [17]:

# Example usage
cuis = ['C0010266'] 
results = cuis.map do |cui|
  cui_result = map_cui_to_ICD(cui)
  puts cui_result
#   abort
  next if cui_result == false

end

{:cui=>"C0010266", :therapeuticarea=>"Cranial nerve disorder, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/G52.9"}


[nil]

# The MedGen CUI to ICD10 Mapping





In [28]:
require 'csv'

f = File.open('./mappings/therapeutic-mappings.csv', 'w')
e = File.open('./mappings/therapeutic-errors.txt', 'w')
f.sync = true # Ensure immediate writes
e.sync = true # Ensure immediate writes
f.write CSV.generate_line(["source","icd10","prefname"])


CSVFILE="raw-data/Disease-Therapeutic_Area.tsv"

seen={}
CSV.foreach(CSVFILE, headers: true, col_sep: "\t") do |row|
  next if row.size < 3
  cui = row["Therapeutic_area_id"]  # this is a UMLS CUI
  next if seen[cui]
  seen[cui] = 1
  cui_result = map_cui_to_ICD(cui) # {:cui=>"C0010266", :therapeuticarea=>"Cranial nerve disorder, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/G52.9"}
warn cui_result.class
  if cui_result == false
    e.write "error getting #{cui}\n"
    next
  elsif cui_result.empty?
    e.write "No ICD10 matches for #{cui}\n"
    next    
  end
  
  cui_result.each do |res|
    warn res
    f.write CSV.generate_line([cui,res[:icd10],res[:therapeuticarea]])
  end
end
f.close
e.close


Array
{:cui=>"C0010266", :therapeuticarea=>"Cranial nerve disorder, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/G52.9"}
Array
Array
{:cui=>"C0042594", :therapeuticarea=>"Disorders of vestibular function", :icd10=>"http://purl.bioontology.org/ontology/ICD10/H81"}
{:cui=>"C0042594", :therapeuticarea=>"Disorder of vestibular function, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/H81.9"}
Array
Array
Array
Array
{:cui=>"C0027121", :therapeuticarea=>"Myositis", :icd10=>"http://purl.bioontology.org/ontology/ICD10/M60"}
{:cui=>"C0027121", :therapeuticarea=>"Myositis, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/M60.9"}
Array
Array
{:cui=>"C0242172", :therapeuticarea=>"Female pelvic inflammatory disease, unspecified", :icd10=>"http://purl.bioontology.org/ontology/ICD10/N73.9"}
{:cui=>"C0242172", :therapeuticarea=>"Inflammatory diseases of female pelvic organs", :icd10=>"http://purl.bioontology.org/ontology/ICD10/N70-N77.9"}
Array
A

Interrupt: 