In [None]:
# Sanity check before processing: print the working directory, list its
# contents, and show the beginning of the raw drug-disease CSV.
['pwd', 'ls -l', 'head rawdata/drug-disease.csv'].each do |command|
  puts `#{command}`
end

In [None]:
require 'csv'
puts `pwd`

# Collect the distinct identifier prefixes found in the third column of the
# raw CSV (the text before the first underscore), so we know which ontologies
# the target column draws from. The hash is used as a set: keys are prefixes.
targets = {}

CSV.foreach('./rawdata/drug-disease.csv', headers: true) do |row|
  next if row.size < 3

  # `to_s` guards against a nil cell; String#[] with a regexp returns nil
  # (rather than raising) when the cell is empty or contains only underscores.
  prefix = row[2].to_s[/[^_]+/]
  next unless prefix

  targets[prefix] = 1
end
puts "Targets"
puts targets.keys

# Process file

Both the source and the target columns need to be processed.

The drug and its drug type are handled in the same pass, using information from the BigCat SPARQL endpoint.


In [None]:
require_relative '../Lookups/metadata_functions'
require 'csv'

# Build the mapping files from the raw drug-disease CSV.
#
#   f1 (drug-disease-mappings.csv)  - one line per input row whose source AND
#                                     target were both resolved to a title.
#   f2 (drug-drugtype-mappings.csv) - one line per input row whose source was
#                                     resolved (written before the target is
#                                     examined).
#   e  (drug-disease-errors.txt)    - one line per row that could not be
#                                     resolved.
#
# Progress is echoed to stderr via `warn`.
f1 = f2 = e = nil

# NOTE(review): `seen` and `count` are never read in this cell. They are kept
# in case later notebook cells rely on them; `seen` was presumably meant to
# de-duplicate repeated drugs in f2 - confirm and either use or remove.
seen = {}
count = 1

begin
  f1 = File.open('./mappings/drug-disease-mappings.csv', 'w')
  f2 = File.open('./mappings/drug-drugtype-mappings.csv', 'w')
  e = File.open('./mappings/drug-disease-errors.txt', 'w')
  [f1, f2, e].each { |io| io.sync = true } # Ensure immediate writes

  f1.write "radboudsource,sourceguid,sourcelabel,radboudtarget,targetguid,targetlabel\n"
  f2.write "radboudsource,sourceguid,sourcelabel,targettype\n"

  CSV.foreach('./rawdata/drug-disease.csv', headers: true) do |row|
    next if row.size < 3

    # PROCESS SOURCE COLUMN #######################################
    radboudsource = row[0]
    # always CHEMBL
    lookup = CHEMBL.new(molecule: radboudsource)
    sourceurl = "http://rdf.ebi.ac.uk/resource/chembl/molecule/#{radboudsource}"

    sourcetitle, moleculetype = lookup.lookup_title_and_type
    unless sourcetitle
      # this should never happen because the raw curie is returned if it fails
      message = "No match for #{radboudsource}, #{sourceurl}"
      warn message
      e.puts message # puts (not write) so each error lands on its own line
      next
    end
    warn "#{radboudsource}, #{sourceurl}, #{sourcetitle}"
    f2.write CSV.generate_line([radboudsource, sourceurl, sourcetitle, moleculetype])

    # PROCESS TARGET COLUMN #######################################
    radboudtarget = row[2]
    # Prefix before the first underscore; nil when the cell is empty or has
    # no underscore, which would otherwise raise NoMethodError on `[1]`.
    ontology = radboudtarget.to_s[/([^_]+)_/, 1]
    unless ontology
      message = "No ontology prefix in target #{radboudtarget.inspect} (source #{radboudsource})"
      warn message
      e.puts message
      next
    end

    targeturl =
      case ontology
      when 'EFO', 'OTAR'
        "http://www.ebi.ac.uk/efo/#{radboudtarget}"
      when 'DOID'
        "https://api.disease-ontology.org/v1/terms/#{radboudtarget}"
      when 'Orphanet'
        "http://www.orpha.net/ORDO/#{radboudtarget}"
      else
        "http://purl.obolibrary.org/obo/#{radboudtarget}"
      end

    targettitle = ontology_annotations(uri: targeturl)
    unless targettitle
      # this should never happen because the raw curie is returned if it fails
      message = "No match for #{radboudtarget}, #{targeturl}"
      warn message
      e.puts message
      next
    end
    warn "#{radboudtarget}, #{targeturl}, #{targettitle}"

    f1.write CSV.generate_line([radboudsource, sourceurl, sourcetitle, radboudtarget, targeturl, targettitle])
  end
ensure
  # Close whatever was successfully opened, even if a lookup raised mid-run.
  [f1, f2, e].each { |io| io&.close }
end