# Web access

In [11]:
require 'rest-client'
require './web_access'

##------------------- Mark's fuction to prevent web accessing errors --------------------
#puts "now I load the fetch function"
def fetch(url, headers = {accept: "*/*"}, user = "", pass="")
  response = RestClient::Request.execute({
    method: :get,
    url: url.to_s,
    user: user,
    password: pass,
    headers: headers})
  return response
  
  rescue RestClient::ExceptionWithResponse => e
    $stderr.puts e.response
    response = false
    return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
  rescue RestClient::Exception => e
    $stderr.puts e.response
    response = false
    return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
  rescue Exception => e
    $stderr.puts e
    response = false
    return response  # now we are returning 'False', and we will check that with an \"if\" statement in our main code
end


#####
#####      WEB ACCESSING FUNCTIONS 
#####

#------------------- get protein ID --------------------
def get_protID(my_gene)
  address="http://togows.org/entry/ebi-uniprot/#{my_gene}/accessions.json"
  response = fetch(address)  
  if response  # if there is a response to calling that URI
    body = response.body  # get the "body" of the response
    data = JSON.parse(response.body)
    return data[0]
  end
end

#------------------- get KEGG annotations --------------------
def get_KEGG(my_gene)
  address = "http://togows.org/entry/kegg-genes/ath:#{my_gene}/pathways.json"
  my_KEGG_list=Array.new
  response = fetch(address)  
  if response  # if there is a response to calling that URI
    body = response.body  # get the "body" of the response
    data = JSON.parse(response.body)
    my_KEGG_list.push(data[0].to_a)
    unless data[0].nil?
      if data[0].length != 0
        my_KEGG_list = data[0].to_a
      end
    end
    return my_KEGG_list
  else 
    return Array.new
  end
end


#------------------- get GO annotations --------------------
def get_GO(my_gene)
  address = "http://togows.dbcls.jp/entry/uniprot/#{my_gene}/dr.json"
  my_GO_list=Array.new
  response = fetch(address)  
  if response  # if there is a response to calling that URI
    body = response.body  # get the "body" of the response
    data = JSON.parse(response.body)
    if data[0]["GO"]
      data[0]["GO"].each do |go| 
      if go[1] =~ /P:/#if its a biological process it will have the key 'P'
        my_GO_list.push(go[0..1])
      end
    end
      return my_GO_list
    end
  else 
    return Array.new
  end
end


#------------------- get gene interactions --------------------

def get_interactions(my_gene,all_genes)
  address="http://www.ebi.ac.uk/Tools/webservices/psicquic/intact/webservices/current/search/query/#{my_gene}?format=tab25"
  response = fetch(address)
  interactions_array=Array.new
  #puts "get_interactions function has been called"
  
  if response  # if there is a response to calling that URI
    body = response.body  # get the "body" of the response
    data = body.split("\n").to_a #first I split the rows into different arrays


    (0..data.length-1).each do |i|
      
      data[i] = data[i].split("\t") #then I create an array's element per each tab separated value 
      
      
      unless data[i][9].include?("3702") && data[i][10].include?("3702") #if both proteins are NOT from Arabidopsis thaliana (taxa code 3702)
        next # discard this protein interaction, jumps to next posible interaction
      end
        
        
      intact_miscore = data[i][14].split(":")[1]
      if intact_miscore.to_f < 0.485  # if interaction's score is under cutoff (according to doi:10.1093/database/bau131)
        next # discard this protein interaction, jumps to next posible interaction
      end
          
    
      data[i] = data[i][4..5] #these columns of the tab25 format cointain the gene locus name      
      (0..data[i].length-1).each do |k|
        #puts data[i][k].scan(/A[Tt]\d[Gg]\d\d\d\d\d/)
        interactions_array.push(data[i][k].scan(/A[Tt]\d[Gg]\d\d\d\d\d/)) #find the agi code in the text, retrieve it and add it to the array
        end
      end    


    
    interactions_array = interactions_array.flatten.uniq  #unnest the arrray, delete repetitions and interactions with itself
    interactions_array.map!(&:upcase) #make all the codes uppercase so that I can compare them latter
    interactions_array = interactions_array- [my_gene.upcase] #dont include my own gene as an interaction
    #puts "interactions_array created"
    
    return interactions_array
    
  end
end


:get_interactions

In [12]:
#------------------- LOAD INFO FROM GENE FILE --------------------

#Now I open the gene file and create some arrays that will help create the Class objects
gene_file = File.new("/Users/sara/BioCompu/Programming_challenges/Task2/ArabidopsisSubNetwork_GeneList.txt","r")

genes_list=Array.new 
gene_file.each do |agi| # iterate over the elements of gene_file
  genes_list.push(agi.strip.upcase!) #add AGI codes to a list
end

AT4g27030
AT5g54270
AT1g21400
AT5g19120
AT2g13360
AT4g05180
AT1g22690
AT2g45170
AT4g09650
AT5g55620
AT1g31330
AT1g80440
AT3g28270
AT4g12800
AT5g04140
AT5g64040
AT1g29660
AT1g15820
AT1g64500
AT1g03130
AT2g20670
AT4g17090
AT1g23740
AT2g46340
AT4g17460
AT1g07010
AT1g32220
AT2g03750
AT3g47470
AT4g12830
AT5g05690
AT5g64410
AT2g21330
AT1g15980
AT2g01890
AT1g12250
AT2g21210
AT4g28660
AT1g52230
AT3g18890
AT4g22890
AT1g11850
AT1g37130
AT2g04039
AT3g48420
AT4g19170
AT5g07020
AT5g65010
AT3g01500
AT1g25230
AT2g34430
AT1g18060
AT2g30600
AT4g33010
AT1g65490
AT3g21670
AT4g27260
AT1g12780
AT1g55480
AT2g17880
AT3g49160
AT4g21210
AT5g14740
AT4g37980
AT1g29395
AT2g39730
AT1g20020
AT2g34420
AT4g37800
AT1g70820
AT3g26740
AT4g28780
AT1g12900
AT1g64680
AT2g25200
AT3g55800
AT4g21280
AT5g15850
AT1g06680
AT2g43550
AT3g02730
AT1g42970
AT2g39470
AT5g17300
AT1g77490
AT3g27690
AT5g01015
AT1g14150
AT1g64720
AT2g26080
AT3g56940
AT4g28030
AT5g20250
AT1g54040
AT3g21870
AT3g48360
AT1g61520
AT2g43010
AT5g22920
AT1g78370



>

# Network generator

In [50]:
def interactions(mygenes_array)
  output_array=Array.new
  mygenes_array.each do |gene|
    interactions = get_interactions(gene,mygenes_array)
      if interactions != [] # if there are interacions
        output_array.push([gene,interactions].flatten) # add that interaction as [my_gene,interaction1,interaction2, ...] 
      end
  end
  return output_array
end

def get_interactions_array(all_genes_array, my_genes)
  new_level=Array.new
  all_genes_array.each do |int| #takes each [my_gene,interaction1,interaction2, ...] 
    new_level.push(int[1..-1]) # include only [interaction1,interaction2, ...], exclude my_gene
  end
  new_level.flatten!.uniq! # un-nest array and remove duplications
  new_level = new_level - (new_level & my_genes) #do not include genes from the previous interaction level
  return new_level
end


:interaction_connections

In [51]:
int_L1=interactions(genes_list) #interactions of co-expressed genes -> [my_geneA, interaction1, interaction2, ...]

[["AT5G04140", "AT4G37930"], ["AT4G17460", "AT3G07220", "AT1G70920", "AT3G15030", "AT2G44910", "AT4G16780", "AT5G12980", "AT3G60390", "AT5G47370", "AT2G22800", "AT2G45680", "AT1G30490", "AT4G37790"], ["AT4G22890", "AT1G60950"], ["AT4G19170", "AT5G17790"], ["AT5G15850", "AT1G26780", "AT4G10240", "AT5G27910", "AT2G31380", "AT1G08970", "AT5G54470", "AT4G38960", "AT1G42050", "AT5G65050", "AT4G12210", "AT3G02150", "AT3G01140", "AT3G07220", "AT5G23000", "AT2G46340", "AT2G46350", "AT3G15354", "AT1G53090"], ["AT5G17300", "AT3G15030", "AT5G08330", "AT1G76510", "AT1G60250", "AT3G02150", "AT3G18550", "AT5G15210"], ["AT1G54040", "AT4G23810"], ["AT3G21870", "AT3G48750"], ["AT3G48360", "AT4G09570"], ["AT2G43010", "AT1G09530", "AT2G18790", "AT1G09570"], ["AT1G71030", "AT1G60250", "AT4G09820", "AT1G56650", "AT4G32980"], ["AT3G45780", "AT5G64330"], ["AT3G62410", "AT3G26650"], ["AT3G61260", "AT3G17210", "AT3G48040", "AT3G55440"], ["AT4G39710", "AT4G03280"]]

In [64]:
genes_L2 = get_interactions_array(int_L1,genes_list) #[interaction1, interaction2, ...] with no duplicates
pre_int_L2 = interactions(genes_L2) # interactions of first level interactions -> [interaction1, intA, my_geneA, interaction 2, ...]
members = [genes_list,genes_L2].flatten!

["AT4G27030", "AT5G54270", "AT1G21400", "AT5G19120", "AT2G13360", "AT4G05180", "AT1G22690", "AT2G45170", "AT4G09650", "AT5G55620", "AT1G31330", "AT1G80440", "AT3G28270", "AT4G12800", "AT5G04140", "AT5G64040", "AT1G29660", "AT1G15820", "AT1G64500", "AT1G03130", "AT2G20670", "AT4G17090", "AT1G23740", "AT2G46340", "AT4G17460", "AT1G07010", "AT1G32220", "AT2G03750", "AT3G47470", "AT4G12830", "AT5G05690", "AT5G64410", "AT2G21330", "AT1G15980", "AT2G01890", "AT1G12250", "AT2G21210", "AT4G28660", "AT1G52230", "AT3G18890", "AT4G22890", "AT1G11850", "AT1G37130", "AT2G04039", "AT3G48420", "AT4G19170", "AT5G07020", "AT5G65010", "AT3G01500", "AT1G25230", "AT2G34430", "AT1G18060", "AT2G30600", "AT4G33010", "AT1G65490", "AT3G21670", "AT4G27260", "AT1G12780", "AT1G55480", "AT2G17880", "AT3G49160", "AT4G21210", "AT5G14740", "AT4G37980", "AT1G29395", "AT2G39730", "AT1G20020", "AT2G34420", "AT4G37800", "AT1G70820", "AT3G26740", "AT4G28780", "AT1G12900", "AT1G64680", "AT2G25200", "AT3G55800", "AT4G21280"

In [67]:
int_L2=[]
pre_int_L2.each do |int|
  real_int=[]
  int[1..-1].each do |gen_inter|
    if members.include? gen_inter
      real_int.push(gen_inter)
    end
  end
  int_L2.push([int[0],real_int].flatten)
end
      


[["AT4G37930", "AT5G04140"], ["AT3G07220", "AT1G21200", "AT5G39760", "AT1G54060", "AT1G11510", "AT3G16500", "AT5G67300", "AT3G28920", "AT1G32640", "AT5G06950", "AT4G02640", "AT4G16780", "AT4G03250", "AT5G57180", "AT3G54390", "AT5G01380", "AT5G53950", "AT3G61600", "AT5G60970", "AT4G37940", "AT4G17460", "AT1G30460", "AT1G25550", "AT4G00390", "AT2G38250", "AT5G47370", "AT2G02540", "AT2G25650", "AT3G21810", "AT1G04370", "AT1G60250", "AT5G13790", "AT1G43700", "AT3G23030", "AT2G01940", "AT1G61730", "AT4G00270", "AT4G06634", "AT2G46970", "AT1G42220", "AT3G21175", "AT5G28040", "AT5G15210", "AT3G15030", "AT3G45150", "AT5G15850"], ["AT1G70920", "AT2G44910", "AT4G16780", "AT3G60390", "AT2G22800", "AT4G37790", "AT4G17460", "AT5G47370"], ["AT3G15030", "AT1G30500", "AT5G61380", "AT1G01160", "AT5G17300", "AT4G20380", "AT5G65210", "AT3G11580", "AT5G26749", "AT5G08070", "AT3G20260", "AT3G02150", "AT1G30490", "AT1G35490", "AT4G37940", "AT1G68360", "AT2G17950", "AT2G47700", "AT1G72210", "AT1G75520", "AT1

In [69]:
all=[int_L1,int_L2].flatten!(1)

[["AT5G04140", "AT4G37930"], ["AT4G17460", "AT3G07220", "AT1G70920", "AT3G15030", "AT2G44910", "AT4G16780", "AT5G12980", "AT3G60390", "AT5G47370", "AT2G22800", "AT2G45680", "AT1G30490", "AT4G37790"], ["AT4G22890", "AT1G60950"], ["AT4G19170", "AT5G17790"], ["AT5G15850", "AT1G26780", "AT4G10240", "AT5G27910", "AT2G31380", "AT1G08970", "AT5G54470", "AT4G38960", "AT1G42050", "AT5G65050", "AT4G12210", "AT3G02150", "AT3G01140", "AT3G07220", "AT5G23000", "AT2G46340", "AT2G46350", "AT3G15354", "AT1G53090"], ["AT5G17300", "AT3G15030", "AT5G08330", "AT1G76510", "AT1G60250", "AT3G02150", "AT3G18550", "AT5G15210"], ["AT1G54040", "AT4G23810"], ["AT3G21870", "AT3G48750"], ["AT3G48360", "AT4G09570"], ["AT2G43010", "AT1G09530", "AT2G18790", "AT1G09570"], ["AT1G71030", "AT1G60250", "AT4G09820", "AT1G56650", "AT4G32980"], ["AT3G45780", "AT5G64330"], ["AT3G62410", "AT3G26650"], ["AT3G61260", "AT3G17210", "AT3G48040", "AT3G55440"], ["AT4G39710", "AT4G03280"], ["AT4G37930", "AT5G04140"], ["AT3G07220", "AT4

In [70]:
networks=Array.new
all.each do |item| # get one array of interactions, for example [gene1 , gene2, gene3]
  net=Array.new
  item.each do |elem| # get one of those genes, for example gene1
    (0..all.length-1).each do |i| #iterate over the arrays again (like all[0] = [gene1 , gene2, gene3])
      if all[i].any? elem #if the array contains that gene (for example gene1 is in all[0])
        net.push(all[i].flatten) # include that array of interactions to my new array 
      end
    end
  end
  
  net.flatten! # unnest array
  net.uniq! # delete repetitions
  net.sort! # sort array
  if net.length > 2 && (genes_list&net).length > 1 # if the network has more than 2 members and contains two genes from the original list of co-expressed genes
    networks.push(net)
    #puts " my network is \n #{net} \t"
  end
end
     
networks.uniq! #remove duplicated networks

[["AT1G08970", "AT1G26780", "AT1G30490", "AT1G42050", "AT1G53090", "AT1G60250", "AT1G70920", "AT1G71030", "AT1G76510", "AT2G22800", "AT2G31380", "AT2G44910", "AT2G45680", "AT2G46340", "AT2G46350", "AT3G01140", "AT3G02150", "AT3G07220", "AT3G15030", "AT3G15354", "AT3G18550", "AT3G60390", "AT4G10240", "AT4G12210", "AT4G16780", "AT4G17460", "AT4G37790", "AT4G38960", "AT5G08330", "AT5G12980", "AT5G15210", "AT5G15850", "AT5G17300", "AT5G23000", "AT5G27910", "AT5G47370", "AT5G54470", "AT5G65050"], ["AT1G08970", "AT1G26780", "AT1G30490", "AT1G42050", "AT1G53090", "AT1G56650", "AT1G60250", "AT1G70920", "AT1G71030", "AT1G76510", "AT2G22800", "AT2G31380", "AT2G44910", "AT2G45680", "AT2G46340", "AT2G46350", "AT3G01140", "AT3G02150", "AT3G07220", "AT3G15030", "AT3G15354", "AT3G18550", "AT3G60390", "AT4G09820", "AT4G10240", "AT4G12210", "AT4G16780", "AT4G17460", "AT4G32980", "AT4G37790", "AT4G38960", "AT5G08330", "AT5G12980", "AT5G15210", "AT5G15850", "AT5G17300", "AT5G23000", "AT5G27910", "AT5G473

In [71]:
networks.flatten.uniq.length


41

In [60]:
count_good=0
count_bad=0

networks.each do |net|
  if (net.flatten & genes_list).length >1
    count_good +=1
  else
    count_bad +=1
  end
end

puts "good: #{count_good}, and bad: #{count_bad}"

good: 32, and bad: 0


In [47]:
a=[1,2,3,4,5,6]
b=[2,4,5,6,7,8]
a&b

[2, 4, 5, 6]

In [48]:
a - (a&b)

[1, 3]

In [49]:
a

[1, 2, 3, 4, 5, 6]