# Extract orthology data from OMA API

## 1. Install

In [1]:
%%bash

pip install coreapi

Collecting coreapi
  Using cached coreapi-2.3.3-py2.py3-none-any.whl (25 kB)
Collecting uritemplate
  Using cached uritemplate-3.0.1-py2.py3-none-any.whl (15 kB)
Collecting coreschema
  Downloading coreschema-0.0.4.tar.gz (10 kB)
Collecting itypes
  Using cached itypes-1.2.0-py2.py3-none-any.whl (4.8 kB)
Building wheels for collected packages: coreschema
  Building wheel for coreschema (setup.py): started
  Building wheel for coreschema (setup.py): finished with status 'done'
  Created wheel for coreschema: filename=coreschema-0.0.4-py3-none-any.whl size=15032 sha256=3fea0c641b099c36beaeef92e36c8bcc34a1500b2b43733ccd6d4ceee6607c1c
  Stored in directory: /home/averstichele/.cache/pip/wheels/29/28/77/73539fe0ce161da7a571cd8278a9296adbda257406bb6f100d
Successfully built coreschema
Installing collected packages: uritemplate, coreschema, itypes, coreapi
Successfully installed coreapi-2.3.3 coreschema-0.0.4 itypes-1.2.0 uritemplate-3.0.1


## 2. Initialize the API client

In [None]:
import pandas as pd
from coreapi import Client

# URL of the OMA browser API description document.
OMA_API_DOCS_URL = 'https://omabrowser.org/api/docs'

# Create the coreapi client and fetch the API document; later cells pass
# `document` to `client.action` to call individual endpoints.
client = Client()
document = client.get(OMA_API_DOCS_URL)

## 3. Extract orthologs 

In [80]:
# Query parameters: the species used by the filter step further down, and the
# protein identifiers whose orthologs are requested.
SPECIES = 'MAIZE'
IDs = ['AT3G52180', 'AT4G30500', 'AT4G35500']

# One API call per identifier; each response becomes a DataFrame tagged with
# the identifier it was requested for (column `query`).
ortholog_frames = [
    pd.DataFrame(
        client.action(document, ['protein', 'orthologs'], params={"entry_id": entry_id})
    ).assign(query=entry_id)
    for entry_id in IDs
]

# Stack the per-identifier tables; each table keeps its own row labels.
data = pd.concat(ortholog_frames)
    

## 4. Filter species

In [81]:
# Keep only the orthologs that belong to the target species.
# Each `species` entry is a mapping with a 'code' key (e.g. {'code': 'MAIZE', ...}),
# so compare that code exactly. Substring-matching the stringified mapping could
# also hit other fields of the record, and `str.contains` would interpret
# SPECIES as a regular expression.
species_codes = data['species'].map(
    # Entries without a mapping interface (e.g. missing values) never match.
    lambda entry: entry.get('code') if hasattr(entry, 'get') else None
)
data_species = data[species_codes == SPECIES]

In [82]:
# Display the orthologs retained for the selected species.
data_species

Unnamed: 0,entry_nr,entry_url,omaid,canonicalid,sequence_md5,sequence_length,species,oma_group,oma_hog_id,chromosome,locus,is_main_isoform,rel_type,distance,score,query
8,14062166,https://omabrowser.org/api/protein/14062166/,MAIZE07161,B4FJN0,7be3b373a02b396706e8ecb752408b14,373,"{'code': 'MAIZE', 'taxon_id': 4577, 'species':...",522474,HOG:0212555,1,"{'start': 2629741, 'end': 2635693, 'strand': -1}",True,1:1,35.335701,1579.349976,AT3G52180
198,14104078,https://omabrowser.org/api/protein/14104078/,MAIZE49073,B4FPU6,96c0ee664185c8b448bdb61716ef6cd1,174,"{'code': 'MAIZE', 'taxon_id': 4577, 'species':...",910085,HOG:0513146,4,"{'start': 157331300, 'end': 157334121, 'strand...",True,m:1,46.0,1020.169983,AT4G30500
354,14085265,https://omabrowser.org/api/protein/14085265/,MAIZE30260,B4G1A4,4fdb909eabc33759c3f06dfac451fd9b,424,"{'code': 'MAIZE', 'taxon_id': 4577, 'species':...",598285,HOG:0500414.12ffv.3775b,2,"{'start': 204222660, 'end': 204225399, 'strand...",True,m:1,38.582901,2778.570068,AT4G35500


## 5. Cross reference

In [83]:
# For every retained ortholog, fetch its cross-references and keep only the
# `xref` column, relabelled with the query identifier the ortholog belongs to.
xref = [
    pd.DataFrame(
        client.action(document, ['protein', 'xref'], params={"entry_id": oma_id})
    )[['xref']].rename(columns={'xref': query_id})
    for oma_id, query_id in zip(data_species['omaid'], data_species['query'])
]

# Place the per-ortholog columns side by side (aligned on row labels) and
# show the combined table as the cell output.
pd.concat(xref, axis=1)

Unnamed: 0,AT3G52180,AT4G30500,AT4G35500
0,A0A317Y671,A0A3L6F0X9,A0A3L6FVC6
1,B4FJN0,B4FPU6,B4G1A4
2,NP_001136639,XP_008676505,NP_001142305
3,XP_008665116,100192887,100274474
4,100216768,Zm00001d051405,Zm00001d006308
5,Zm00001d027309,Zm00001d051405_P001,Zm00001d006308_P001
6,Zm00001d027309_P005,Zm00001d051405_T001,Zm00001d006308_T001
7,Zm00001d027309_T005,Zm00001d051405,Zm00001d006308
8,Zm00001d027309,Zm00001d051405_P001,Zm00001d006308_P001
9,Zm00001d027309_P005,Zm00001d051405_T001,Zm00001d006308_T001
