## Objective: To identify genotype-phenotype trait association in yeast
### Develop a workflow to identify genes indirectly associated with a certain yeast phenotype (butanol tolerance) using EKP and visualize them in an interactive knowledge graph


### Load the API scripts with login credentials


In [23]:
# dplyr provides the %>% pipe and mutate(); tidyr provides unnest().
library(dplyr)
library(tidyr)
# sqldf runs SQL statements against data frames in the R session.
library(sqldf)
# Project-local script. Presumably defines getConceptID(), getIndirectRelation()
# and getConceptName(), and attaches jsonlite (fromJSON()/toJSON() are used
# below without an explicit library() call) -- TODO confirm.
source("EuretosInfrastructure.R")
# NOTE(review): a negative `warn` value suppresses every warning for the rest
# of the session, so API or parsing problems further down will not be reported.
options(warn=-1)




Retrieving page 0
Retrieving page 1
Retrieving page 2
Retrieving page 3
Retrieving page 4
Retrieving page 5
Retrieving page 6
Retrieving page 7
Retrieving page 8
Retrieving page 9
Retrieving page 10
Retrieving page 11


### DSM workflow starts here:
### Load the input data provided by DSM. This data consists of a list of yeast genes and a list of terms that represent butanol tolerance

In [24]:
# Read the gene table; the first row of the CSV holds the column names.
yeast_genes <- read.csv(file = "yeast_genes_sgdID.csv", header = TRUE)

## Step 1a : Get the starting concept identifiers

In [25]:
# Concept-search endpoint path. Presumably read as a global by the helpers in
# EuretosInfrastructure.R -- TODO confirm.
query <- "/external/concepts/search"
# Look up a concept identifier for every entry in the first column of the
# gene table (passed as character strings).
start <- getConceptID(as.character(yeast_genes[[1L]]))



















In [26]:
# Preview the first six rows of the start-concept lookup.
head(start, n = 6L)

Id,EKP_Concept_Id,name
s000004214,4042749,ucc1 (saccharomyces cerevisiae s288c)
s000005850,4044156,pro2 (saccharomyces cerevisiae s288c)
s000003573,4043331,snx4 (saccharomyces cerevisiae s288c)
s000004640,4044162,msn2 (saccharomyces cerevisiae s288c)
s000001086,4045917,dog1 (saccharomyces cerevisiae s288c)
s000005393,4044097,mse1 (saccharomyces cerevisiae s288c)


## Step 1b: Get the ending concept identifiers for "resistance to chemicals"

In [27]:
# Concept-search endpoint path. Presumably read as a global by the helpers in
# EuretosInfrastructure.R -- TODO confirm.
query <- "/external/concepts/search"
# Look up concept identifiers for the free-text phenotype term.
# NOTE(review): the preview printed below lists matches such as "togo" and
# "tonga (geographic area)", so the search appears to return loosely related
# concepts; consider filtering `end` before it is used as the path target.
# NOTE(review): the name `end` masks stats::end(); it is kept because later
# cells refer to it.
end <- getConceptID("resistance to chemicals")





In [28]:
# Preview the first six concepts matched for the phenotype term.
head(end, n = 6L)

Id,EKP_Concept_Id,name
resistance to chemicals,41742,social resistance
resistance to chemicals,640662,tonga (geographic area)
resistance to chemicals,702742,togo
resistance to chemicals,831196,"tryptophan 2,3-dioxygenase (homo sapiens)"
resistance to chemicals,1226570,toxicity aspects
resistance to chemicals,2862982,resistance process


## Step 2: Get Indirect relationships from EKP for ending terms "resistance to chemicals"

In [29]:
# Ask EKP for the indirect relationships between the start and end concepts.
resistance2Chemicals <- getIndirectRelation(start, end)

# Round-trip the response through JSON, flattening nested data frames.
df <- fromJSON(toJSON(resistance2Chemicals), flatten = TRUE)

# Row-bind the elements of `df` and store the result as data frame `b`.
b <- as.data.frame(do.call(rbind, df))



















































































































































In [30]:
# Preview the first six elements of the raw indirect-relationship response.
head(resistance2Chemicals, n = 6L)

## parse only the relationships

In [31]:
# Build `tripleId`: one row per (subject, predicate, object) identifier triple.
# Take the second column of `b`; presumably it holds the relationship records
# of each indirect path -- TODO confirm against getIndirectRelation().
rel<-b[,2]

# Stack the elements of `rel` row-wise into a single object.
dfs<-do.call(rbind,rel)


# Round-trip through JSON with flatten = TRUE, then drop the existing row and
# column names; the columns are addressed by position below.
tt<-fromJSON(toJSON(dfs),flatten = TRUE)
row.names(tt)<-NULL
colnames(tt)<-NULL

# Columns 1 and 2 are flattened with unlist(); each entry of column 3 is joined
# into one comma-separated string. The columns are then named sub, obj, pred.
tt[,1]<-unlist(tt[,1])
tt[,2]<-unlist(tt[,2])
tt[,3]<-sapply(tt[,3], paste0, collapse=",")
colnames(tt)<-c("sub","obj","pred")

# Split the comma-joined predicate string apart again and expand the table to
# one row per predicate.
tt%>% mutate(pred=strsplit(as.character(pred),",")) %>% unnest(pred) -> tripleId
# NOTE(review): this resets the row names of `tt`, which was already done
# above; `tripleId` may have been the intended target -- confirm.
row.names(tt)<-NULL
# Reorder the columns from (sub, obj, pred) to (sub, pred, obj).
tripleId<-tripleId[,c(1,3,2)]


### Step 3: Map human-readable triples from the reference database

In [32]:
# Load the predicate reference table, keep its 2nd and 3rd columns, and name
# them so the predicate column can be joined against `tripleId`.
pred <- read.csv(file = "Reference_Predicate_List.csv", header = TRUE)
pred <- setNames(pred[, c(2, 3)], c("pred", "names"))

# Look up names for the subject (column 1) and object (column 3) identifiers.
subject <- getConceptName(tripleId[, 1])
object <- getConceptName(tripleId[, 3])
# Attach the predicate names; the left join keeps every row of `tripleId`.
predicate <- sqldf("select * from tripleId left join pred on pred.Pred=tripleId.Pred")

# Assemble the named triples column by column. This relies on `subject`,
# `predicate` and `object` having the same number of rows in the same order.
# Column 5 of the join result is the `names` column of `pred`.
tripleName <- cbind(
  subject[, 2],
  as.character(predicate[, 5]),
  object[, 2]
)
































































































































































































































































































































































































































































































































































































































































































































































































































In [33]:
# Preview the first six named triples.
head(tripleName, n = 6L)

0,1,2
ucc1 (saccharomyces cerevisiae s288c),is functionally related to,parachlorophenol
parachlorophenol,interacts with,chemicals
ucc1 (saccharomyces cerevisiae s288c),is functionally related to,1-butanol
1-butanol,interacts with,chemicals
1-butanol,is a,chemicals
ucc1 (saccharomyces cerevisiae s288c),is functionally related to,toxaphene


### Step 4: Write it to a file and visualize these in Triple Viewer and Spot

In [34]:
# Export the named triples as a semicolon-separated file.
write.table(x = tripleName, file = "./triple.csv", sep = ";")