## Motivation: To identify genotype-phenotype relationships for yeast genes related to butanol tolerance using the Euretos Knowledge Platform

### Setup the Workflow Infrastructure


In [28]:
# Pull in the helper script for this workflow (presumably defines
# getConceptID() and getIndirectRelation() -- confirm in that file).
source(file = "EuretosInfrastructure.R")
# A negative `warn` level makes R ignore all warnings from here on.
options(warn = -1)




#### DSM workflow starts here: load the input data provided by DSM. This data consists of a list of yeast genes and a list of terms that represent butanol tolerance.

In [4]:
# Gene table; its first column is later handed to getConceptID().
yeast_genes <- read.csv(file = "yeast_genes_sgdID.csv", header = TRUE)

# Resistance terms, read without a header row so columns get the default
# names (V1, ...).
# NOTE(review): absolute, machine-specific path -- confirm before running on
# another machine.
phenotype <- read.csv(
  file = "/home/anandgavai/ODEX4all-UseCases/ODEX4all-UseCases/scripts/EKP/DSM/dropbox/Resistance_terms.txt",
  header = FALSE
)

# Split column V1 on the "<TAB>equals<TAB>" delimiter into `terms` and `class`.
phenotype <- separate(
  phenotype,
  V1,
  into = c("terms", "class"),
  sep = "\tequals\t"
)

## Step 1a : Get the starting concept identifiers

# Endpoint path for the concept search; presumably read as a global by
# getConceptID() -- confirm in EuretosInfrastructure.R.
query <- "/external/concepts/search"
# Resolve the first column of yeast_genes (as character) to concept identifiers.
start <- getConceptID(as.character(yeast_genes[, 1]))

## Step 1b: Get the ending concept identifiers for "resistance to chemicals"

In [7]:
# Endpoint path for the concept search; presumably read as a global by
# getConceptID() -- confirm in EuretosInfrastructure.R.
query <- "/external/concepts/search"
# Concept identifier(s) for the first ending term.
end1 <- getConceptID("resistance to chemicals")





## Step 1c: Get the ending concept identifiers for "butanol tolerance"

In [10]:
# Endpoint path for the concept search; presumably read as a global by
# getConceptID() -- confirm in EuretosInfrastructure.R.
query <- "/external/concepts/search"
# Concept identifier(s) for the second ending term.
end2 <- getConceptID("butanol tolerance")




## Step 2a: Get Indirect relationships from EKP for ending terms "resistance to chemicals"

In [30]:
# Fetch the indirect relationships between the yeast gene concepts (`start`,
# Step 1a) and the "resistance to chemicals" concept (`end1`, Step 1b).
# This cell previously repeated the Step 1b concept search and never created
# `resistance2Chemicals`, which the following cell inspects; it now mirrors
# the Step 2b call used for "butanol tolerance".
# Endpoint path is presumably read as a global by getIndirectRelation() --
# confirm in EuretosInfrastructure.R.
query <- "/external/concept-to-concept/indirect"
resistance2Chemicals <- getIndirectRelation(start, end1)




In [25]:
# Preview the first six entries of the resistance-to-chemicals result.
head(resistance2Chemicals, n = 6L)

Unnamed: 0,content.concepts.id,content.concepts.name,content.concepts.semanticCategory,content.concepts.id.1,content.concepts.name.1,content.concepts.semanticCategory.1,content.concepts.id.2,content.concepts.name.2,content.concepts.semanticCategory.2,content.relationships.concept0Id,⋯,content.relationships.concept0Id.1,content.relationships.concept1Id,content.score,last,totalPages,totalElements,numberOfElements,first,size,number
a,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,4964457,parachlorophenol,Chemicals & Drugs,3461869,chemicals,Chemicals & Drugs,4042749,⋯,1288761,3461869,13.7088029326004,TRUE,1,6,6,TRUE,10,0.0
a,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,4963583,urea,Chemicals & Drugs,831196,"tryptophan 2,3-dioxygenase (homo sapiens)",Chemicals & Drugs,4044156,⋯,"oxidoreductase activity, acting on other substrates",Physiology,831196.0,"tryptophan 2,3-dioxygenase (homo sapiens)",Chemicals & Drugs,4044156,279172,279172,831196,13.9772611399278
a,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,4964457,parachlorophenol,Chemicals & Drugs,3461869,chemicals,Chemicals & Drugs,4044156,⋯,5225041,3461869,11.6230756106485,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,696567,hydrogen sulfide,Chemicals & Drugs,3461869.0
a,4043331,snx4 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,967758,polyphosphates,Chemicals & Drugs,831196,"tryptophan 2,3-dioxygenase (homo sapiens)",Chemicals & Drugs,4043331,⋯,diphosphates,Chemicals & Drugs,831196.0,"tryptophan 2,3-dioxygenase (homo sapiens)",Chemicals & Drugs,4043331,1262592,1262592,831196,11.4024287782839
a,4043331,snx4 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,5210546,calcofluor white,Chemicals & Drugs,3461869,chemicals,Chemicals & Drugs,4043331,⋯,1134038,3461869,12.3879766898607,4043331,snx4 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,837632,dieldrin,Chemicals & Drugs,3461869.0
a,4044162,msn2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,1047802,cytoplasmic matrix,Anatomy,831196,"tryptophan 2,3-dioxygenase (homo sapiens)",Chemicals & Drugs,4044162,⋯,1047802,831196,13.7965516756188,4044162,msn2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,249129,heavy metal binding,Physiology,831196.0


## Step 2b: Get Indirect relationships from EKP for ending terms "butanol tolerance"

In [31]:
# Endpoint path for indirect concept-to-concept relationships; presumably read
# as a global by getIndirectRelation() -- confirm in EuretosInfrastructure.R.
query <- "/external/concept-to-concept/indirect"
# Indirect relationships from the yeast gene concepts to "butanol tolerance".
butanolTolerance <- getIndirectRelation(start, end2)



































































































In [21]:
# Report the dimensions of the butanol-tolerance result.
dim(butanolTolerance)

In [26]:
# Preview the first six entries of the butanol-tolerance result.
head(butanolTolerance, n = 6L)

Unnamed: 0,content.concepts.id,content.concepts.name,content.concepts.semanticCategory,content.concepts.id.1,content.concepts.name.1,content.concepts.semanticCategory.1,content.concepts.id.2,content.concepts.name.2,content.concepts.semanticCategory.2,content.relationships.concept0Id,⋯,content.relationships.concept0Id.1,content.relationships.concept1Id,content.score,last,totalPages,totalElements,numberOfElements,first,size,number
a,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,588040,1-butanol,Chemicals & Drugs,588040,1-butanol,Chemicals & Drugs,4042749,⋯,1288761,588040,12.9304933250657,TRUE,1,3,3,TRUE,10,0.0
a,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,1288761,sodium chloride,Chemicals & Drugs,3438887,physiologic tolerance,Physiology,4042749,⋯,TRUE,10,0.0,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,1288761,sodium chloride,Chemicals & Drugs,3438887.0
a,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,4970313,sirolimus,Chemicals & Drugs,4056067,immune tolerance,Disorders,4042749,⋯,TRUE,10,0.0,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,4970313,sirolimus,Chemicals & Drugs,4056067.0
a,4042749,ucc1 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,588040,1-butanol,Chemicals & Drugs,5225634,butanols,Chemicals & Drugs,4042749,⋯,1-butanol,Chemicals & Drugs,5225634.0,butanols,Chemicals & Drugs,4042749,588040,588040,5225634,10.2934774029667
a,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,588040,1-butanol,Chemicals & Drugs,588040,1-butanol,Chemicals & Drugs,4044156,⋯,4962734,588040,11.8690738083681,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,5258250,resistance to chemicals,Physiology,588040.0
a,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,1288761,sodium chloride,Chemicals & Drugs,3438887,physiologic tolerance,Physiology,4044156,⋯,TRUE,10,0.0,4044156,pro2 (saccharomyces cerevisiae s288c),Genes & Molecular Sequences,1288761,sodium chloride,Chemicals & Drugs,3438887.0


## Step 3: Find the intersections of these relationships (open question: based on which criterion?)

## Alternative approach

## Get phenotype terms from the list provided by DSM and do the above anal