### Detecting human orthologs for yeast genetic interactors of Huntingtin protein.
#### Mason etal_2013

In [6]:
path <- "C:/Users/Sonali/Box Sync/Huntington Interactome/MutantHTT_Interactors/Yeast/MutantHttSuppressors_green"
setwd(path)

#####################################################
### Get the required libararies
#####################################################
library("AnnotationDbi")
library("hom.Sc.inp.db")
library("biomaRt")
library ("plyr") #### just in case.

########################################
###Biomart package installation####
#######################################
source("http://bioconductor.org/biocLite.R")
biocLite("biomaRt")
source("https://bioconductor.org/biocLite.R")
biocLite("hom.Sc.inp.db")
# 
# #########################################
# ##To view documentation for the version of this package ###
# ############################################################
# browseVignettes("biomaRt")
# 
# ###################################################
# ### code chunk number 2: biomaRt
# ###################################################
# library("biomaRt")
# listMarts()
# 
# 
# 
# ###################################################
# ### code chunk number 4: listDatasets
# ###################################################
# listDatasets(ensembl)

#############################################################
### Read the file containing the gene symbols of the organism
###############################################################

yeastEGIds <- read.table(file= "R_Input_Mason_etal2013.txt", sep = "\t", header = TRUE)

# convert your Gene Symbols to ensemble gene id and ensembl protein id for your organism

yeast = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "scerevisiae_gene_ensembl") ### specify the dataset to use 

###################################################
### filters for the yeast dataset
###################################################
filters = listFilters(yeast)
filters[1:5,]
listFilters(yeast)

###################################################
### attributes for yeast dataset
###################################################
attributes = listAttributes(yeast)

attributes[1:5,]
listAttributes(yeast, page="feature_page")


########################################
## Get the attributes for yeast dataset 
#######################################


rawMasonENSProtIds <- getBM(c('ensembl_gene_id', 'external_gene_name', 'ensembl_peptide_id'), filters='wikigene_name', 'external_gene_name', values= yeastEGIds$AssociatedGeneName, mart=yeast) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
length (rawMasonENSProtIds)
dim (rawMasonENSProtIds)
class(rawMasonENSProtIds)
rawMasonENSProtIds


#########################################
# remove duplicate values from the ensembl protein id column
#######################################

MasonENSProtIds <- rawMasonENSProtIds[!duplicated(rawMasonENSProtIds[,3]),]
MasonENSProtIds
dim(MasonENSProtIds)
 
##############################################
# Using the Inparanoid function to find human orthologs in form of human ensembl protein IDs#
##################################################

rawHumanProtIds <- mget(unlist(MasonENSProtIds$ensembl_peptide_id) ,hom.Sc.inpHOMSA,ifnotfound=NA)

rawHumanProtIds
class(rawHumanProtIds)
HumanProtIds <- rawHumanProtIds[!is.na(rawHumanProtIds)] ## remove all the NA values from the list
HumanProtIds
length(HumanProtIds)

##########################################################################
## Find the ensembl gene id annd hgnc symbols for your human ensembl peptide id 
## using the getBM function and the human ensembl dataset
#######################################################################

human = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "hsapiens_gene_ensembl")
Humanorthologs <- getBM(c('ensembl_gene_id', 'hgnc_symbol', 'ensembl_peptide_id', 'entrezgene'), filters='ensembl_peptide_id', values=HumanProtIds, mart=human) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
Humanorthologs
dim(Humanorthologs)

## run the line below if need output in text file.

# write.table(Humanorthologs, file="MasonHumanOrthologs.txt", sep="\t", col.names = NA) ### export file in text format to the folder.


##############################################################################################
##########END ################################################################################
#############################################################################################

sessionInfo()



Bioconductor version 3.4 (BiocInstaller 1.24.0), ?biocLite for help
BioC_mirror: https://bioconductor.org
Using Bioconductor 3.4 (BiocInstaller 1.24.0), R 3.3.1 (2016-06-21).
Installing package(s) 'biomaRt'
"package 'biomaRt' is in use and will not be installed"installation path not writeable, unable to update packages: cluster, codetools,
  foreign, lattice, Matrix, mgcv, nlme, survival
Old packages: 'assertthat', 'curl', 'matrixStats', 'party', 'zoo'
Bioconductor version 3.4 (BiocInstaller 1.24.0), ?biocLite for help
BioC_mirror: https://bioconductor.org
Using Bioconductor 3.4 (BiocInstaller 1.24.0), R 3.3.1 (2016-06-21).
Installing package(s) 'hom.Sc.inp.db'
installing the source package 'hom.Sc.inp.db'

installation path not writeable, unable to update packages: cluster, codetools,
  foreign, lattice, Matrix, mgcv, nlme, survival
Old packages: 'assertthat', 'curl', 'matrixStats', 'party', 'zoo'


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"
with_tra_tsl,with Transcript Support Level (TSL)
with_tra_appris_pi,with APPRIS principal isoform annotation
with_tra_gencode_basic,with GENCODE basic annotation
with_ec_number,with EC number(s)
with_embl,with EMBL ID(s)


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page
chromosome_name,Chromosome Name,feature_page
start_position,Gene Start (bp),feature_page
end_position,Gene End (bp),feature_page
strand,Strand,feature_page
band,Band,feature_page


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YHR116W,COX23,YHR116W
YNL218W,MGS1,YNL218W
YOR026W,BUB3,YOR026W
YML116W,ATR1,YML116W
YGL155W,CDC43,YGL155W
YDL125C,HNT1,YDL125C
YGL162W,SUT1,YGL162W
YGL047W,ALG13,YGL047W
YMR203W,TOM40,YMR203W
YEL072W,RMD6,YEL072W


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YHR116W,COX23,YHR116W
YNL218W,MGS1,YNL218W
YOR026W,BUB3,YOR026W
YML116W,ATR1,YML116W
YGL155W,CDC43,YGL155W
YDL125C,HNT1,YDL125C
YGL162W,SUT1,YGL162W
YGL047W,ALG13,YGL047W
YMR203W,TOM40,YMR203W
YEL072W,RMD6,YEL072W


ensembl_gene_id,hgnc_symbol,ensembl_peptide_id,entrezgene
ENSG00000058799,YIPF1,ENSP00000072644,54432
ENSG00000077009,NMRK2,ENSP00000168977,27231
ENSG00000100206,DMC1,ENSP00000216024,11144
ENSG00000239900,ADSL,ENSP00000216194,158
ENSG00000101464,PIGU,ENSP00000217446,128869
ENSG00000104231,ZFAND1,ENSP00000220669,79752
ENSG00000077348,EXOSC5,ENSP00000221233,56915
ENSG00000065268,WDR18,ENSP00000251289,57418
ENSG00000130204,TOMM40,ENSP00000252487,10452
ENSG00000130733,YIPF2,ENSP00000253031,78992


R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 14393)

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
[1] plyr_1.8.4           biomaRt_2.30.0       hom.Sc.inp.db_3.1.2 
[4] AnnotationDbi_1.36.2 IRanges_2.8.2        S4Vectors_0.12.2    
[7] Biobase_2.34.0       BiocGenerics_0.20.0  BiocInstaller_1.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10        magrittr_1.5        uuid_0.1-2         
 [4] R6_2.2.0            stringr_1.2.0       tools_3.3.1        
 [7] DBI_0.6-1           digest_0.6.12       crayon_1.3.2       
[10] IRdisplay_0.4.4     repr_0.12.0         bitops_1.0-6       
[13] RC

#### Giorgini et al_2005

In [7]:
path <- "C:/Users/Sonali/Box Sync/Huntington Interactome/MutantHTT_Interactors/Yeast/MutantHttSuppressors_green"
setwd(path)

#####################################################
### Get the required libararies
#####################################################
library("AnnotationDbi")
library("hom.Sc.inp.db")
library("biomaRt")
library ("plyr") #### just in case.

########################################
###Biomart package installation####
#######################################
# source("http://bioconductor.org/biocLite.R")
# biocLite("biomaRt")
# 
# #########################################
# ##To view documentation for the version of this package ###
# ############################################################
# browseVignettes("biomaRt")
# 
# ###################################################
# ### code chunk number 2: biomaRt
# ###################################################
# library("biomaRt")
# listMarts()
# 
# 
# 
# ###################################################
# ### code chunk number 4: listDatasets
# ###################################################
# listDatasets(ensembl)

#############################################################
### Read the file containing the gene symbols of the organism
###############################################################

yeastEGIds <- read.table(file= "R_Input_Giorgini_etal2005.txt", sep = "\t", header = TRUE)

# convert your Gene Symbols to ensemble gene id and ensembl protein id for your organism

yeast = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "scerevisiae_gene_ensembl") ### specify the dataset to use 

###################################################
### filters for the yeast dataset
###################################################
filters = listFilters(yeast)
filters[1:5,]
listFilters(yeast)

###################################################
### attributes for yeast dataset
###################################################
attributes = listAttributes(yeast)

attributes[1:5,]
listAttributes(yeast, page="feature_page")


########################################
## Get the attributes for yeast dataset 
#######################################


rawGiorginiENSProtIds <- getBM(c('ensembl_gene_id', 'external_gene_name', 'ensembl_peptide_id'), filters='wikigene_name', 'external_gene_name', values= yeastEGIds$Suppressors, mart=yeast) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
length (rawGiorginiENSProtIds)
dim (rawGiorginiENSProtIds)
class(rawGiorginiENSProtIds)
rawGiorginiENSProtIds


#########################################
# remove duplicate values from the ensembl protein id column
#######################################

GiorginiENSProtIds <- rawGiorginiENSProtIds[!duplicated(rawGiorginiENSProtIds[,3]),]
GiorginiENSProtIds
dim(GiorginiENSProtIds)
 
##############################################
# Using the Inparanoid function to find human orthologs in form of human ensembl protein IDs#
##################################################

rawHumanProtIds <- mget(unlist(GiorginiENSProtIds$ensembl_peptide_id) ,hom.Sc.inpHOMSA,ifnotfound=NA)

rawHumanProtIds
class(rawHumanProtIds)
HumanProtIds <- rawHumanProtIds[!is.na(rawHumanProtIds)] ## remove all the NA values from the list
HumanProtIds
length(HumanProtIds)

##########################################################################
## Find the ensembl gene id annd hgnc symbols for your human ensembl peptide id 
## using the getBM function and the human ensembl dataset
#######################################################################

human = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "hsapiens_gene_ensembl")
Humanorthologs <- getBM(c('ensembl_gene_id', 'hgnc_symbol', 'ensembl_peptide_id', 'entrezgene'), filters='ensembl_peptide_id', values=HumanProtIds, mart=human) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
Humanorthologs
dim(Humanorthologs)

## run the line below if need output in text file.
# write.table(Humanorthologs, file="GiorginiHumanOrthologs.txt", sep="\t", col.names = NA) ### export file in text format to the folder.


##############################################################################################
##########END ################################################################################
#############################################################################################

sessionInfo()


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"
with_tra_tsl,with Transcript Support Level (TSL)
with_tra_appris_pi,with APPRIS principal isoform annotation
with_tra_gencode_basic,with GENCODE basic annotation
with_ec_number,with EC number(s)
with_embl,with EMBL ID(s)


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page
chromosome_name,Chromosome Name,feature_page
start_position,Gene Start (bp),feature_page
end_position,Gene End (bp),feature_page
strand,Strand,feature_page
band,Band,feature_page


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YBR279W,PAF1,YBR279W
YDR525W-A,SNA2,YDR525W-A
YOR198C,BFR1,YOR198C
YLR278C,,YLR278C
YBR073W,RDH54,YBR073W
YJL029C,VPS53,YJL029C
YOR298C-A,MBF1,YOR298C-A
YDL117W,CYK3,YDL117W
YBL098W,BNA4,YBL098W
YNR049C,MSO1,YNR049C


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YBR279W,PAF1,YBR279W
YDR525W-A,SNA2,YDR525W-A
YOR198C,BFR1,YOR198C
YLR278C,,YLR278C
YBR073W,RDH54,YBR073W
YJL029C,VPS53,YJL029C
YOR298C-A,MBF1,YOR298C-A
YDL117W,CYK3,YDL117W
YBL098W,BNA4,YBL098W
YNR049C,MSO1,YNR049C


ensembl_gene_id,hgnc_symbol,ensembl_peptide_id,entrezgene
ENSG00000006712,PAF1,ENSP00000221265,54623
ENSG00000107223,EDF1,ENSP00000224073,8721
ENSG00000146830,GIGYF1,ENSP00000275732,64599
ENSG00000117009,KMO,ENSP00000355517,8564
ENSG00000141252,VPS53,ENSP00000373692,55275


R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 14393)

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
[1] plyr_1.8.4           biomaRt_2.30.0       hom.Sc.inp.db_3.1.2 
[4] AnnotationDbi_1.36.2 IRanges_2.8.2        S4Vectors_0.12.2    
[7] Biobase_2.34.0       BiocGenerics_0.20.0  BiocInstaller_1.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10        magrittr_1.5        uuid_0.1-2         
 [4] R6_2.2.0            stringr_1.2.0       tools_3.3.1        
 [7] DBI_0.6-1           digest_0.6.12       crayon_1.3.2       
[10] IRdisplay_0.4.4     repr_0.12.0         bitops_1.0-6       
[13] RC

### Chatterjee et al 2013

In [8]:
path <- "C:/Users/Sonali/Box Sync/Huntington Interactome/MutantHTT_Interactors/Yeast/MutantHttSuppressors_green"
setwd(path)

#####################################################
### Get the required libararies
#####################################################
library("AnnotationDbi")
library("hom.Sc.inp.db")
library("biomaRt")
library ("plyr")

########################################
###Biomart package installation####
#######################################
# source("http://bioconductor.org/biocLite.R")
# biocLite("biomaRt")
# 
# #########################################
# ##To view documentation for the version of this package ###
# ############################################################
# browseVignettes("biomaRt")
# 
# ###################################################
# ### code chunk number 2: biomaRt
# ###################################################
# library("biomaRt")
# listMarts()
# 
# 
# 
# ###################################################
# ### code chunk number 4: listDatasets
# ###################################################
# listDatasets(ensembl)

#############################################################
### Read the file containing the gene symbols of the organism
###############################################################

yeastEGIds <- read.table(file= "R_Input_Chatterjee_etal2013.txt", sep = "\t", header = TRUE)

# convert your Gene Symbols to ensemble gene id and ensembl protein id for your organism

yeast = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "scerevisiae_gene_ensembl") ### specify the dataset to use 

###################################################
### filters for the yeast dataset
###################################################
filters = listFilters(yeast)
filters[1:5,]
listFilters(yeast)

###################################################
### attributes for yeast dataset
###################################################
attributes = listAttributes(yeast)

attributes[1:5,]
listAttributes(yeast, page="feature_page")


########################################
## Get the attributes for yeast dataset 
#######################################


rawChatterjeeENSProtIds <- getBM(c('ensembl_gene_id', 'external_gene_name', 'ensembl_peptide_id'), filters='wikigene_name', 'external_gene_name', values= yeastEGIds$ConfirmedSuppressorGenes, mart=yeast) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
length (rawChatterjeeENSProtIds)
dim (rawChatterjeeENSProtIds)
class(rawChatterjeeENSProtIds)
rawChatterjeeENSProtIds


#########################################
# remove duplicate values from the ensembl protein id column
#######################################

ChatterjeeENSProtIds <- rawChatterjeeENSProtIds[!duplicated(rawChatterjeeENSProtIds[,3]),]
ChatterjeeENSProtIds
dim(ChatterjeeENSProtIds)
 
##############################################
# Using the Inparanoid function to find human orthologs in form of human ensembl protein IDs#
##################################################

rawHumanProtIds <- mget(unlist(ChatterjeeENSProtIds$ensembl_peptide_id) ,hom.Sc.inpHOMSA,ifnotfound=NA)

rawHumanProtIds
class(rawHumanProtIds)
HumanProtIds <- rawHumanProtIds[!is.na(rawHumanProtIds)] ## remove all the NA values from the list
HumanProtIds
length(HumanProtIds)

##########################################################################
## Find the ensembl gene id annd hgnc symbols for your human ensembl peptide id 
## using the getBM function and the human ensembl dataset
#######################################################################

human = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "hsapiens_gene_ensembl")
Humanorthologs <- getBM(c('ensembl_gene_id', 'hgnc_symbol', 'ensembl_peptide_id', 'entrezgene'), filters='ensembl_peptide_id', values=HumanProtIds, mart=human) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
Humanorthologs
dim(Humanorthologs)

## run the line below if need output in text file.
#write.table(Humanorthologs, file="ChatterjeeHumanOrthologs.txt", sep="\t", col.names = NA) ### export file in text format to the folder.


##############################################################################################
##########END ################################################################################
#############################################################################################

sessionInfo()


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"
with_tra_tsl,with Transcript Support Level (TSL)
with_tra_appris_pi,with APPRIS principal isoform annotation
with_tra_gencode_basic,with GENCODE basic annotation
with_ec_number,with EC number(s)
with_embl,with EMBL ID(s)


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page
chromosome_name,Chromosome Name,feature_page
start_position,Gene Start (bp),feature_page
end_position,Gene End (bp),feature_page
strand,Strand,feature_page
band,Band,feature_page


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YJR094W-A,RPL43B,YJR094W-A
YNL069C,RPL16B,YNL069C
YPL143W,RPL33A,YPL143W
YDL082W,RPL13A,YDL082W
YDL136W,RPL35B,YDL136W
YMR194W,RPL36A,YMR194W
YLR441C,RPS1A,YLR441C
YBR084C-A,RPL19A,YBR084C-A
YDR418W,RPL12B,YDR418W
YML026C,RPS18B,YML026C


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YJR094W-A,RPL43B,YJR094W-A
YNL069C,RPL16B,YNL069C
YPL143W,RPL33A,YPL143W
YDL082W,RPL13A,YDL082W
YDL136W,RPL35B,YDL136W
YMR194W,RPL36A,YMR194W
YLR441C,RPS1A,YLR441C
YBR084C-A,RPL19A,YBR084C-A
YDR418W,RPL12B,YDR418W
YML026C,RPS18B,YML026C


ensembl_gene_id,hgnc_symbol,ensembl_peptide_id,entrezgene
ENSG00000096150,RPS18,ENSP00000211372,6222
ENSG00000108298,RPL19,ENSP00000225430,6143
ENSG00000130255,RPL36,ENSP00000252543,25873
ENSG00000131469,RPL27,ENSP00000253788,6155
ENSG00000136942,RPL35,ENSP00000259469,11224
ENSG00000170889,RPS9,ENSP00000302896,6203
ENSG00000167526,RPL13,ENSP00000307889,6137
ENSG00000167526,RPL13,ENSP00000307889,606500
ENSG00000134419,RPS15A,ENSP00000318646,6210
ENSG00000089157,RPLP0,ENSP00000339027,6175


R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 14393)

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
[1] plyr_1.8.4           biomaRt_2.30.0       hom.Sc.inp.db_3.1.2 
[4] AnnotationDbi_1.36.2 IRanges_2.8.2        S4Vectors_0.12.2    
[7] Biobase_2.34.0       BiocGenerics_0.20.0  BiocInstaller_1.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10        magrittr_1.5        uuid_0.1-2         
 [4] R6_2.2.0            stringr_1.2.0       tools_3.3.1        
 [7] DBI_0.6-1           digest_0.6.12       crayon_1.3.2       
[10] IRdisplay_0.4.4     repr_0.12.0         bitops_1.0-6       
[13] RC

#### Willingham etal_2003

In [9]:
path <- "C:/Users/Sonali/Box Sync/Huntington Interactome/MutantHTT_Interactors/Yeast/MutantHttEnhancers_red"
setwd(path)

#####################################################
### Get the required libararies
#####################################################
library("AnnotationDbi")
library("hom.Sc.inp.db")
library("biomaRt")
library ("plyr") #### just in case.

########################################
###Biomart package installation####
#######################################
# source("http://bioconductor.org/biocLite.R")
# biocLite("biomaRt")
# 
# #########################################
# ##To view documentation for the version of this package ###
# ############################################################
# browseVignettes("biomaRt")
# 
# ###################################################
# ### code chunk number 2: biomaRt
# ###################################################
# library("biomaRt")
# listMarts()
# 
# 
# 
# ###################################################
# ### code chunk number 4: listDatasets
# ###################################################
# listDatasets(ensembl)

#############################################################
### Read the file containing the gene symbols of the organism
###############################################################

yeastEGIds <- read.table(file= "R_Input_Willingham_etal2003.txt", sep = "\t", header = TRUE)

# convert your Gene Symbols to ensemble gene id and ensembl protein id for your organism

yeast = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "scerevisiae_gene_ensembl") ### specify the dataset to use 

###################################################
### filters for the yeast dataset
###################################################
filters = listFilters(yeast)
filters[1:5,]
listFilters(yeast)

###################################################
### attributes for yeast dataset
###################################################
attributes = listAttributes(yeast)

attributes[1:5,]
listAttributes(yeast, page="feature_page")


########################################
## Get the attributes for yeast dataset 
#######################################


rawWillinghamENSProtIds <- getBM(c('ensembl_gene_id', 'external_gene_name', 'ensembl_peptide_id'), filters='wikigene_name', 'external_gene_name', values= yeastEGIds$HttEnhancergenes, mart=yeast) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
length (rawWillinghamENSProtIds)
dim (rawWillinghamENSProtIds)
class(rawWillinghamENSProtIds)
rawWillinghamENSProtIds


#########################################
# remove duplicate values from the ensembl protein id column
#######################################

WillinghamENSProtIds <- rawWillinghamENSProtIds[!duplicated(rawWillinghamENSProtIds[,3]),]
WillinghamENSProtIds
dim(WillinghamENSProtIds)

##############################################
# Using the Inparanoid function to find human orthologs in form of human ensembl protein IDs#
##################################################

rawHumanProtIds <- mget(unlist(WillinghamENSProtIds$ensembl_peptide_id) ,hom.Sc.inpHOMSA,ifnotfound=NA)

rawHumanProtIds
class(rawHumanProtIds)
HumanProtIds <- rawHumanProtIds[!is.na(rawHumanProtIds)] ## remove all the NA values from the list
HumanProtIds
length(HumanProtIds)

##########################################################################
## Find the ensembl gene id annd hgnc symbols for your human ensembl peptide id 
## using the getBM function and the human ensembl dataset
#######################################################################

human = useMart(host='dec2014.archive.ensembl.org',biomart = 'ENSEMBL_MART_ENSEMBL', dataset = "hsapiens_gene_ensembl")
Humanorthologs <- getBM(c('ensembl_gene_id', 'hgnc_symbol', 'ensembl_peptide_id', 'entrezgene'), filters='ensembl_peptide_id', values=HumanProtIds, mart=human) # convert Gene Symbol to ensemble gene id and ensembl protein id for your organism
Humanorthologs
dim(Humanorthologs)

## run the line below if need output in text file.
#write.table(Humanorthologs, file="WillinghamHumanOrthologs.txt", sep="\t", col.names = NA) ### export file in text format to the folder.


##############################################################################################
##########END ################################################################################
#############################################################################################

sessionInfo()


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"


name,description
chromosome_name,Chromosome name
start,Gene Start (bp)
end,Gene End (bp)
strand,Strand
chromosomal_region,"Chromosome Regions (e.g 1:100:10000:-1,1:100000:200000:1)"
with_tra_tsl,with Transcript Support Level (TSL)
with_tra_appris_pi,with APPRIS principal isoform annotation
with_tra_gencode_basic,with GENCODE basic annotation
with_ec_number,with EC number(s)
with_embl,with EMBL ID(s)


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page


name,description,page
ensembl_gene_id,Ensembl Gene ID,feature_page
ensembl_transcript_id,Ensembl Transcript ID,feature_page
ensembl_peptide_id,Ensembl Protein ID,feature_page
ensembl_exon_id,Ensembl Exon ID,feature_page
description,Description,feature_page
chromosome_name,Chromosome Name,feature_page
start_position,Gene Start (bp),feature_page
end_position,Gene End (bp),feature_page
strand,Strand,feature_page
band,Band,feature_page


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YGR055W,MUP1,YGR055W
YMR161W,HLJ1,YMR161W
YJR126C,VPS70,YJR126C
YDR272W,GLO2,YDR272W
YOR337W,TEA1,YOR337W
YOR032C,HMS1,YOR032C
YGR197C,SNG1,YGR197C
YHL019C,APM2,YHL019C
YBL052C,SAS3,YBL052C
YDR502C,SAM2,YDR502C


ensembl_gene_id,external_gene_name,ensembl_peptide_id
YGR055W,MUP1,YGR055W
YMR161W,HLJ1,YMR161W
YJR126C,VPS70,YJR126C
YDR272W,GLO2,YDR272W
YOR337W,TEA1,YOR337W
YOR032C,HMS1,YOR032C
YGR197C,SNG1,YGR197C
YHL019C,APM2,YHL019C
YBL052C,SAS3,YBL052C
YDR502C,SAM2,YDR502C


ensembl_gene_id,hgnc_symbol,ensembl_peptide_id,entrezgene
ENSG00000021488,SLC7A9,ENSP00000023064,11136
ENSG00000100983,GSS,ENSP00000216951,2937
ENSG00000162877,PM20D1,ENSP00000356104,148811


R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 14393)

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
[1] plyr_1.8.4           biomaRt_2.30.0       hom.Sc.inp.db_3.1.2 
[4] AnnotationDbi_1.36.2 IRanges_2.8.2        S4Vectors_0.12.2    
[7] Biobase_2.34.0       BiocGenerics_0.20.0  BiocInstaller_1.24.0

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10        magrittr_1.5        uuid_0.1-2         
 [4] R6_2.2.0            stringr_1.2.0       tools_3.3.1        
 [7] DBI_0.6-1           digest_0.6.12       crayon_1.3.2       
[10] IRdisplay_0.4.4     repr_0.12.0         bitops_1.0-6       
[13] RC