Skip to content

Commit

Permalink
Version 2.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
MohmedSoudy committed Sep 23, 2021
1 parent 1cddefe commit 9f55be1
Show file tree
Hide file tree
Showing 16 changed files with 295 additions and 288 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
Package: UniprotR
Title: Retrieving Information of Proteins from Uniprot
Version: 2.0.7
Version: 2.1.0
Authors@R: c(person("Mohamed", "Soudy", email = "MohmedSoudy2009@gmail.com", role=c("aut", "cre")), person("Ali", "Mostafa", email = "ali.mo.anwar@std.agr.cu.edu.eg", role = "aut"))
Author: Mohamed Soudy [aut, cre],
Ali Mostafa [aut]
Maintainer: Mohamed Soudy <MohmedSoudy2009@gmail.com>
Description: Connect to Uniprot <https://www.uniprot.org/> to retrieve information about proteins using their accession numbers; such information could be name or taxonomy information. For detailed information, kindly read the publication <https://www.sciencedirect.com/science/article/pii/S1874391919303859>.
License: GPL-3
Encoding: UTF-8
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Imports: utils , grDevices , stats , grid , graphics , httr , plyr ,
dplyr , scales , magrittr , magick , data.tree , ggplot2 ,
tidyverse , gridExtra , ggpubr , curl, networkD3,
stringr , qdapRegex , htmlwidgets , alakazam (>= 1.0.0), gprofiler2, progress
tidyverse , gridExtra , ggpubr , curl, networkD3, stringr ,
qdapRegex , htmlwidgets , alakazam (>= 1.0.0), gprofiler2,
progress, ggsci
URL: https://github.com/Proteomicslab57357/UniprotR
BugReports: https://github.com/Proteomicslab57357/UniprotR/issues
NeedsCompilation: no
Packaged: 2021-04-06 10:17:45 UTC; RSH
Packaged: 2021-09-22 14:25:38 UTC; RSH
Repository: CRAN
Date/Publication: 2020-07-26 13:20:05 UTC
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import(stringr)
import(ggplot2)
import(data.tree)
import(ggpubr)
import(ggsci)
import(grid)
import(gridExtra)
import(networkD3)
Expand Down Expand Up @@ -69,6 +70,9 @@ export(Plot.GOSubCellular)
export(PlotAcidity)
export(PlotCharge)
export(PlotChromosomeInfo)
export(PlotEnrichedGO)
export(PlotEnrichedPathways)
export(PlotGOAll)
export(PlotGOBiological)
export(PlotGenesNetwork)
export(PlotGoInfo)
Expand Down
28 changes: 14 additions & 14 deletions R/GETSeqFastaUniprot.R
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
#' Connect and parse UniProt information.
#'
#' This Function is used to download sequences of given protein list in a fasta format
#' This Function is used to get Sequence information of accession/s from Uniprot as a Fasta file.
#'
#' @usage GETSeqFastaUniprot(Accessions, FileName = NULL)
#' @usage GETSeqFastaUniprot(Accessions,FilePath = NULL, FileName = NULL)
#'
#' @param Accessions list of Uniprot accessions
#' @param Accessions Vector of UniProt Accession/s
#'
#' @param FileName OUtput file name
#' @param FilePath path of directory to save the output fasta.
#'
#' @param FileName Name of the fasta file.
#'
#' @author Mohmed Soudy \email{Mohamed.soudy@57357.com} and Ali Mostafa \email{ali.mo.anwar@std.agr.cu.edu.eg}
#'
#' @export
GETSeqFastaUniprot <- function(Accessions, FileName = NULL)
GETSeqFastaUniprot <- function(Accessions, FilePath = NULL ,FileName = NULL)
{
OutNumber <- 0
message("Please wait we are processing your accessions ...")
pb <- progress::progress_bar$new(total = length(Accessions))

for (Acc in Accessions)
{
Request <- tryCatch(
{
GET(paste0("https://www.uniprot.org/uniprot/" , Acc , ".Fasta"))
GET(paste0("https://www.uniprot.org/uniprot/" , Acc , ".Fasta") , timeout(10))
},error = function(cond)
{
message("Internet connection problem occurs and the function will return the original error")
Expand All @@ -27,15 +31,11 @@ GETSeqFastaUniprot <- function(Accessions, FileName = NULL)
)
if (Request$status_code == 200)
{
OutNumber <<- OutNumber + 1
Fastadata <- read.csv(paste0("https://www.uniprot.org/uniprot/" , Acc , ".Fasta") , header = F , sep = "\t")
Sequences <- paste0(as.character(unlist(Fastadata)) , collapse = "\n")
if (!is.null(FileName))
{
write.table(x = Sequences , file = paste0(FileName ,".fasta") , quote = F , row.names = F , col.names = F, append = T)
}
write.table(x = Sequences , file = paste0(FileName ,".fasta") , quote = F , row.names = F , col.names = F, append = T)
pb$tick()
}

}
return(OutNumber)
}
}
2 changes: 1 addition & 1 deletion R/GetAccessionList.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
GetAccessionList <- function(DataObjPath)
{
# Purpose: read the CSV file at DataObjPath and return the accessions held in
# its first column as a character vector.
DataSet <- read.csv(DataObjPath)
# NOTE(review): the value assigned here is overwritten by the next statement
# before it is ever read.
AccessionList <- as.array(as.character(DataSet[,1]))
# unique() runs before trimws(), so entries that differ only in surrounding
# whitespace are not collapsed into a single accession.
AccessionList <- trimws(unique(as.character(DataSet[,1])))
return(AccessionList)
}

2 changes: 1 addition & 1 deletion R/GetProteinFunction.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ GetProteinFunction <- function(ProteinAccList , directorypath = NULL)
}
ProteinInfoParsed_total = data.frame()
baseUrl <- "http://www.uniprot.org/uniprot/"
Colnames = "ec,comment(ABSORPTION),comment(CATALYTIC ACTIVITY),chebi,chebi(Catalytic activity),chebi(Cofactor),chebi-id,comment(COFACTOR),comment(ENZYME REGULATION),comment(FUNCTION),comment(KINETICS),comment(PATHWAY),comment(REDOX POTENTIAL),comment(TEMPERATURE DEPENDENCE),comment(PH DEPENDENCE),feature(ACTIVE SITE),feature(BINDING SITE),feature(DNA BINDING),feature(METAL BINDING),feature(NP BIND),feature(SITE)"
Colnames = "ec,comment(ABSORPTION),comment(CATALYTIC ACTIVITY),chebi,chebi(Catalytic activity),chebi(Cofactor),chebi-id,comment(COFACTOR),comment(ACTIVITY REGULATION),comment(FUNCTION),comment(KINETICS),comment(PATHWAY),comment(REDOX POTENTIAL),comment(TEMPERATURE DEPENDENCE),comment(PH DEPENDENCE),feature(ACTIVE SITE),feature(BINDING SITE),feature(DNA BINDING),feature(METAL BINDING),feature(NP BIND),feature(SITE)"

message("Please wait we are processing your accessions ...")
pb <- progress::progress_bar$new(total = length(ProteinAccList))
Expand Down
59 changes: 59 additions & 0 deletions R/PlotEnrichedGO.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Connect and parse UniProt information
#'
#' This function is used to generate a combined plot for the enriched Gene Ontology terms.
#' It runs an enrichment query with \code{gost}, keeps the results whose source is
#' "GO:BP", "GO:CC" or "GO:MF" and draws a bar plot of -log10(p-value) per term,
#' filled by source.
#'
#' @usage PlotEnrichedGO(Accs,OS="hsapiens",p_value=0.05,Path=NULL,theme="aaas",width=7,height=7)
#'
#' @param Accs Vector of UniProt Accession/s or genes
#'
#' @param OS organism name Example: human - 'hsapiens', mouse - 'mmusculus'
#'
#' @param p_value custom p-value threshold for significance, default = 0.05
#'
#' @param theme optional parameter to generate specific theme for journals ex: "aaas", "nature", "lancet", "jama".
#' Any other value leaves the plot with its initial palette and orientation.
#'
#' @param Path Path to save output plot. When NULL (default) nothing is written to disk.
#'
#' @param width width of the generated plot
#'
#' @param height height of the generated plot
#'
#' @return Invisibly NULL when the query yields no Gene Ontology terms;
#' otherwise the value of \code{plot()} applied to the generated plot.
#'
#' @export
#'
#' @author Mohmed Soudy \email{Mohamed.soudy@57357.com} and Ali Mostafa \email{ali.mo.anwar@std.agr.cu.edu.eg}
#'
PlotEnrichedGO <- function(Accs,OS="hsapiens",p_value=0.05,Path=NULL,theme="aaas",width=7,height= 7)
{
  # Forward the organism and the significance threshold; they were previously
  # accepted but silently ignored.
  Enr.data <- gost(query = Accs, organism = OS, user_threshold = p_value)

  # Guard against a query that produced no result object at all.
  if (is.null(Enr.data) || is.null(Enr.data$result))
  {
    message("No enriched terms were returned for the given accessions.")
    return(invisible(NULL))
  }
  Enr.frame <- Enr.data$result

  GOs <- Enr.frame[Enr.frame$source %in% c("GO:BP", "GO:CC", "GO:MF"), ]
  if (nrow(GOs) == 0)
  {
    message("No enriched Gene Ontology terms were found for the given accessions.")
    return(invisible(NULL))
  }
  GOs$`-log10 (p)` <- -log10(GOs$p_value)

  P <- ggbarplot(GOs, x = "term_name", y = "-log10 (p)",
                 fill = "source",
                 color = "white",
                 palette = "jco",
                 sort.val = "asc",
                 sort.by.groups = TRUE,
                 x.text.angle = 90
  )

  # Journal palettes. Unknown themes fall through to NULL and leave the plot
  # untouched, exactly as before.
  # NOTE(review): "nature" is mapped to the NEJM palette; confirm whether the
  # Nature Publishing Group palette was intended.
  FillScale <- switch(tolower(theme),
                      aaas = scale_fill_aaas(),
                      lancet = scale_fill_lancet(),
                      jama = scale_fill_jama(),
                      nature = scale_fill_nejm(),
                      NULL)
  if (!is.null(FillScale))
    P <- P + FillScale + xlab("") + coord_flip()

  if (!is.null(Path))
  {
    # Single save for every row count: the former ">50" branch used the
    # unusable extension ".jppeg" and exactly 50 rows were never saved.
    ggsave(path = Path, filename = "Significant GOs.jpeg", plot = P,
           width = width, height = height, dpi = 300)
  }
  plot(P)
}
59 changes: 59 additions & 0 deletions R/PlotEnrichedPathways.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Connect and parse UniProt information
#'
#' This function is used to generate a combined plot for the enriched pathways from KEGG and REACTOME.
#' It runs an enrichment query with \code{gost}, keeps the results whose source is
#' "KEGG" or "REAC" and draws a bar plot of -log10(p-value) per term, filled by source.
#'
#' @usage PlotEnrichedPathways(Accs,OS="hsapiens",p_value=0.05,Path=NULL,theme="aaas",w=7,h=7)
#'
#' @param Accs Vector of UniProt Accession/s or genes
#'
#' @param OS organism name Example: human - 'hsapiens', mouse - 'mmusculus'
#'
#' @param p_value custom p-value threshold for significance, default = 0.05
#'
#' @param theme optional parameter to generate specific theme for journals ex: "aaas", "nature", "lancet", "jama".
#' Any other value leaves the plot with its initial palette and orientation.
#'
#' @param Path Path to save output plot. When NULL (default) nothing is written to disk.
#'
#' @param w width of the generated plot, default = 7
#'
#' @param h height of the generated plot, default = 7
#'
#' @return Invisibly NULL when the query yields no KEGG/REACTOME terms;
#' otherwise the value of \code{plot()} applied to the generated plot.
#'
#' @export
#'
#' @author Mohmed Soudy \email{Mohamed.soudy@57357.com} and Ali Mostafa \email{ali.mo.anwar@std.agr.cu.edu.eg}
#'
PlotEnrichedPathways <- function(Accs,OS="hsapiens",p_value=0.05,Path=NULL,theme="aaas",w=7,h=7)
{
  # The former defaults `w = w, h = h` referred to themselves, so saving
  # without explicit sizes failed; concrete defaults make the call usable.

  # Forward the organism and the significance threshold; they were previously
  # accepted but silently ignored.
  Enr.data <- gost(query = Accs, organism = OS, user_threshold = p_value)

  # Guard against a query that produced no result object at all.
  if (is.null(Enr.data) || is.null(Enr.data$result))
  {
    message("No enriched terms were returned for the given accessions.")
    return(invisible(NULL))
  }
  Enr.frame <- Enr.data$result

  Pathways <- Enr.frame[Enr.frame$source %in% c("KEGG", "REAC"), ]
  if (nrow(Pathways) == 0)
  {
    message("No enriched KEGG or REACTOME pathways were found for the given accessions.")
    return(invisible(NULL))
  }
  Pathways$`-log10 (p)` <- -log10(Pathways$p_value)

  P <- ggbarplot(Pathways, x = "term_name", y = "-log10 (p)",
                 fill = "source",
                 color = "white",
                 palette = "jco",
                 sort.val = "asc",
                 sort.by.groups = TRUE,
                 x.text.angle = 90
  )

  # Journal palettes. Unknown themes fall through to NULL and leave the plot
  # untouched, exactly as before.
  # NOTE(review): "nature" is mapped to the NEJM palette; confirm whether the
  # Nature Publishing Group palette was intended.
  FillScale <- switch(tolower(theme),
                      aaas = scale_fill_aaas(),
                      lancet = scale_fill_lancet(),
                      jama = scale_fill_jama(),
                      nature = scale_fill_nejm(),
                      NULL)
  if (!is.null(FillScale))
    P <- P + FillScale + xlab("") + coord_flip()

  if (!is.null(Path))
  {
    # Both former branches wrote the same file but skipped exactly 50 rows;
    # a single unconditional save covers every row count.
    ggsave(path = Path, filename = "Significant Pathways.jpeg", plot = P,
           width = w, height = h, dpi = 300)
  }
  plot(P)
}
62 changes: 62 additions & 0 deletions R/PlotGOAll.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Connect and parse UniProt information.
#'
#' This Function is used to plot the retrieved Gene Ontology from function 'GetProteinGOInfo'.
#' The first \code{Top} terms of each of the three categories (labelled "BP", "CC"
#' and "MF") are combined into a single bar plot of term counts.
#'
#' @usage PlotGOAll(GOObj, Top = 10, directorypath = NULL, width = 7, height = 7)
#'
#' @param GOObj Dataframe returned from UniprotR Function "GetProteinGOInfo"
#'
#' @param Top Maximum number of terms to be visualized for each category
#'
#' @param directorypath path to save Output plot. When NULL (default) nothing is written to disk.
#'
#' @param width width of the generated plot, default = 7
#'
#' @param height height of the generated plot, default = 7
#'
#' @return Invisibly NULL when no terms are available; otherwise the value of
#' \code{plot()} applied to the generated plot.
#'
#' @author Mohmed Soudy \email{Mohamed.soudy@57357.com} and Ali Mostafa \email{ali.mo.anwar@std.agr.cu.edu.eg}
#'
#' @export
#'
PlotGOAll <- function(GOObj, Top = 10, directorypath = NULL, width = 7, height = 7)
{
  # The former defaults `width = width, height = height` referred to
  # themselves, so saving without explicit sizes failed.

  # Parse one category and keep at most `Top` of its rows. `Top` itself is
  # never modified, so a short category can no longer change the limit that
  # is applied to the categories parsed after it.
  TopTerms <- function(Column, Label)
  {
    Terms <- Goparse(GOObj, Column)
    Keep <- seq_len(min(Top, nrow(Terms)))
    Terms <- na.omit(Terms[Keep, , drop = FALSE])
    # rep() keeps the assignment valid for a category without any rows.
    Terms$source <- rep(Label, nrow(Terms))
    Terms
  }

  # Column indices as used by the original implementation
  # (presumably the GOObj columns holding each GO category -- TODO confirm).
  BiologicalDF <- TopTerms(3, "BP")
  CellularDF <- TopTerms(5, "CC")
  MolecularDF <- TopTerms(4, "MF")

  GO.terms <- rbind(BiologicalDF, CellularDF, MolecularDF)
  if (nrow(GO.terms) == 0)
  {
    message("No Gene Ontology terms are available to plot.")
    return(invisible(NULL))
  }

  P <- ggbarplot(GO.terms, x = "Goterm", y = "Count",
                 fill = "source",
                 color = "white",
                 palette = "jco",
                 sort.val = "asc",
                 sort.by.groups = TRUE,
                 x.text.angle = 90
  )
  P <- P + scale_fill_lancet() + coord_flip()
  if(!is.null(directorypath))
  {
    ggsave(plot = P, filename = paste0(directorypath, "/", "GO All.jpeg"), device = "jpeg", width = width, height = height)
  }
  plot(P)
}
32 changes: 0 additions & 32 deletions R/PlotSummaryInfo.R

This file was deleted.

Loading

0 comments on commit 9f55be1

Please sign in to comment.