.Rhistory

install.packages("tidyverse")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("SeqArray")
BiocManager::install("SeqArray")
BiocManager::install("SNPRelate")
vcftools --vcf RAD_data/OL_subset.vcf --missing-indv --out RAD_data/OL_subset
setwd("/Users/Jason/github/marineomics.github.io")
rmarkdown::render_site()
rmarkdown::render_site()
install.packages(DESeq2)
install.packages('DESeq2')
invisible(lapply(c( "tidyverse", "ape", "vegan", "GGally",
, "rgl", "adegenet", "MASS",
"data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest","statmod"),
function(p){
if(! p %in% rownames(installed.packages())) {
#install.packages(p)
}
library(p, character.only=TRUE)
}))
if(! p %in% rownames(installed.packages())) {
install.packages(p)
}
invisible(lapply(c( "tidyverse", "ape", "vegan", "GGally",
, "rgl", "adegenet", "MASS",
"data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest","statmod"),
function(p){
if(! p %in% rownames(installed.packages())) {
install.packages(p)
}
library(p, character.only=TRUE)
}))
invisible(lapply(c( "tidyverse", "ape", "vegan", "GGally",
"rgl", "adegenet", "MASS",
"data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest","statmod"),
function(p){
if(! p %in% rownames(installed.packages())) {
install.packages(p)
}
library(p, character.only=TRUE)
}))
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("DESeq2","edgeR","arrayQualityMetrics"))
rmarkdown::render_site()
install.packages('Hmisc')
rmarkdown::render_site()
install.packages('Hmisc')
install.packages('Hmisc')
invisible(lapply(c( "tidyverse", "ape", "vegan", "GGally",
"rgl", "adegenet", "MASS",
"data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest","statmod"),
function(p){
if(! p %in% rownames(installed.packages())) {
install.packages(p)
}
library(p, character.only=TRUE)
}))
install.packages('XQuartz')
devtools::install_github("natverse/nat")
if (!require("devtools")) install.packages("devtools")
# then install nat
devtools::install_github("natverse/nat")
rmarkdown::render_site()
invisible(lapply(c( "tidyverse", "ape", "vegan", "GGally",
"rgl", "adegenet", "MASS",
"data.table", "plyr", "lmtest", "reshape2", "Rmisc", "lmerTest","statmod"),
function(p){
if(! p %in% rownames(installed.packages())) {
install.packages(p)
}
library(p, character.only=TRUE)
}))
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
library(knitcitations)
library(kableExtra)
opts_chunk$set(fig.width = 10,
fig.height = 5,
cache = FALSE)
cite_options(citation_format = "pandoc", max.names = 3, style = "html",
hyperlink = "to.doc")
knitr::include_graphics("submissions_instructions/Rivera_etal_fig.png")
getwd()
?normalizePath
?render_website
??render_website
??render
rmarkdown::render_site()
getwd()
vcftools --vcf RAD_data/OL_subset.vcf --missing-indv --out RAD_data/OL_subset
#using previously loaded gdsin object
print("Per variant: ")
summary(m1 <- SeqArray::seqMissing(gdsin, per.variant=TRUE))
library(SeqArray) # efficient storage and filtering of genomic data
library(tidyverse) # plotting data formatting and manipulation
library(SNPRelate) # PCA and other popgen analyses
# This code reads in a comma-delimited STRATA/meta-data file, randomises samples, and returns a file with wells and plates and duplicates which may be used to assist with plating libraries
# ** TODO** will need to consider where to duplicate samples - need to consider STARTA, DOC, ect..?
# maybe have user write in a list of samples to replicate
wells = 96 # how many wells can you use on your plate?
data.in = read.table("./data/example.metadata.csv", sep = ",", header = T) # each sample should be in its own ROW
filename = "OL_subset" #replace with your file name
filename.gds = paste0("RAD_data/", paste0(filename, ".gds"))
filename.vcf = paste0("RAD_data/", paste0(filename, ".vcf"))
# 1 . Convert VCF to GDS
SeqArray::seqVCF2GDS(vcf.fn = filename.vcf, out.fn = filename.gds, storage.option="ZIP_RA")
gdsin = SeqArray::seqOpen(filename.gds)
print(paste0("The number of SAMPLES in data: ", length(c(SeqArray::seqGetData(gdsin, "sample.id")))))
print(paste0("The number of SNPs in data: ",  length(c(SeqArray::seqGetData(gdsin, "variant.id")))))
metafile = "RAD_data/OL.popmap"
sample.ids = seqGetData(gdsin, "sample.id")
sample.strata =  read.table(metafile, header = T, sep = "\t") %>%
dplyr::select(ID, STRATA, PLATE)
#using previously loaded gdsin object
print("Per variant: ")
summary(m1 <- SeqArray::seqMissing(gdsin, per.variant=TRUE))
print("Per sample: ")
summary(m2 <- SeqArray::seqMissing(gdsin, per.variant=FALSE))
samples <- SeqArray::seqGetData(gdsin, "sample.id")
cbind(samples,m2)[order(-m2),]
#plot histogram
hist(m2,breaks=50)
View(gdsin)
gdsin[["root"]]
install.packages("shiny")
install.packages("learnr")
Cvgff <- read.csv("DRAFT_Funct_Enrich/annotGCF_002022765.2_C_virginica-3.0_genomic.gff")
Cvgff <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", )
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", comment.char = "#")
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff")
View(Cvgff)
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", comment.char=#)
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", comment.char="#")
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", comment.char=##)
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff")
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", fill=TRUE,comment.char=#)
library(data.table)
Cvgff <- fread("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff", fill=TRUE)
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("rtracklayer")
# Load the library
library(rtracklayer)
# Define the file path to your GFF file
file_path <- "DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_genomic.gff"
# Import the GFF file
gff_data <- import(file_path)
# Convert GRanges object to a DataFrame for easier manipulation
gff_data_frame <- as.data.frame(gff_data)
View(gff_data_frame)
gff_data_frame %>%
select(type, gene)
library(tidyverse)
gff_data_frame %>%
select(type, gene)
gff_data_frame %>%
filter(type = gene)
gff_data_frame %>%
filter(type == gene)
gff_data_frame %>%
filter(type == "gene")
cds <- gff_data_frame %>%
filter(type == "CDS")
View(cds)
View(cds)
cds <- gff_data_frame %>%
filter(gbkey == "CDS" && source == "Gnomon")
cds <- gff_data_frame %>%
filter(gbkey == "CDS" | source == "Gnomon")
cds <- gff_data_frame %>%
filter(gbkey == "CDS", source == "Gnomon")
cds <- gff_data_frame %>%
filter(gbkey == "CDS", source == "Gnomon")
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab")
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', row.names=NULL)
View(cds)
View(cdsftab)
View(cdsftab)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', header = FALSE, row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = ' ', header = FALSE, row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', header = FALSE, row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '] [', header = FALSE, row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = "] [", header = FALSE, row.names=NULL)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', header = FALSE, row.names=NULL)
df <- cdsftab %>%
mutate(text = str_remove_all(text, "\\[|\\]")) %>%
separate(text, into = c("gene", "db_xref", "protein", "protein_id"), sep = "\\] \\[", remove = FALSE) %>%
mutate(across(gene:protein_id, ~str_split(., "=", simplify = TRUE)[,2]))
df <- cdsftab %>%
mutate(data = str_replace_all(data, "\\[|\\]", "")) %>%
separate(data, into = c("gene", "db_xref", "protein", "protein_id"), sep = " ", remove = FALSE) %>%
mutate(across(gene:protein_id, ~str_split(., "=", simplify = TRUE)[,2]))
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab")
View(blast)
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep )
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep = '\t')
bedtools getfasta [OPTIONS] -fi <input FASTA> -bed <BED/GFF/VCF>ex
View(blast)
View(blast)
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep = '\t')
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep = '\t', header = FALSE)
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep = '\t', header = FALSE)
View(blast)
left_join(blast, cdsftab, by = V1)
left_join(blast, cdsftab, by = V1)
left_join(blast, cdsftab, by = "V1")
df < - left_join(blast, cdsftab, by = "V1")
df < - left_join(blast, cdsftab, by = "V1")
df <- left_join(blast, cdsftab, by = "V1")
View(df)
View(df)
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(text, "(?<=\\[gene=)\\w+"))
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract("(?<=\\[gene=)\\w+"))
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+"))
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.y)
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.y)
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x)
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)"))
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
df <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
gene-spid <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE) %>%
select(SPID) %>%
write.table(file = "DRAFT_Funct_Enrich/annot/SPID.txt", sep = "\t", row.names = FALSE
)
library(tidyverse)
g.spid <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
reticulate::repl_python()
left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE) %>%
select(SPID) %>%
write.table(file = "DRAFT_Funct_Enrich/annot/SPID.txt", sep = "\t", row.names = FALSE
)
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE) %>%
select(SPID) %>%
write.table(file = "DRAFT_Funct_Enrich/annot/SPID.txt", sep = "\t", row.names = FALSE, quote = FALSE
)
reticulate::repl_python()
reticulate::repl_python()
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', header = FALSE, row.names=NULL)
blast <- read.csv("DRAFT_Funct_Enrich/annot/Cvir_transcds-uniprot_blastp.tab", sep = '\t', header = FALSE)
cdsftab <- read.csv("DRAFT_Funct_Enrich/annot/GCF_002022765.2_C_virginica-3.0_translated_cds.tab", sep = '\t', header = FALSE, row.names=NULL)
g.spid <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
library(tidyverse)
g.spid <- left_join(blast, cdsftab, by = "V1") %>%
mutate(gene = str_extract(V2.y, "(?<=\\[gene=)\\w+")) %>%
select(gene, V11, V2.x) %>%
mutate(SPID = str_extract(V2.x, "(?<=\\|)[^\\|]*(?=\\|)")) %>%
distinct(gene, SPID, .keep_all = TRUE)
View(g.spid)
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
reticulate::py_last_error()
View(g.spid)
reticulate::repl_python()
library(tidyverse)
library(reticulate)
knitr::opts_chunk$set(engine.path = '/usr/bin/python3')
reticulate::repl_python()
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
reticulate::repl_python()
reticulate::repl_python()
reticulate::repl_python()
knitr::opts_chunk$set(engine.path = '/usr/bin/python3')
reticulate::repl_python()
knitr::opts_chunk$set(engine.path = '/usr/bin/python3')
reticulate::repl_python()
reticulate::repl_python()
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
python DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py #DRAFT_Funct_Enrich/annot/SPID.txt
getwd
getwd()
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
python DRAFT_Funct_Enrich/annot/uniprot-retrieval.py
reticulate::repl_python()
reticulate::repl_python()
getwd()
getwd()
library(tidyverse)
library(reticulate)
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
`DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt`
knitr::opts_chunk$set(engine.path = '/usr/bin/python3')
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
reticulate::repl_python()
reticulate::py_last_error()
reticulate::repl_python()
knitr::opts_chunk$set(engine.path = '/usr/bin/python3')
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
python DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
library(tidyverse)
library(reticulate)
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
reticulate::source_python('/home/shared/8TB_HDD_01/sr320/github/marineomics.github.io/DRAFT_Funct_Enrich/annot/uniprot-retrieval.py')
reticulate::repl_python()
import gzip
import gzip
import gzip
reticulate::repl_python()
DRAFT_Funct_Enrich/annot/uniprot-retrieval.py DRAFT_Funct_Enrich/annot/SPID.txt
reticulate::repl_python()
reticulate::repl_python()
install.packages("reticulate")
?include_graphics
R.Version()