Skip to content

Commit

Permalink
Documentation Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
astrasb committed Oct 13, 2020
1 parent 2025ea2 commit f1e71c0
Show file tree
Hide file tree
Showing 29 changed files with 4,224 additions and 4,679 deletions.
File renamed without changes.
62 changes: 62 additions & 0 deletions Data/Sr_top50_usage_counts.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
AA,Codon,Count
Ala,GCA,156
Ala,GCC,165
Ala,GCG,1
Ala,GCT,609
Cys,TGT,185
Cys,TGC,60
Asp,GAC,139
Asp,GAT,450
Glu,GAA,586
Glu,GAG,100
Phe,TTC,215
Phe,TTT,197
Gly,GGA,675
Gly,GGC,23
Gly,GGG,8
Gly,GGT,358
His,CAC,91
His,CAT,127
Ile,ATA,67
Ile,ATC,171
Ile,ATT,425
Lys,AAA,660
Lys,AAG,244
Leu,CTA,7
Leu,CTC,60
Leu,CTG,3
Leu,CTT,413
Leu,TTA,123
Leu,TTG,97
Met,ATG,237
Asn,AAC,157
Asn,AAT,310
Pro,CCA,680
Pro,CCC,11
Pro,CCG,3
Pro,CCT,81
Gln,CAA,391
Gln,CAG,13
Arg,AGA,218
Arg,CGA,10
Arg,AGG,10
Arg,CGC,29
Arg,CGG,1
Arg,CGT,283
Ser,AGC,31
Ser,TCA,220
Ser,TCC,55
Ser,AGT,86
Ser,TCG,13
Ser,TCT,320
Thr,ACA,237
Thr,ACC,119
Thr,ACG,7
Thr,ACT,317
Val,GTA,139
Val,GTC,117
Val,GTG,6
Val,GTT,489
Trp,TGG,98
Tyr,TAC,136
Tyr,TAT,217
52 changes: 52 additions & 0 deletions Data/calculate_codon_usage_rules.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This script takes species-specific counts of codon occurrences and calculates
# the frequency (as a percentage) with which each codon "i" encodes amino acid "AA".
# These values will be passed to the quantification of relative adaptiveness,
# the first step for calculating codon adaptation index.
#
# Inputs (read relative to the working directory; expected columns are
# AA, Codon, Count):
#   Sr_top50_usage_counts.csv
#   Ce_usage_counts.csv
# Output:
#   codon_usage_chart.csv with columns AA, Codon, Sr_optimal, Ce_optimal

# Attach the packages whose functions are called unqualified below
# (tibble(), read_csv()/write_csv(), and the %>% pipe re-exported by dplyr).
library(tibble)
library(readr)
library(dplyr)

# Generate tibble with stop codon codes ----
# Stop codons are given the one-letter code "*" and a frequency of 0.
# Note: factor() must receive a single vector; factor("*", "*", "*") would treat
# the 2nd and 3rd values as `levels` and `labels` and return a length-1 factor.
stop_cdns <- tibble(AA = factor(rep("*", 3)),
                    Codon = c("TAA", "TAG", "TGA"),
                    Frequency = c(0, 0, 0))

# Convert a table of codon counts into per-amino-acid codon usage percentages.
#
# counts:      data frame with columns AA (three-letter amino acid code),
#              Codon, and Count.
# freq_col:    name (string) to give the resulting percentage column.
# stop_codons: rows for the stop codons (AA, Codon, Frequency) to append.
#
# Returns an ungrouped tibble with columns AA (one-letter code, factor), Codon,
# and <freq_col>, where <freq_col> is 100 * Count / (total Count for that AA),
# rounded to 9 significant digits.
calc_codon_freq <- function(counts, freq_col, stop_codons = stop_cdns) {
  counts %>%
    # seqinr::a() translates three-letter amino acid codes to one-letter codes
    dplyr::mutate(AA = seqinr::a(as.character(AA))) %>%
    dplyr::arrange(AA, Codon) %>%
    dplyr::mutate(AA = factor(AA)) %>%
    dplyr::group_by(AA) %>%
    dplyr::mutate(Frequency = signif(Count / sum(Count) * 100, digits = 9)) %>%
    # drop the grouping so it does not leak into the joins / written output
    dplyr::ungroup() %>%
    dplyr::full_join(stop_codons, by = c("AA", "Codon", "Frequency")) %>%
    dplyr::rename(!!freq_col := Frequency) %>%
    dplyr::select(!Count)
}

# Load S. ratti count data ----
# Source: Mitreva et al 2006; counts taken from 50 most common expressed
# sequence tag clusters (putative genes)
Sr.dat <- read_csv('Sr_top50_usage_counts.csv',
                   quote = "",
                   col_types = 'fcd')

Sr.codon.freq <- calc_codon_freq(Sr.dat, "Sr_optimal")

# Load C. elegans count data ----
# Source: Sharp and Bradnam, 1997; https://www.ncbi.nlm.nih.gov/books/NBK20194/
Ce.dat <- read_csv('Ce_usage_counts.csv',
                   quote = "",
                   col_types = 'ccd')

Ce.codon.freq <- calc_codon_freq(Ce.dat, "Ce_optimal")

# Combine both species into a single chart, one row per (AA, Codon) ----
codon_usage_chart <- dplyr::full_join(Sr.codon.freq,
                                      Ce.codon.freq,
                                      by = c("AA", "Codon"))

# The output file is passed positionally: readr 1.4.0 renamed the `path`
# argument to `file`, and the positional form works with either version.
write_csv(codon_usage_chart, "codon_usage_chart.csv")
1,544 changes: 1,544 additions & 0 deletions Offline Analysis/Ce_chemoreceptors.csv

Large diffs are not rendered by default.

1,257 changes: 1,257 additions & 0 deletions Offline Analysis/Ce_chemosensory_cDNA_list.csv

Large diffs are not rendered by default.

63 changes: 42 additions & 21 deletions Offline Analysis/Chemoreceptor_Codon_Adaptiveness.Rmd
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
---
title: "Codon Usage Chemoreceptor Adaptiveness"
date: "10/2/2020"
date: "10/7/2020"
output:
html_document:
code_folding: hide
pdf_document:
df_print: paged
toc: yes
html_notebook:
toc: yes
toc: true
toc_depth: 3
number_sections: true

---

# Introduction
Expand All @@ -19,7 +19,7 @@ Full lists of CDS sequences for *S. stercoralis*, *S. ratti*, *S. papillosus*, *

For user-defined genes-of-interest, a list of *S. stercoralis* chemoreceptor genes was used as input to the *Strongyloides* Codon Adapter App; the generated excel report is uploaded below.

```{r setup, include=FALSE}
```{r setup, echo=FALSE}
suppressPackageStartupMessages({
library(knitr)
library(rmarkdown)
Expand All @@ -31,22 +31,23 @@ suppressPackageStartupMessages({
library(biomaRt)
library(ggplot2)
})
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
```

## Parse Species-specific lists of chemoreceptors
Cleaning and data wrangling for chemoreceptor gene lists; necessary preprocessing step before running lists of genes through the *Strongyloides* Codon Adapter App. Gene lists were downloaded from a Shiny app provided for this purpose by [Wheeler *et al* 2020](https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3000723). Species for which we have lists of chemoreceptors are: *C. elegans*, *S. stercoralis*, *S. ratti*, and *S. venezuelensis*. Data are saved as 2-column .csv files containing geneIDs and cDNA sequences; these files can be used as inputs to the *Strongyloides* Codon Adapter App.

```{r cleanGeneLists, eval = F}
# C. elegans ----
temp <- c(Ce = '../Data/Ce_chemoreceptors.csv')
temp <- c(Ce = 'Data/Ce_chemoreceptors.csv')
genelist.Ce <- suppressWarnings(read.csv(temp,
header = TRUE,
colClasses = "character",
strip.white = T)) %>%
as_tibble()
## For C. elegans, match wormbase gene sequence name to the wbsp transcript id, and pull the cDNA sequence
## For C. elegans, match wormbase gene sequence name to the wbsp transcript id,
## and pull the cDNA sequence
Ce.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
# grab the cDNA sequences for the given genes from WormBase Parasite
mart = useMart(biomart="parasite_mart",
Expand All @@ -63,18 +64,23 @@ Ce.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
dplyr::rename(geneID = wbps_transcript_id, cDNA = cdna)
Ce.tr.seq$cDNA <- tolower(Ce.tr.seq$cDNA)
write.table(Ce.tr.seq, file = "./Ce_chemosensory_cDNA_list.csv", sep = ",", col.names = FALSE, row.names = FALSE)
write.table(Ce.tr.seq,
file = "./Ce_chemosensory_cDNA_list.csv",
sep = ",",
col.names = FALSE,
row.names = FALSE)
# S. ratti ----
temp <- c(Sr = '../Data/Sr_chemoreceptors.csv')
temp <- c(Sr = 'Data/Sr_chemoreceptors.csv')
genelist.Sr <- suppressWarnings(read.csv(temp,
header = TRUE,
colClasses = "character",
strip.white = T)) %>%
as_tibble()
## For ratti, match wormbase gene sequence name to the wbsp transcript id, and pull the cDNA sequence
## For ratti, match wormbase gene sequence name to the wbsp transcript id,
## and pull the cDNA sequence
Sr.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
# grab the cDNA sequences for the given genes from WormBase Parasite
mart = useMart(biomart="parasite_mart",
Expand All @@ -91,18 +97,23 @@ Sr.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
dplyr::rename(geneID = wbps_transcript_id, cDNA = cdna)
Sr.tr.seq$cDNA <- tolower(Sr.tr.seq$cDNA)
write.table(Sr.tr.seq, file = "./Sr_chemosensory_cDNA_list.csv", sep = ",", col.names = FALSE, row.names = FALSE)
write.table(Sr.tr.seq,
file = "./Sr_chemosensory_cDNA_list.csv",
sep = ",",
col.names = FALSE,
row.names = FALSE)
# S. venezuelensis ----
temp <- c(Sv = '../Data/Sv_chemoreceptors.csv')
temp <- c(Sv = 'Data/Sv_chemoreceptors.csv')
genelist.Sv <- suppressWarnings(read.csv(temp,
header = TRUE,
colClasses = "character",
strip.white = T)) %>%
as_tibble()
## For ratti, match wormbase gene sequence name to the wbsp transcript id, and pull the cDNA sequence
## For venezuelensis, match wormbase gene sequence name to the wbsp transcript id,
## and pull the cDNA sequence
Sv.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
# grab the cDNA sequences for the given genes from WormBase Parasite
mart = useMart(biomart="parasite_mart",
Expand All @@ -119,7 +130,11 @@ Sv.tr.seq <- getBM(attributes=c('wbps_transcript_id', 'cdna'),
dplyr::rename(geneID = wbps_transcript_id, cDNA = cdna)
Sv.tr.seq$cDNA <- tolower(Sv.tr.seq$cDNA)
write.table(Sv.tr.seq, file = "./Sv_chemosensory_cDNA_list.csv", sep = ",", col.names = FALSE, row.names = FALSE)
write.table(Sv.tr.seq,
file = "./Sv_chemosensory_cDNA_list.csv",
sep = ",",
col.names = FALSE,
row.names = FALSE)
```


Expand Down Expand Up @@ -286,7 +301,10 @@ cai_plot <- ggplot(tbl, aes(Sr_CAI, Ce_CAI, species)) +
geom_point(dat.GoI.df, mapping = aes(Sr_CAI, Ce_CAI, color = species),
show.legend = F,
shape = 1, size = 2, alpha = 1) +
scale_color_manual(values = c("seagreen4", "coral4", "darkgoldenrod4", "darkorchid4"))+
scale_color_manual(values = c("seagreen4",
"coral4",
"darkgoldenrod4",
"darkorchid4"))+
geom_hline(yintercept = 0.5, color = "grey", linetype = 2) +
geom_vline(xintercept = 0.5, color = "grey", linetype = 2) +
Expand All @@ -298,11 +316,14 @@ cai_plot <- ggplot(tbl, aes(Sr_CAI, Ce_CAI, species)) +
# shape = 1, size = 2, alpha = 1, color = "coral4") +
facet_grid(~species) +
labs(title = "Species-specific codon adaptiveness",
subtitle = "colored icons = species-specific chemoreceptors; black icons = all coding sequences
",
subtitle = paste("colored icons = species-specific chemoreceptors;",
"black icons = all coding sequences
"),
x = "Codon bias relative to \n S. ratti usage rules (CAI)",
y = "Codon Bias relative to \n C. elegans usage rules (CAI)",
caption = "Blue line/shading = linear regression \n w/ 95% confidence regions; \n formula = y ~ x") +
caption = "Blue line/shading = linear regression
w/ 95% confidence regions;
formula = y ~ x") +
coord_equal(xlim = c(0,1), ylim = c(0,1)) +
theme_bw() +
theme(plot.title.position = "plot",
Expand Down
2,338 changes: 0 additions & 2,338 deletions Offline Analysis/Chemoreceptor_Codon_Adaptiveness.html

This file was deleted.

2,113 changes: 0 additions & 2,113 deletions Offline Analysis/Chemoreceptor_Codon_Adaptiveness.nb.html

This file was deleted.

Binary file not shown.
Binary file modified Offline Analysis/ChemosensoryCodonUsagePlot.pdf
Binary file not shown.
Binary file modified Offline Analysis/CodonAdaptivenessDistributions.pdf
Binary file not shown.
Loading

0 comments on commit f1e71c0

Please sign in to comment.