Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/McGill-CSB/RNApyro
Browse files Browse the repository at this point in the history
  • Loading branch information
yannponty committed Oct 5, 2012
2 parents 2769ec8 + eb8eade commit ea5e7d8
Show file tree
Hide file tree
Showing 12 changed files with 675 additions and 3,614 deletions.
108 changes: 104 additions & 4 deletions Recomb/RNApyro.bib
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ @article{Waldispuhl2008
number = {8},
pages = {e1000124},
title = {{Efficient Algorithms for Probing the RNA Mutation Landscape}},
url = {http://dx.plos.org/10.1371/journal.pcbi.1000124},
volume = {4},
year = {2008}
}
Expand All @@ -36,7 +35,6 @@ @article{Turner2010
pages = {D280--2},
pmid = {19880381},
title = {{NNDB: the nearest neighbor parameter database for predicting stability of nucleic acid secondary structure.}},
url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2808915\&tool=pmcentrez\&rendertype=abstract},
volume = {38},
year = {2010}
}
Expand All @@ -56,7 +54,6 @@ @article{Stombaugh2009
pages = {2294--312},
pmid = {19240142},
title = {{Frequency and isostericity of RNA base pairs.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/19240142},
volume = {37},
year = {2009}
}
Expand All @@ -73,7 +70,110 @@ @article{Lari1990
number = {1},
pages = {35--56},
title = {{The estimation of stochastic context-free grammars using the Inside-Outside algorithm}},
url = {http://www.sciencedirect.com/science/article/pii/088523089090022X http://linkinghub.elsevier.com/retrieve/pii/088523089090022X},
volume = {4},
year = {1990}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Olsen1986,
author = {Olsen, G J and Lane, D J and Giovannoni, S J and Pace, N R and Stahl, D A},
doi = {10.1146/annurev.mi.40.100186.002005},
file = {:Users/zenon/Documents/Mendeley/Olsen et al/1986/Annual review of microbiology/Olsen et al.\_1986\_Microbial ecology and evolution a ribosomal RNA approach.pdf:pdf},
issn = {0066-4227},
journal = {Annual Review of Microbiology},
keywords = {Bacteria,Bacteria: classification,Bacteria: genetics,Base Sequence,Chromosome Mapping,DNA, Recombinant,Nucleic Acid Hybridization,Phylogeny,RNA, Bacterial,RNA, Bacterial: analysis,RNA, Ribosomal,RNA, Ribosomal: analysis},
month = jan,
pages = {337--365},
pmid = {2430518},
title = {Microbial ecology and evolution: a ribosomal {RNA} approach},
volume = {40},
year = {1986}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Olsen1993,
author = {Olsen, G J and Woese, C R},
file = {:Users/zenon/Documents/Mendeley/Olsen, Woese/1993/The FASEB journal/Olsen, Woese\_1993\_Ribosomal RNA a key to phylogeny.pdf:pdf},
journal = {The FASEB Journal},
number = {1},
pages = {113--123},
title = {Ribosomal {RNA}: a key to phylogeny},
volume = {7},
year = {1993}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Zuckerkandl1965,
author = {Zuckerkandl, Emile and Pauling, Linus},
doi = {10.1016/0022-5193(65)90083-4},
file = {:Users/zenon/Documents/Mendeley/Zuckerkandl, Pauling/1965/Journal of Theoretical Biology/Zuckerkandl, Pauling\_1965\_Molecules as documents of evolutionary history.pdf:pdf},
issn = {0022-5193},
journal = {Journal of Theoretical Biology},
month = mar,
number = {2},
pages = {357--366},
title = {Molecules as documents of evolutionary history},
volume = {8},
year = {1965}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Turnbaugh2007,
abstract = {A strategy to understand the microbial components of the human genetic and metabolic landscape and how they contribute to normal physiology and predisposition to disease.},
author = {Turnbaugh, Peter J and Ley, Ruth E and Hamady, Micah and Fraser-Liggett, Claire M and Knight, Rob and Gordon, Jeffrey I},
doi = {10.1038/nature06244},
file = {:Users/zenon/Documents/Mendeley/Turnbaugh et al/2007/Nature/Turnbaugh et al.\_2007\_The human microbiome project.pdf:pdf},
issn = {1476-4687},
journal = {Nature},
keywords = {Animals,Biodiversity,Genome, Bacterial,Genome, Bacterial: genetics,Genomics,Humans,Intestines,Intestines: cytology,Intestines: immunology,Intestines: microbiology,Metagenome,Metagenome: genetics,Metagenome: immunology,Mice,Sequence Analysis, DNA},
month = oct,
number = {7164},
pages = {804--810},
pmid = {17943116},
title = {The human microbiome project},
volume = {449},
year = {2007}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Huse2007,
abstract = {Massively parallel pyrosequencing systems have increased the efficiency of DNA sequencing, although the published per-base accuracy of a Roche GS20 is only 96\%. In genome projects, highly redundant consensus assemblies can compensate for sequencing errors. In contrast, studies of microbial diversity that catalogue differences between PCR amplicons of ribosomal RNA genes (rDNA) or other conserved gene families cannot take advantage of consensus assemblies to detect and minimize incorrect base calls.},
author = {Huse, Susan M and Huber, Julie A and Morrison, Hilary G and Sogin, Mitchell L and Welch, David Mark},
doi = {10.1186/gb-2007-8-7-r143},
file = {:Users/zenon/Documents/Mendeley/Huse et al/2007/Genome biology/Huse et al.\_2007\_Accuracy and quality of massively parallel DNA pyrosequencing.pdf:pdf},
issn = {1465-6914},
journal = {Genome Biology},
keywords = {DNA, Ribosomal,DNA, Ribosomal: chemistry,DNA, Ribosomal: genetics,Genes, rRNA,Genes, rRNA: genetics,Genetic Variation,Genetics, Microbial,Genetics, Microbial: methods,Genome,Genome: genetics,Reproducibility of Results,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
month = jan,
number = {7},
pages = {R143},
pmid = {17659080},
title = {Accuracy and quality of massively parallel {DNA} pyrosequencing},
volume = {8},
year = {2007}
}
Automatically generated by Mendeley 1.6
Any changes to this file will be lost if it is regenerated by Mendeley.
@article{Kunin2010,
abstract = {Massively parallel pyrosequencing of the small subunit (16S) ribosomal RNA gene has revealed that the extent of rare microbial populations in several environments, the 'rare biosphere', is orders of magnitude higher than previously thought. One important caveat with this method is that sequencing error could artificially inflate diversity estimates. Although the per-base error of 16S rDNA amplicon pyrosequencing has been shown to be as good as or lower than Sanger sequencing, no direct assessments of pyrosequencing errors on diversity estimates have been reported. Using only Escherichia coli MG1655 as a reference template, we find that 16S rDNA diversity is grossly overestimated unless relatively stringent read quality filtering and low clustering thresholds are applied. In particular, the common practice of removing reads with unresolved bases and anomalous read lengths is insufficient to ensure accurate estimates of microbial diversity. Furthermore, common and reproducible homopolymer length errors can result in relatively abundant spurious phylotypes further confounding data interpretation. We suggest that stringent quality-based trimming of 16S pyrotags and clustering thresholds no greater than 97\% identity should be used to avoid overestimates of the rare biosphere.},
author = {Kunin, Victor and Engelbrektson, Anna and Ochman, Howard and Hugenholtz, Philip},
doi = {10.1111/j.1462-2920.2009.02051.x},
file = {:Users/zenon/Documents/Mendeley/Kunin et al/2010/Environmental microbiology/Kunin et al.\_2010\_Wrinkles in the rare biosphere pyrosequencing errors can lead to artificial inflation of diversity estimates.pdf:pdf},
issn = {1462-2920},
journal = {Environmental Microbiology},
keywords = {Biodiversity,Cluster Analysis,DNA, Bacterial,DNA, Bacterial: genetics,Escherichia coli,Escherichia coli: genetics,Genes, Bacterial,Genetic Variation,RNA, Ribosomal, 16S,RNA, Ribosomal, 16S: genetics,Sequence Alignment,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
month = jan,
number = {1},
pages = {118--123},
pmid = {19725865},
title = {Wrinkles in the rare biosphere: pyrosequencing errors can lead to artificial inflation of diversity estimates},
url = {http://www.ncbi.nlm.nih.gov/pubmed/19725865},
volume = {12},
year = {2010}
}
31 changes: 31 additions & 0 deletions Recomb/introduction_RECOMB.tex
Original file line number Diff line number Diff line change
@@ -1,3 +1,34 @@
%!TEX root = main_RECOMB.tex
\section{Introduction}
\label{sec:introduction}

Ribonucleic acids (RNAs) are a ubiquitous class of molecules, being
found in every living organism and having a broad range of functions, from catalyzing
chemical reactions, as RNase P and the group II introns do,
and hybridizing messenger RNA to regulate gene expression,
to synthesizing proteins, as ribosomal RNA (rRNA) does.
These functions require specific structures,
which are encoded in the nucleotide sequences. Although the functions
need to be preserved across organisms, and
the structures must therefore be similar, the sequences
can differ greatly from one organism to another.
For half a century, biological molecules have been studied as a proxy to understand
evolution~\cite{Zuckerkandl1965}, and with all their characteristics, rRNAs have
become a prime candidate for phylogenetic studies~\cite{Olsen1986, Olsen1993}.

In recent years, studies such as the \emph{Human Microbiome Project}~\cite{Turnbaugh2007},
leveraging next-generation sequencing (NGS) techniques to sequence as many new organisms
as possible, have been producing a wealth of new information. Although
those techniques have a huge throughput, they yield a sequencing error rate of around
$4\%$~\cite{Huse2007}. This error rate can be greatly reduced in genome projects when highly
redundant consensus
assemblies are available, but in studies of new or poorly characterized organisms, there is not
enough similarity to differentiate between sequencing errors and the natural
polymorphisms that we want to observe, which often inflates the diversity estimates~\cite{Kunin2010}.
In rRNAs, the conserved secondary structure
provides additional information, and we want to use it to identify
highly probable sequencing errors.

The first challenge is to efficiently explore the mutant space, which grows exponentially.
Leveraging the techniques in \texttt{RNAmutants}~\cite{Waldispuhl2008}, we develop
here
31 changes: 31 additions & 0 deletions Recomb/main_RECOMB.aux
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
\relax
\citation{Waldispuhl2008}
\citation{Lari1990}
\citation{Turner2010}
\citation{Stombaugh2009}
\citation{Stombaugh2009}
\citation{Turner2010}
\citation{Stombaugh2009}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}}
\newlabel{sec:introduction}{{1}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Methods}{2}}
\newlabel{sec:methods}{{2}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Definitions}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Energy Model}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Inside}{3}}
\newlabel{eq:Z_in}{{1}{3}}
\newlabel{eq:Z_rec}{{2}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Outside}{4}}
\newlabel{eq:Y_in}{{3}{4}}
\newlabel{eq:Y_rec}{{4}{4}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Inside-Outside}{5}}
\citation{mpmath}
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{6}}
\newlabel{sec:results}{{4}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Implementation}{6}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{6}}
\newlabel{sec:conclusion}{{5}{6}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Acknowledgments}{6}}
\newlabel{sec:acknowledgments}{{6}{6}}
\bibstyle{plainnat}
\bibdata{RNApyro}
Loading

0 comments on commit ea5e7d8

Please sign in to comment.