Skip to content

Commit

Permalink
reverse_complement: Simplify lookup table code
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrubeck committed Mar 4, 2017
1 parent df104f0 commit 0ce7d17
Showing 1 changed file with 27 additions and 43 deletions.
70 changes: 27 additions & 43 deletions src/reverse_complement.rs
Expand Up @@ -10,24 +10,15 @@ extern crate rayon;
extern crate memchr;

use std::io::{Read, Write};
use std::{cmp, io, mem};
use std::{cmp, io};
use std::fs::File;
use std::mem::replace;

struct Tables {
table8: [u8;1 << 8],
}

impl Tables {
fn new() -> Tables {
let mut table8 = [0;1 << 8];
for (i, v) in table8.iter_mut().enumerate() {
*v = Tables::computed_cpl8(i as u8);
}
Tables { table8: table8 }
}

fn computed_cpl8(c: u8) -> u8 {
match c {
/// Returns a lookup table to find the complement of a FASTA nucleic acid code.
fn build_table() -> [u8; 256] {
let mut table = [0; 256];
for (i, x) in table.iter_mut().enumerate() {
*x = match i as u8 {
b'A' | b'a' => b'T',
b'C' | b'c' => b'G',
b'G' | b'g' => b'C',
Expand All @@ -45,13 +36,9 @@ impl Tables {
b'B' | b'b' => b'V',
b'N' | b'n' => b'N',
i => i,
}
}

/// Retrieves the complement for `i`.
fn cpl8(&self, i: u8) -> u8 {
self.table8[i as usize]
};
}
table
}

/// Utilities for splitting chunks off of mutable slices.
Expand All @@ -63,7 +50,7 @@ impl<'a, T> SplitOff for &'a mut [T] {
/// Split the left `n` items from self and return them as a separate slice.
fn split_off_left(&mut self, n: usize) -> Self {
let n = cmp::min(self.len(), n);
let data = mem::replace(self, &mut []);
let data = replace(self, &mut []);
let (left, data) = data.split_at_mut(n);
*self = data;
left
Expand All @@ -72,7 +59,7 @@ impl<'a, T> SplitOff for &'a mut [T] {
fn split_off_right(&mut self, n: usize) -> Self {
let len = self.len();
let n = cmp::min(len, n);
let data = mem::replace(self, &mut []);
let data = replace(self, &mut []);
let (data, right) = data.split_at_mut(len - n);
*self = data;
right
Expand All @@ -84,18 +71,16 @@ const LINE_LEN: usize = 61;
const SEQUENTIAL_SIZE: usize = 2048;

/// Compute the reverse complement for two contiguous chunks without line breaks.
fn reverse_complement_chunk(left: &mut [u8], right: &mut [u8], tables: &Tables) {
fn reverse_complement_chunk(left: &mut [u8], right: &mut [u8], table: &[u8; 256]) {
for (x, y) in left.iter_mut().zip(right.iter_mut().rev()) {
let tmp = tables.cpl8(*x);
*x = tables.cpl8(*y);
*y = tmp;
*y = table[replace(x, table[*y as usize]) as usize];
}
}

/// Compute the reverse complement on chunks from opposite ends of the sequence.
///
/// `left` must start at the beginning of a line.
fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trailing_len: usize, tables: &Tables) {
fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trailing_len: usize, table: &[u8; 256]) {
let len = left.len();
if len <= SEQUENTIAL_SIZE {
while left.len() > 0 || right.len() > 0 {
Expand All @@ -106,9 +91,9 @@ fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trai
// If there is an odd number of bytes, the extra one will be on the right.
if b.len() > a.len() {
let mid = b.split_off_left(1);
mid[0] = tables.cpl8(mid[0])
mid[0] = table[mid[0] as usize];
}
reverse_complement_chunk(a, b, tables);
reverse_complement_chunk(a, b, table);

// Skip the newline in `right`.
right.split_off_right(1);
Expand All @@ -121,9 +106,9 @@ fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trai
// If there is an odd number of bytes, the extra one will be on the left.
if a.len() > b.len() {
let mid = a.split_off_right(1);
mid[0] = tables.cpl8(mid[0])
mid[0] = table[mid[0] as usize]
}
reverse_complement_chunk(a, b, tables);
reverse_complement_chunk(a, b, table);

// Skip the newline in `left`.
left.split_off_left(1);
Expand All @@ -134,25 +119,25 @@ fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trai

let left1 = left.split_off_left(mid);
let right1 = right.split_off_right(mid);
rayon::join(|| reverse_complement_left_right(left, right, trailing_len, tables),
|| reverse_complement_left_right(left1, right1, trailing_len, tables));
rayon::join(|| reverse_complement_left_right(left, right, trailing_len, table),
|| reverse_complement_left_right(left1, right1, trailing_len, table));
}
}

/// Compute the reverse complement.
fn reverse_complement(seq: &mut [u8], tables: &Tables) {
fn reverse_complement(seq: &mut [u8], table: &[u8; 256]) {
let len = seq.len() - 1;
let seq = &mut seq[..len]; // Drop the last newline
let trailing_len = len % LINE_LEN;
let (left, right) = seq.split_at_mut(len / 2);
reverse_complement_left_right(left, right, trailing_len, tables);
reverse_complement_left_right(left, right, trailing_len, table);
}

fn file_size(f: &mut File) -> io::Result<usize> {
Ok(f.metadata()?.len() as usize)
}

fn split_and_reverse<'a>(data: &mut [u8], tables: &Tables) {
fn split_and_reverse<'a>(data: &mut [u8], table: &[u8; 256]) {
let data = match memchr::memchr(b'\n', data) {
Some(i) => &mut data[i + 1..],
None => return,
Expand All @@ -161,10 +146,10 @@ fn split_and_reverse<'a>(data: &mut [u8], tables: &Tables) {
match memchr::memchr(b'>', data) {
Some(i) => {
let (head, tail) = data.split_at_mut(i);
rayon::join(|| reverse_complement(head, tables),
|| split_and_reverse(tail, tables));
rayon::join(|| reverse_complement(head, table),
|| split_and_reverse(tail, table));
}
None => reverse_complement(data, tables),
None => reverse_complement(data, table),
};
}

Expand All @@ -173,9 +158,8 @@ fn main() {
let size = file_size(&mut stdin).unwrap_or(1024 * 1024);
let mut data = Vec::with_capacity(size + 1);
stdin.read_to_end(&mut data).unwrap();
let tables = &Tables::new();

split_and_reverse(&mut data, tables);
split_and_reverse(&mut data, &build_table());
let stdout = io::stdout();
stdout.lock().write_all(&data).unwrap();
}

0 comments on commit 0ce7d17

Please sign in to comment.