Skip to content

Commit

Permalink
reverse_complement: Do line wrapping and reverse complement in one step
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrubeck committed Mar 4, 2017
1 parent 2b151d3 commit 84e26be
Showing 1 changed file with 78 additions and 71 deletions.
149 changes: 78 additions & 71 deletions src/reverse_complement.rs
Expand Up @@ -10,12 +10,11 @@ extern crate rayon;
extern crate memchr;

use std::io::{Read, Write};
use std::{io, ptr, slice};
use std::{cmp, io, mem};
use std::fs::File;

struct Tables {
table8: [u8;1 << 8],
table16: [u16;1 << 16]
}

impl Tables {
Expand All @@ -24,12 +23,7 @@ impl Tables {
for (i, v) in table8.iter_mut().enumerate() {
*v = Tables::computed_cpl8(i as u8);
}
let mut table16 = [0;1 << 16];
for (i, v) in table16.iter_mut().enumerate() {
*v = (table8[i & 255] as u16) << 8 |
table8[i >> 8] as u16;
}
Tables { table8: table8, table16: table16 }
Tables { table8: table8 }
}

fn computed_cpl8(c: u8) -> u8 {
Expand Down Expand Up @@ -58,85 +52,98 @@ impl Tables {
fn cpl8(&self, i: u8) -> u8 {
self.table8[i as usize]
}
}

/// Retrieves the complement for `i`.
fn cpl16(&self, i: u16) -> u16 {
self.table16[i as usize]
/// Utilities for splitting chunks off of mutable slices.
trait SplitOff {
fn split_off_left(&mut self, n: usize) -> Self;
fn split_off_right(&mut self, n: usize) -> Self;
}
impl<'a, T> SplitOff for &'a mut [T] {
fn split_off_left(&mut self, n: usize) -> Self {
let n = cmp::min(self.len(), n);
let data = mem::replace(self, &mut []);
let (left, data) = data.split_at_mut(n);
*self = data;
left
}
fn split_off_right(&mut self, n: usize) -> Self {
let len = self.len();
let n = cmp::min(len, n);
let data = mem::replace(self, &mut []);
let (data, right) = data.split_at_mut(len - n);
*self = data;
right
}
}

/// Length of a normal line without the terminating \n.
const LINE_LEN: usize = 60;
const SEQUENTIAL_SIZE: usize = 1024;
/// Length of a normal line including the terminating \n.
const LINE_LEN: usize = 61;
const SEQUENTIAL_SIZE: usize = 2048;

/// Compute the reverse complement with the sequence split into two equal-sized slices.
fn reverse_complement_left_right(left: &mut [u16], right: &mut [u16], tables: &Tables) {
/// Compute the reverse complement for two contiguous chunks without line breaks.
fn reverse_complement_chunk(left: &mut [u8], right: &mut [u8], tables: &Tables) {
for (x, y) in left.iter_mut().zip(right.iter_mut().rev()) {
let tmp = tables.cpl8(*x);
*x = tables.cpl8(*y);
*y = tmp;
}
}

/// Compute the reverse complement on chunks from opposite ends of the sequence.
///
/// `left` must start at the beginning of a line.
fn reverse_complement_left_right(mut left: &mut [u8], mut right: &mut [u8], trailing_len: usize, tables: &Tables) {
let len = left.len();
if len <= SEQUENTIAL_SIZE {
assert_eq!(right.len(), len);
for (left, right) in left.iter_mut().zip(right.iter_mut().rev()) {
let tmp = tables.cpl16(*left);
*left = tables.cpl16(*right);
*right = tmp;
while left.len() > 0 || right.len() > 0 {
// Process the chunk up to the newline in `right`.
let mut a = left.split_off_left(trailing_len);
let mut b = right.split_off_right(trailing_len);

// If there is an odd number of bytes, the extra one will be on the right.
if b.len() > a.len() {
let mid = b.split_off_left(1);
mid[0] = tables.cpl8(mid[0])
}
reverse_complement_chunk(a, b, tables);

// Skip the newline in `right`.
right.split_off_right(1);

// Process the chunk up to the newline in `left`.
let leading_len = LINE_LEN - 1 - trailing_len;
a = left.split_off_left(leading_len);
b = right.split_off_right(leading_len);

// If there is an odd number of bytes, the extra one will be on the left.
if a.len() > b.len() {
let mid = a.split_off_right(1);
mid[0] = tables.cpl8(mid[0])
}
reverse_complement_chunk(a, b, tables);

// Skip the newline in `left`.
left.split_off_left(1);
}
} else {
let (left1, left2) = left.split_at_mut((len + 1) / 2);
let (right2, right1) = right.split_at_mut(len / 2);
rayon::join(|| reverse_complement_left_right(left1, right1, tables),
|| reverse_complement_left_right(left2, right2, tables));
}
}
let line_count = (len + LINE_LEN - 1) / LINE_LEN;
let mid = line_count / 2 * LINE_LEN; // Split on a whole number of lines.

/// Split a byte slice into two u16-halves with any remainder left in the middle.
fn split_mut_middle_as_u16<'a>(seq: &'a mut [u8]) -> (&'a mut [u16], &'a mut [u8], &'a mut [u16]) {
let len = seq.len();
let div = len / 4;
let rem = len % 4;
unsafe {
let left_ptr = seq.as_mut_ptr();
// This is slow if len % 2 != 0 but still faster than bytewise operations.
let right_ptr = left_ptr.offset((div * 2 + rem) as isize);
(slice::from_raw_parts_mut(left_ptr as *mut u16, div),
slice::from_raw_parts_mut(left_ptr.offset((div * 2) as isize), rem),
slice::from_raw_parts_mut(right_ptr as *mut u16, div))
let left1 = left.split_off_left(mid);
let right1 = right.split_off_right(mid);
rayon::join(|| reverse_complement_left_right(left, right, trailing_len, tables),
|| reverse_complement_left_right(left1, right1, trailing_len, tables));
}
}

/// Compute the reverse complement.
fn reverse_complement(seq: &mut [u8], tables: &Tables) {
let len = seq.len() - 1;
let seq = &mut seq[..len];// Drop the last newline

// Move newlines so the reversed text is wrapped correctly.
let off = LINE_LEN - len % (LINE_LEN + 1);
let mut i = LINE_LEN;
while i < len {
unsafe {
ptr::copy(seq.as_ptr().offset((i - off) as isize),
seq.as_mut_ptr().offset((i - off + 1) as isize), off);
*seq.get_unchecked_mut(i - off) = b'\n';
}
i += LINE_LEN + 1;
}
let (left, middle, right) = split_mut_middle_as_u16(seq);
reverse_complement_left_right(left, right, tables);

match middle.len() {
0 => {}
1 => middle[0] = tables.cpl8(middle[0]),
2 => {
let tmp = tables.cpl8(middle[0]);
middle[0] = tables.cpl8(middle[1]);
middle[1] = tmp;
},
3 => {
middle[1] = tables.cpl8(middle[1]);
let tmp = tables.cpl8(middle[0]);
middle[0] = tables.cpl8(middle[2]);
middle[2] = tmp;
},
_ => unreachable!()
}
let seq = &mut seq[..len]; // Drop the last newline
let trailing_len = len % LINE_LEN;
let (left, right) = seq.split_at_mut(len / 2);
reverse_complement_left_right(left, right, trailing_len, tables);
}

fn file_size(f: &mut File) -> io::Result<usize> {
Expand Down

0 comments on commit 84e26be

Please sign in to comment.