From 341805288e8a055162bef64055a7962ecffbf103 Mon Sep 17 00:00:00 2001
From: Ruud van Asseldonk
Date: Fri, 4 Nov 2016 00:20:11 +0100
Subject: [PATCH] Move small-copy optimization into copy_from_slice

Ultimately copy_from_slice is being a bottleneck, not io::Cursor::read.
It might be worthwhile to move the check here, so more places can
benefit from it.
---
 src/libcore/slice.rs    | 16 +++++++++++++---
 src/libstd/io/cursor.rs | 18 +++---------------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/libcore/slice.rs b/src/libcore/slice.rs
index a4a90e7a9da7a..b238623eabaa7 100644
--- a/src/libcore/slice.rs
+++ b/src/libcore/slice.rs
@@ -515,9 +515,19 @@ impl<T> SliceExt for [T] {
     fn copy_from_slice(&mut self, src: &[T]) where T: Copy {
         assert!(self.len() == src.len(),
                 "destination and source slices have different lengths");
-        unsafe {
-            ptr::copy_nonoverlapping(
-                src.as_ptr(), self.as_mut_ptr(), self.len());
+        // First check if the number of elements we want to copy is small:
+        // `copy_nonoverlapping` will do a memcopy, which involves an indirect
+        // function call when `memcpy` is in the dynamically-linked libc. For
+        // small elements (such as a single byte or pointer), the overhead is
+        // significant. If the element is big then the assignment is a memcopy
+        // anyway.
+        if self.len() == 1 {
+            self[0] = src[0];
+        } else {
+            unsafe {
+                ptr::copy_nonoverlapping(
+                    src.as_ptr(), self.as_mut_ptr(), self.len());
+            }
         }
     }

diff --git a/src/libstd/io/cursor.rs b/src/libstd/io/cursor.rs
index 9b50168a954b7..1b5023380a783 100644
--- a/src/libstd/io/cursor.rs
+++ b/src/libstd/io/cursor.rs
@@ -219,21 +219,9 @@ impl<T> io::Seek for Cursor<T> where T: AsRef<[u8]> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T> Read for Cursor<T> where T: AsRef<[u8]> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        // First check if the amount of bytes we want to read is small: the read
-        // in the else branch will end up calling `<&[u8] as Read>::read()`,
-        // which will copy the buffer using a memcopy. If we only want to read a
-        // single byte, then the overhead of the function call is significant.
-        let num_read = {
-            let mut inner_buf = self.fill_buf()?;
-            if buf.len() == 1 && inner_buf.len() > 0 {
-                buf[0] = inner_buf[0];
-                1
-            } else {
-                Read::read(&mut inner_buf, buf)?
-            }
-        };
-        self.pos += num_read as u64;
-        Ok(num_read)
+        let n = Read::read(&mut self.fill_buf()?, buf)?;
+        self.pos += n as u64;
+        Ok(n)
     }
 }