From 05424c7051119f36cb8a1f84326072a66abddb1a Mon Sep 17 00:00:00 2001 From: Steve Klabnik Date: Tue, 24 Nov 2015 11:49:19 -0500 Subject: [PATCH] Improve docs for std::char Part of #29428 --- src/libcore/char.rs | 158 +++++++++++++++++++++++++++++++---- src/librustc_unicode/char.rs | 47 ++++++----- 2 files changed, 169 insertions(+), 36 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 525f9490b17df..5e13df432808f 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -63,19 +63,63 @@ const MAX_THREE_B: u32 = 0x10000; Cn Unassigned a reserved unassigned code point or a noncharacter */ -/// The highest valid code point +/// The highest valid code point a `char` can have. +/// +/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code +/// Point], but only ones within a certain range. `MAX` is the highest valid +/// code point that's a valid [Unicode Scalar Value]. +/// +/// [`char`]: primitive.char.html +/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value +/// [Code Point]: http://www.unicode.org/glossary/#code_point #[stable(feature = "rust1", since = "1.0.0")] pub const MAX: char = '\u{10ffff}'; -/// Converts a `u32` to an `Option<char>`. +/// Converts a `u32` to a `char`. +/// +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with +/// [`as`]: +/// +/// ``` +/// let c = '💯'; +/// let i = c as u32; +/// +/// assert_eq!(128175, i); +/// ``` +/// +/// However, the reverse is not true: not all valid [`u32`]s are valid +/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value +/// for a [`char`]. +/// +/// [`char`]: primitive.char.html +/// [`u32`]: primitive.u32.html +/// [`as`]: ../book/casting-between-types.html#as +/// +/// For an unsafe version of this function which ignores these checks, see +/// [`from_u32_unchecked()`]. 
+/// +/// [`from_u32_unchecked()`]: fn.from_u32_unchecked.html /// /// # Examples /// +/// Basic usage: +/// /// ``` /// use std::char; /// -/// assert_eq!(char::from_u32(0x2764), Some('❤')); -/// assert_eq!(char::from_u32(0x110000), None); // invalid character +/// let c = char::from_u32(0x2764); +/// +/// assert_eq!(Some('❤'), c); +/// ``` +/// +/// Returning `None` when the input is not a valid [`char`]: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_u32(0x110000); +/// +/// assert_eq!(None, c); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -88,33 +132,104 @@ pub fn from_u32(i: u32) -> Option<char> { } } -/// Converts a `u32` to an `char`, not checking whether it is a valid unicode -/// codepoint. +/// Converts a `u32` to a `char`, ignoring validity. +/// +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with +/// [`as`]: +/// +/// ``` +/// let c = '💯'; +/// let i = c as u32; +/// +/// assert_eq!(128175, i); +/// ``` +/// +/// However, the reverse is not true: not all valid [`u32`]s are valid +/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to +/// [`char`], possibly creating an invalid one. +/// +/// [`char`]: primitive.char.html +/// [`u32`]: primitive.u32.html +/// [`as`]: ../book/casting-between-types.html#as +/// +/// # Safety +/// +/// This function is unsafe, as it may construct invalid `char` values. +/// +/// For a safe version of this function, see the [`from_u32()`] function. +/// +/// [`from_u32()`]: fn.from_u32.html +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::char; +/// +/// let c = unsafe { char::from_u32_unchecked(0x2764) }; +/// +/// assert_eq!('❤', c); +/// ``` #[inline] #[stable(feature = "char_from_unchecked", since = "1.5.0")] pub unsafe fn from_u32_unchecked(i: u32) -> char { transmute(i) } -/// Converts a number to the character representing it. +/// Converts a digit in the given radix to a `char`. 
/// -/// # Return value /// +/// A 'radix' here is sometimes also called a 'base'. A radix of two +/// indicates a binary number, a radix of ten, decimal, and a radix of +/// sixteen, hexadecimal, to give some common values. Arbitrary +/// radices up to 36 are supported. /// -/// Returns `Some(char)` if `num` represents one digit under `radix`, -/// using one character of `0-9` or `a-z`, or `None` if it doesn't. +/// `from_digit()` will return `None` if the input is not a digit in +/// the given radix. /// /// # Panics /// -/// Panics if given an `radix` > 36. +/// Panics if given a radix larger than 36. /// /// # Examples /// +/// Basic usage: +/// /// ``` /// use std::char; /// /// let c = char::from_digit(4, 10); /// -/// assert_eq!(c, Some('4')); +/// assert_eq!(Some('4'), c); +/// +/// // Decimal 11 is a single digit in base 16 +/// let c = char::from_digit(11, 16); +/// +/// assert_eq!(Some('b'), c); +/// ``` +/// +/// Returning `None` when the input is not a digit: +/// +/// ``` +/// use std::char; +/// +/// let c = char::from_digit(20, 10); +/// +/// assert_eq!(None, c); +/// ``` +/// +/// Passing a large radix, causing a panic: +/// +/// ``` +/// use std::thread; +/// use std::char; +/// +/// let result = thread::spawn(|| { +/// // this panics +/// let c = char::from_digit(1, 37); +/// }).join(); +/// +/// assert!(result.is_err()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -287,8 +402,14 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option { } } -/// An iterator over the characters that represent a `char`, as escaped by -/// Rust's unicode escaping rules. +/// An iterator that yields the hexadecimal Unicode escape of a +/// character, as `char`s. +/// +/// This `struct` is created by the [`escape_unicode()`] method on [`char`]. See +/// its documentation for more. 
+/// +/// [`escape_unicode()`]: primitive.char.html#method.escape_unicode +/// [`char`]: primitive.char.html #[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] pub struct EscapeUnicode { @@ -362,8 +483,13 @@ impl Iterator for EscapeUnicode { } } -/// An iterator over the characters that represent a `char`, escaped -/// for maximum portability. +/// An iterator that yields the literal escape code of a `char`. +/// +/// This `struct` is created by the [`escape_default()`] method on [`char`]. See +/// its documentation for more. +/// +/// [`escape_default()`]: primitive.char.html#method.escape_default +/// [`char`]: primitive.char.html #[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] pub struct EscapeDefault { diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs index d5a2de69c0271..3824dd0e4368c 100644 --- a/src/librustc_unicode/char.rs +++ b/src/librustc_unicode/char.rs @@ -8,24 +8,23 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Unicode scalar values +//! A character type. //! -//! This module provides the `CharExt` trait, as well as its -//! implementation for the primitive `char` type, in order to allow -//! basic character manipulation. +//! The `char` type represents a single character. More specifically, since +//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode +//! scalar value]', which is similar to, but not the same as, a '[Unicode code +//! point]'. //! -//! A `char` represents a -//! *[Unicode scalar -//! value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can -//! contain any Unicode code point except high-surrogate and low-surrogate code -//! points. +//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value +//! [Unicode code point]: http://www.unicode.org/glossary/#code_point //! -//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\] -//! 
(inclusive) are allowed. A `char` can always be safely cast to a `u32`; -//! however the converse is not always true due to the above range limits -//! and, as such, should be performed via the `from_u32` function. +//! This module exists for technical reasons; the primary documentation for +//! `char` is directly on [the `char` primitive type](../primitive.char.html) +//! itself. //! -//! *[See also the `char` primitive type](../primitive.char.html).* +//! This module is the home of the iterator implementations for the iterators +//! implemented on `char`, as well as some useful constants and conversion +//! functions that convert various types to `char`. #![stable(feature = "rust1", since = "1.0.0")] @@ -42,9 +41,13 @@ pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicod #[unstable(feature = "unicode", issue = "27783")] pub use tables::UNICODE_VERSION; -/// An iterator over the lowercase mapping of a given character, returned from -/// the [`to_lowercase` method](../primitive.char.html#method.to_lowercase) on -/// characters. +/// An iterator that yields the lowercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_lowercase()`] method on [`char`]. See +/// its documentation for more. +/// +/// [`to_lowercase()`]: ../primitive.char.html#method.to_lowercase +/// [`char`]: ../primitive.char.html #[stable(feature = "rust1", since = "1.0.0")] pub struct ToLowercase(CaseMappingIter); @@ -56,9 +59,13 @@ impl Iterator for ToLowercase { } } -/// An iterator over the uppercase mapping of a given character, returned from -/// the [`to_uppercase` method](../primitive.char.html#method.to_uppercase) on -/// characters. +/// An iterator that yields the uppercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_uppercase()`] method on [`char`]. See +/// its documentation for more. 
+/// +/// [`to_uppercase()`]: ../primitive.char.html#method.to_uppercase +/// [`char`]: ../primitive.char.html #[stable(feature = "rust1", since = "1.0.0")] pub struct ToUppercase(CaseMappingIter);