From 05424c7051119f36cb8a1f84326072a66abddb1a Mon Sep 17 00:00:00 2001
From: Steve Klabnik <steve@steveklabnik.com>
Date: Tue, 24 Nov 2015 11:49:19 -0500
Subject: [PATCH] Improve docs for std::char

Part of #29428
---
 src/libcore/char.rs          | 158 +++++++++++++++++++++++++++++++----
 src/librustc_unicode/char.rs |  47 ++++++-----
 2 files changed, 169 insertions(+), 36 deletions(-)
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 525f9490b17df..5e13df432808f 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -63,19 +63,63 @@ const MAX_THREE_B: u32 =  0x10000;
     Cn  Unassigned              a reserved unassigned code point or a noncharacter
 */
 
-/// The highest valid code point
+/// The highest valid code point a `char` can have.
+///
+/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
+/// Point], but only one within a certain range. `MAX` is the highest code
+/// point that is a valid [Unicode Scalar Value].
+///
+/// [`char`]: ../primitive.char.html
+/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
+/// [Code Point]: http://www.unicode.org/glossary/#code_point
 #[stable(feature = "rust1", since = "1.0.0")]
 pub const MAX: char = '\u{10ffff}';
 
-/// Converts a `u32` to an `Option<char>`.
+/// Converts a `u32` to a `char`.
+///
+/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
+/// [`as`]:
+///
+/// ```
+/// let c = '💯';
+/// let i = c as u32;
+///
+/// assert_eq!(128175, i);
+/// ```
+///
+/// However, the reverse is not true: not all valid [`u32`]s are valid
+/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
+/// for a [`char`].
+///
+/// [`char`]: ../primitive.char.html
+/// [`u32`]: ../primitive.u32.html
+/// [`as`]: ../../book/casting-between-types.html#as
+///
+/// For an unsafe version of this function which ignores these checks, see
+/// [`from_u32_unchecked()`].
+///
+/// [`from_u32_unchecked()`]: fn.from_u32_unchecked.html
 ///
 /// # Examples
 ///
+/// Basic usage:
+///
 /// ```
 /// use std::char;
 ///
-/// assert_eq!(char::from_u32(0x2764), Some('❤'));
-/// assert_eq!(char::from_u32(0x110000), None); // invalid character
+/// let c = char::from_u32(0x2764);
+///
+/// assert_eq!(Some('❤'), c);
+/// ```
+///
+/// Returning `None` when the input is not a valid [`char`]:
+///
+/// ```
+/// use std::char;
+///
+/// let c = char::from_u32(0x110000);
+///
+/// assert_eq!(None, c);
 /// ```
 #[inline]
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -88,33 +132,104 @@ pub fn from_u32(i: u32) -> Option<char> {
     }
 }
 
-/// Converts a `u32` to an `char`, not checking whether it is a valid unicode
-/// codepoint.
+/// Converts a `u32` to a `char`, ignoring validity.
+///
+/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
+/// [`as`]:
+///
+/// ```
+/// let c = '💯';
+/// let i = c as u32;
+///
+/// assert_eq!(128175, i);
+/// ```
+///
+/// However, the reverse is not true: not all valid [`u32`]s are valid
+/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
+/// [`char`], possibly creating an invalid one.
+///
+/// [`char`]: ../primitive.char.html
+/// [`u32`]: ../primitive.u32.html
+/// [`as`]: ../../book/casting-between-types.html#as
+///
+/// # Safety
+///
+/// This function is unsafe, as it may construct invalid `char` values.
+///
+/// For a safe version of this function, see the [`from_u32()`] function.
+///
+/// [`from_u32()`]: fn.from_u32.html
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use std::char;
+///
+/// let c = unsafe { char::from_u32_unchecked(0x2764) };
+///
+/// assert_eq!('❤', c);
+/// ```
 #[inline]
 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
 pub unsafe fn from_u32_unchecked(i: u32) -> char {
     transmute(i)
 }
 
-/// Converts a number to the character representing it.
+/// Converts a digit in the given radix to a `char`.
 ///
-/// # Return value
+/// A 'radix' here is sometimes also called a 'base'. A radix of two
+/// indicates a binary number, a radix of ten, decimal, and a radix of
+/// sixteen, hexadecimal, to give some common values. Arbitrary
+/// radices are supported.
 ///
-/// Returns `Some(char)` if `num` represents one digit under `radix`,
-/// using one character of `0-9` or `a-z`, or `None` if it doesn't.
+/// `from_digit()` will return `None` if the input is not a digit in
+/// the given radix.
 ///
 /// # Panics
 ///
-/// Panics if given an `radix` > 36.
+/// Panics if given a radix larger than 36.
 ///
 /// # Examples
 ///
+/// Basic usage:
+///
 /// ```
 /// use std::char;
 ///
 /// let c = char::from_digit(4, 10);
 ///
-/// assert_eq!(c, Some('4'));
+/// assert_eq!(Some('4'), c);
+///
+/// // Decimal 11 is a single digit in base 16
+/// let c = char::from_digit(11, 16);
+///
+/// assert_eq!(Some('b'), c);
+/// ```
+///
+/// Returning `None` when the input is not a digit:
+///
+/// ```
+/// use std::char;
+///
+/// let c = char::from_digit(20, 10);
+///
+/// assert_eq!(None, c);
+/// ```
+///
+/// Passing a large radix, causing a panic:
+///
+/// ```
+/// use std::thread;
+/// use std::char;
+///
+/// let result = thread::spawn(|| {
+///     // this panics
+///     let c = char::from_digit(1, 37);
+/// }).join();
+///
+/// assert!(result.is_err());
 /// ```
 #[inline]
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -287,8 +402,14 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
     }
 }
 
-/// An iterator over the characters that represent a `char`, as escaped by
-/// Rust's unicode escaping rules.
+/// An iterator that yields the hexadecimal Unicode escape of a
+/// character, as `char`s.
+///
+/// This `struct` is created by the [`escape_unicode()`] method on [`char`]. See
+/// its documentation for more.
+///
+/// [`escape_unicode()`]: ../primitive.char.html#method.escape_unicode
+/// [`char`]: ../primitive.char.html
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct EscapeUnicode {
@@ -362,8 +483,13 @@ impl Iterator for EscapeUnicode {
     }
 }
 
-/// An iterator over the characters that represent a `char`, escaped
-/// for maximum portability.
+/// An iterator that yields the literal escape code of a `char`.
+///
+/// This `struct` is created by the [`escape_default()`] method on [`char`]. See
+/// its documentation for more.
+///
+/// [`escape_default()`]: ../primitive.char.html#method.escape_default
+/// [`char`]: ../primitive.char.html
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct EscapeDefault {
diff --git a/src/librustc_unicode/char.rs b/src/librustc_unicode/char.rs
index d5a2de69c0271..3824dd0e4368c 100644
--- a/src/librustc_unicode/char.rs
+++ b/src/librustc_unicode/char.rs
@@ -8,24 +8,23 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-//! Unicode scalar values
+//! A character type.
 //!
-//! This module provides the `CharExt` trait, as well as its
-//! implementation for the primitive `char` type, in order to allow
-//! basic character manipulation.
+//! The `char` type represents a single character. More specifically, since
+//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
+//! scalar value]', which is similar to, but not the same as, a '[Unicode code
+//! point]'.
 //!
-//! A `char` represents a
-//! *[Unicode scalar
-//! value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can
-//! contain any Unicode code point except high-surrogate and low-surrogate code
-//! points.
+//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
+//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
 //!
-//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
-//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
-//! however the converse is not always true due to the above range limits
-//! and, as such, should be performed via the `from_u32` function.
+//! This module exists for technical reasons; the primary documentation for
+//! `char` is directly on [the `char` primitive type](../primitive.char.html)
+//! itself.
 //!
-//! *[See also the `char` primitive type](../primitive.char.html).*
+//! This module is the home of the iterator types returned by methods on
+//! `char`, as well as some useful constants and conversion functions that
+//! convert various types to `char`.
 
 #![stable(feature = "rust1", since = "1.0.0")]
 
@@ -42,9 +41,13 @@ pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicod
 #[unstable(feature = "unicode", issue = "27783")]
 pub use tables::UNICODE_VERSION;
 
-/// An iterator over the lowercase mapping of a given character, returned from
-/// the [`to_lowercase` method](../primitive.char.html#method.to_lowercase) on
-/// characters.
+/// An iterator that yields the lowercase equivalent of a `char`.
+///
+/// This `struct` is created by the [`to_lowercase()`] method on [`char`]. See
+/// its documentation for more.
+///
+/// [`to_lowercase()`]: ../primitive.char.html#method.to_lowercase
+/// [`char`]: ../primitive.char.html
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct ToLowercase(CaseMappingIter);
 
@@ -56,9 +59,13 @@ impl Iterator for ToLowercase {
     }
 }
 
-/// An iterator over the uppercase mapping of a given character, returned from
-/// the [`to_uppercase` method](../primitive.char.html#method.to_uppercase) on
-/// characters.
+/// An iterator that yields the uppercase equivalent of a `char`.
+///
+/// This `struct` is created by the [`to_uppercase()`] method on [`char`]. See
+/// its documentation for more.
+///
+/// [`to_uppercase()`]: ../primitive.char.html#method.to_uppercase
+/// [`char`]: ../primitive.char.html
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct ToUppercase(CaseMappingIter);