# Fluent Python by Luciano Ramalho

## Part I: Data Structures

In [None]:
# Fluent Python: Magic Methods (Dunder) & FrenchDeck Example
import collections
from random import choice

Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A 52-card deck exposed as a read-only sequence.

    ``len()`` and indexing/slicing are forwarded to an internal list of
    ``Card`` tuples that is built once in ``__init__``.
    """

    # '2' through '10', followed by jack, queen, king and ace.
    ranks = [*map(str, range(2, 11)), *'JQKA']
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # The suit loop is the outer one, so cards end up grouped by suit.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # ``position`` may be an int or a slice; the list handles both.
        return self._cards[position]

# Demo of FrenchDeck with magic methods
deck = FrenchDeck()

print(f"Deck length: {len(deck)}")  # 52
print(f"First card: {deck[0]}")     # Card(rank='2', suit='spades')
print(f"Last card: {deck[-1]}")     # Card(rank='A', suit='hearts')
print(f"Random card: {choice(deck)}")  # Random card from deck

print("\nTop 3 cards:")
print(deck[:3])  # [Card(rank='2', suit='spades'), ...]
print("\nAces (every 13th card):")
print(deck[12::13])  # All aces

print("\nIterate deck (first 5 cards):")
for card in deck[:5]:
    print(card)

print("\nReverse iteration (last 5 cards):")
for card in reversed(deck[-5:]):
    print(card)

# The deck holds Card tuples, so membership must be tested with a Card.
# Bare strings such as 'Q' or 'hearts' are never equal to a Card, which made
# the previous check always False. With no __contains__ defined, `in` falls
# back to a sequential scan through __getitem__.
print(f"\n'Q of hearts' in deck: {Card('Q', 'hearts') in deck}")  # True

# Custom sorting: Spades > Hearts > Diamonds > Clubs
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)


def spades_high(card):
    """Return a sort key: rank position first, suit weight as tie-breaker."""
    rank_index = FrenchDeck.ranks.index(card.rank)
    suit_weight = suit_values[card.suit]
    return rank_index * len(suit_values) + suit_weight


# Copy the deck into a list, then order that list in place.
sorted_deck = list(deck)
sorted_deck.sort(key=spades_high)
print("\nSorted deck (lowest to highest):")
print(f"Lowest: {sorted_deck[0]}")   # Card(rank='2', suit='clubs')
print(f"Highest: {sorted_deck[-1]}")  # Card(rank='A', suit='spades')

"""
KEY TAKEAWAYS:
1. **Dunder Methods**: `__len__` and `__getitem__` enable Pythonic behavior (e.g., `len()`, `[]`).
2. **Zero Boilerplate**: By implementing two methods, the class works with `random.choice`, slicing, iteration, and `in`.
3. **Delegation**: `__getitem__` delegates to the list's `__getitem__`, enabling slicing and reverse iteration.
4. **Custom Sorting**: Combine `sorted()` with a key function to define ranking logic (e.g., `spades_high`).
5. **Pythonic Design**: Follows the Data Model to integrate with built-in functions, reducing the need for custom methods.
"""

In [None]:
# Fluent Python: Dunder Methods & Data Model Examples
import collections
import math
from random import choice

# === FrenchDeck Example: Emulating Built-in Sequences ===
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """Deck of 52 ``Card`` tuples that supports ``len()`` and ``[]``.

    The cards live in a private list; both special methods delegate to it.
    """

    # Number cards as strings, then the four letter ranks.
    ranks = list(map(str, range(2, 11))) + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Add one full run of ranks per suit, in the order of ``suits``.
        self._cards = []
        for suit in self.suits:
            self._cards.extend(Card(rank, suit) for rank in self.ranks)

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # Integers return one card; slices return a list of cards.
        return self._cards[position]

# === Vector Example: Emulating Numeric Types ===
class Vector:
    """Two-dimensional vector supporting abs(), bool(), ``+`` and ``* scalar``."""

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        # !r keeps the components' own repr, so Vector('1', '2') stays distinct.
        return f'Vector({self.x!r}, {self.y!r})'

    def __abs__(self):
        # Magnitude of the vector as computed by math.hypot.
        magnitude = math.hypot(self.x, self.y)
        return magnitude

    def __bool__(self):
        # Falsy exactly when the magnitude is zero.
        magnitude = abs(self)
        return bool(magnitude)

    def __add__(self, other):
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        x, y = self.x * scalar, self.y * scalar
        return Vector(x, y)

# === Demo Code ===
deck = FrenchDeck()
v1 = Vector(2, 4)
v2 = Vector(2, 1)

# FrenchDeck Features
print("FrenchDeck Demo:")
print(f"Deck length: {len(deck)}")  # 52
print(f"First card: {deck[0]}")     # Card(rank='2', suit='spades')
print(f"Random card: {choice(deck)}")  # Random card
print(f"Top 3 cards: {deck[:3]}")   # First three cards

# Vector Features
print("\nVector Demo:")
print(f"v1 + v2 = {v1 + v2}")       # Vector(4, 5)
print(f"abs(v1) = {abs(v1)}")       # 4.472...
# The label names the operand actually used (v1); no variable `v` exists.
print(f"v1 * 3 = {v1 * 3}")         # Vector(6, 12)
print(f"bool(Vector(0,0)) = {bool(Vector(0, 0))}")  # False

"""
KEY TAKEAWAYS:
1. **Dunder Methods Power**: Implementing `__len__` and `__getitem__` makes your class behave like built-in sequences (supports len(), indexing, slicing, iteration).
2. **Composition Over Inheritance**: FrenchDeck delegates to a list (`self._cards`) rather than inheriting from list.
3. **Operator Overloading**: Use `__add__`, `__mul__`, etc., to define custom behavior for operators (+, *, etc.).
4. **String Representation**:
   - `__repr__`: Unambiguous, for debugging/reconstruction (e.g., `Vector(2, 4)`).
   - `__str__`: Human-readable, used by `print()`. Prefer `__repr__` if only one is implemented.
5. **Truth Value Testing**: Define `__bool__` to control truthiness (e.g., `bool(Vector(0, 0))` is `False`, so the body of `if Vector(0, 0): ...` is skipped).
6. **Performance Note**: For simple truth checks, `bool(self.x or self.y)` is faster than `abs()` but less readable.
7. **Data Model Compliance**: Follows Python's data model to integrate with core features (e.g., `len()`, `in`, `sorted()`).
"""

In [None]:
# Fluent Python: Dunder Methods & Data Model Examples
import collections
import math
from random import choice

# === FrenchDeck Example: Emulating Built-in Sequences ===
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """Sequence-like deck: 13 ranks in each of 4 suits, 52 cards in total."""

    # Ranks run '2'..'10' and then J, Q, K, A.
    ranks = [*map(str, range(2, 11)), *'JQKA']
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Outer loop over suits, inner loop over ranks: cards grouped by suit.
        deck_cards = []
        for suit in self.suits:
            for rank in self.ranks:
                deck_cards.append(Card(rank, suit))
        self._cards = deck_cards

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # Delegating to the list gives indexing and slicing for free.
        return self._cards[position]

# === Vector Example: Emulating Numeric Types ===
class Vector:
    """Plain 2-D vector with repr, magnitude, truthiness, addition and scaling."""

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        return f'Vector({self.x!r}, {self.y!r})'

    def __abs__(self):
        length = math.hypot(self.x, self.y)
        return length

    def __bool__(self):
        # Truthiness follows the magnitude: zero length means False.
        length = abs(self)
        return bool(length)

    def __add__(self, other):
        # Component-wise sum; ``other`` only needs ``x`` and ``y`` attributes.
        sum_x = self.x + other.x
        sum_y = self.y + other.y
        return Vector(sum_x, sum_y)

    def __mul__(self, scalar):
        scaled_x, scaled_y = self.x * scalar, self.y * scalar
        return Vector(scaled_x, scaled_y)

# === Demo Code ===
deck = FrenchDeck()
v1 = Vector(2, 4)
v2 = Vector(2, 1)

# FrenchDeck Features
print("FrenchDeck Demo:")
print(f"Deck length: {len(deck)}")  # 52
print(f"First card: {deck[0]}")     # Card(rank='2', suit='spades')
print(f"Random card: {choice(deck)}")  # Random card
print(f"Top 3 cards: {deck[:3]}")   # First three cards

# Vector Features
print("\nVector Demo:")
print(f"v1 + v2 = {v1 + v2}")       # Vector(4, 5)
print(f"abs(v1) = {abs(v1)}")       # 4.472...
# The label names the operand actually used (v1); no variable `v` exists.
print(f"v1 * 3 = {v1 * 3}")         # Vector(6, 12)
print(f"bool(Vector(0,0)) = {bool(Vector(0, 0))}")  # False

"""
KEY TAKEAWAYS:
1. **Dunder Methods Power**: Implementing `__len__` and `__getitem__` makes your class behave like built-in sequences (supports len(), indexing, slicing, iteration).
2. **Composition Over Inheritance**: FrenchDeck delegates to a list (`self._cards`) rather than inheriting from list.
3. **Operator Overloading**: Use `__add__`, `__mul__`, etc., to define custom behavior for operators (+, *, etc.).
4. **String Representation**:
   - `__repr__`: Unambiguous, for debugging/reconstruction (e.g., `Vector(2, 4)`).
   - `__str__`: Human-readable, used by `print()`. Prefer `__repr__` if only one is implemented.
5. **Truth Value Testing**: Define `__bool__` to control truthiness (e.g., `bool(Vector(0, 0))` is `False`, so the body of `if Vector(0, 0): ...` is skipped).
6. **Performance Note**: For simple truth checks, `bool(self.x or self.y)` is faster than `abs()` but less readable.
7. **Data Model Compliance**: Follows Python's data model to integrate with core features (e.g., `len()`, `in`, `sorted()`).
"""

### CHAPTER 2: An Array of Sequences

In [None]:
# Fluent Python: Built-in Sequences & Advanced Concepts
import array
from collections import deque
import sys

# 1. List Comprehensions vs Generator Expressions
squares_list = [n ** 2 for n in range(5)]   # Every square is computed right here
squares_gen = (n ** 2 for n in range(5))    # Squares are produced only on iteration

# 2. Tuples: Records vs Immutable Lists
point = 10, 20            # Used as a record: each position has its own meaning
coordinates = 1, 2, 3     # Used as an immutable list of like items

# 3. Sequence Unpacking & Patterns (Python 3.10+)
def sequence_match(seq):
    match seq:
        case [x, y]: return f"Two elements: {x}, {y}"
        case [x, y, z]: return f"Three elements: {x}, {y}, {z}"
        case _: return "Unknown pattern"

# 4. Slicing: Read & Write
numbers = list(range(1, 6))
numbers[1:3] = [20, 30]  # Assigning to a slice replaces those items in place

# 5. Specialized Sequences
float_array = array.array('d', [1.1, 2.2, 3.3])  # Typecode 'd': C doubles
queue = deque((1, 2, 3))  # Double-ended queue

# 6. Performance Comparison
# sys.getsizeof reports the size of each container object itself.
list_size, tuple_size, array_size = (
    sys.getsizeof(container)
    for container in ([1, 2, 3], (1, 2, 3), float_array)
)

# Demo Execution
# Each (label, value) pair is printed exactly like print(label, value).
for label, value in (
    ("List Comprehension:", squares_list),
    ("Generator Expression (converted to list):", list(squares_gen)),
    ("\nTuple as Record:", point),
    ("Tuple as Immutable List:", coordinates),
):
    print(label, value)

print("\nSequence Matching Examples:")
for sample in ([1, 2], [1, 2, 3]):
    print(sequence_match(sample))

print("\nAfter Slice Assignment:", numbers)

print("\nSpecialized Sequences:")
for label, value in (
    ("Float Array:", float_array),
    ("Deque (FIFO-ready):", queue),
):
    print(label, value)

print("\nMemory Comparison (bytes):")
print(f"List: {list_size}, Tuple: {tuple_size}, Array: {array_size}")

"""
KEY TAKEAWAYS:
1. **List Comprehensions vs Generators**:
   - Use `[]` for immediate computation, `()` for lazy evaluation.
   - Generators save memory for large datasets.

2. **Tuples**:
   - Immutable but faster than lists for fixed data.
   - Use as records (namedtuples recommended for clarity) or for hashable keys.

3. **Sequence Unpacking**:
   - `*` captures variable elements: `head, *rest = [1,2,3,4]`
   - Python 3.10+ pattern matching enables expressive sequence analysis.

4. **Slicing**:
   - Lists support slice assignment: `lst[1:3] = [new_values]`
   - Tuples are immutable (no slice assignment).

5. **Specialized Sequences**:
   - `array.array`: Compact storage for homogeneous data (flat vs container).
   - `collections.deque`: Thread-safe, fast O(1) appends/pops from both ends.

6. **Performance**:
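   - Tuples have a smaller per-container footprint than lists of the same length (exact byte counts vary by Python version).
   - Flat sequences (arrays) store raw C values instead of object references, so they are far more compact than a list or tuple for large numeric data.
   - `sys.getsizeof` is shallow: it measures the container only, not the objects it refers to.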
"""

In [None]:
# Fluent Python: List Comprehensions & Generator Expressions
import sys

# 1. Basic List Comprehension
symbols = '$¢£¥€¤'
codes = [ord(symbol) for symbol in symbols]  # Builds the whole list at once
print("List Comprehension Output:", codes)

# 2. Generator Expression (Lazy Evaluation)
gen_codes = (ord(symbol) for symbol in symbols)  # Nothing runs until iterated
print("Generator Expression Output (converted to list):", list(gen_codes))

# 3. Filtering with List Comprehension
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
print("\nFiltered Unicode Codes (>127):", beyond_ascii)

# 4. Equivalent with map/filter (Less Readable)
beyond_ascii_map = list(filter(lambda c: c > 127, map(ord, symbols)))
print("Filtered with map/filter:", beyond_ascii_map)

# 5. Cartesian Product with List Comprehension
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
cartesian_product = [(color, size) for color in colors for size in sizes]
print("\nCartesian Product:", cartesian_product)

# 6. Scope in Comprehensions (Python 3)
x = 'ABC'
codes = [ord(x) for x in x]  # The loop variable x is local to the comprehension
print(f"\nOuter x after comprehension: '{x}'")  # Original value preserved
print("Comprehension result:", codes)

# 7. Walrus Operator in List Comprehension (Python 3.8+)
last = None
codes = [last := ord(c) for c in x]  # := binds 'last' in the enclosing scope
print("\nValue of 'last' after walrus operator:", last)

# 8. Memory Comparison
# Measure a list and a generator built from the SAME input so the two numbers
# are comparable (`codes` now holds only the 3 codes of 'ABC').
# sys.getsizeof measures the container object only.
symbol_codes = [ord(s) for s in symbols]
list_size = sys.getsizeof(symbol_codes)
gen_size = sys.getsizeof(ord(s) for s in symbols)
print("\nMemory Usage (bytes):")
print(f"List: {list_size}, Generator: {gen_size}")

"""
KEY TAKEAWAYS:
1. **Readability**:
   - List comprehensions (`[x for x in ...]`) are explicit and self-documenting for list creation.
   - Avoid overusing nested listcomps; prefer plain loops for complex logic.

2. **Performance**:
   - Listcomps are often faster than `map`/`filter` for Python code (C-level optimizations help).
   - Generators (`(x for x in ...)`) save memory by producing items lazily.

3. **Scope**:
   - Variables in comprehensions are local to the expression (outer variables preserved).
   - Walrus operator (`:=`) allows capturing values post-comprehension.

4. **Use Cases**:
   - Use listcomps for new list creation.
   - Use generator expressions for streaming/iterating without full list allocation.
   - Cartesian products and filtering are natural fits for listcomps.

5. **Memory Efficiency**:
   - Generators reduce memory overhead for large datasets.
   - Listcomps materialize the entire sequence upfront.
"""

In [None]:
# Fluent Python: List Comprehensions, Generators, and Tuples
import array
from collections import deque
import sys

# 1. List Comprehensions vs Generator Expressions
symbols = '$¢£¥€¤'
codes_list = list(map(ord, symbols))          # Materialized immediately
codes_gen = (ord(char) for char in symbols)   # Evaluated lazily, one item at a time

# 2. Cartesian Product with List Comprehension
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
# The leftmost `for` is the outer loop, so it decides the grouping.
tshirts = list((color, size) for color in colors for size in sizes)
tshirts_by_size = list((size, color) for size in sizes for color in colors)

# 3. Generator Expression for Memory Efficiency
# The array constructor consumes the generator directly.
float_array = array.array('d', (ord(char) for char in symbols))

# 4. Tuples as Records (Immutable Data Structures)
lax_coordinates = 33.9425, -118.408056                 # Latitude, longitude
tokyo_data = 'Tokyo', 2003, 32_450, 0.66, 8014         # City, year, population, change, area

# 5. Tuples as Immutable Lists (with Caveats)
a = (10, 'alpha', [1, 2])
b = (10, 'alpha', [1, 2])   # Equal to a at this point
b[2].append(99)             # The list inside b grows; the tuple itself is untouched
are_equal = (a == b)        # False: the inner lists now differ

# 6. Hashability Check (Fixed vs Mutable Tuples)
def is_hashable(obj):
    """Return True when ``hash(obj)`` succeeds, False when it raises TypeError."""
    try:
        hash(obj)
    except TypeError:
        hashable = False
    else:
        hashable = True
    return hashable

tf = (10, 'alpha', (1, 2))  # Every item is itself immutable
tm = (10, 'alpha', [1, 2])  # Last item is a list

# Demo Execution
# Each (label, value) pair is printed exactly like print(label, value).
for label, value in (
    ("List Comprehension Output:", codes_list),
    ("Generator Expression (converted to list):", list(codes_gen)),
    ("\nCartesian Product (color, size):", tshirts),
    ("Cartesian Product (size, color):", tshirts_by_size),
    ("\nFloat Array from Generator:", float_array),
    ("\nTuple as Record - Tokyo Data:", tokyo_data),
    ("Tuple with Mutable Element (a vs b):", are_equal),
):
    print(label, value)

print("\nHashability Check:")
print(f"Fully immutable tuple: {is_hashable(tf)}")
print(f"Tuple with list: {is_hashable(tm)}")

"""
KEY TAKEAWAYS:
1. **List Comprehensions vs Generators**:
   - Use `[]` for immediate list creation, `()` for lazy evaluation.
   - Generators save memory for large datasets (e.g., `array.array('d', (ord(s)...)`).

2. **Cartesian Products**:
   - Listcomps generate full product lists: `[(color, size) for...]`.
   - Generator expressions avoid memory overhead when iterating directly.

3. **Tuples**:
   - **As Records**: Fixed-size, ordered fields (e.g., coordinates, city data).
   - **As Immutable Lists**: Memory-efficient, hashable (if all elements are immutable).
   - **Caveat**: Tuples with mutable elements (e.g., lists) can change indirectly.

4. **Performance**:
   - Tuples use ~40% less memory than lists for small data.
   - Listcomps are faster than `map/filter` for Python-level code.
   - Generators avoid allocating full lists for large datasets.

5. **Hashability**:
   - Only fully immutable tuples are hashable (can be dict keys/set elements).
   - Use `hash()` or a helper function to verify immutability.

6. **Unpacking**:
   - Tuples support unpacking for clean variable assignment: `city, year, pop = tokyo_data`.
   - Use `_` as a dummy variable for ignored values: `for country, _ in traveler_ids`.

7. **Design Patterns**:
   - Prefer tuples for data that shouldn't change (e.g., configuration, constants).
   - Use listcomps for small datasets; generators for streaming/large data.
"""

In [None]:
# Fluent Python: Tuples, List Comprehensions, and Generators in Action
import collections
import math
from random import choice
import sys

# === FrenchDeck Example with List Comprehension ===
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """Deck of 52 ``Card`` tuples that supports ``len()`` and ``[]``.

    All cards are stored in a private list created by ``__init__``.
    """

    # Number cards as strings, then the four letter ranks.
    ranks = list(map(str, range(2, 11))) + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # One complete run of ranks is appended for each suit in turn.
        self._cards = []
        for suit in self.suits:
            self._cards.extend(Card(rank, suit) for rank in self.ranks)

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # Integers return one card; slices return a list of cards.
        return self._cards[position]

# === Vector Example with Special Methods ===
class Vector:
    """Two-dimensional vector supporting abs(), bool(), ``+`` and ``* scalar``."""

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        return f'Vector({self.x!r}, {self.y!r})'

    def __abs__(self):
        # Magnitude of the vector as computed by math.hypot.
        magnitude = math.hypot(self.x, self.y)
        return magnitude

    def __bool__(self):
        # Falsy exactly when the magnitude is zero.
        magnitude = abs(self)
        return bool(magnitude)

    def __add__(self, other):
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        x, y = self.x * scalar, self.y * scalar
        return Vector(x, y)

# === Tuples as Records and Immutable Lists ===
lax_coordinates = 33.9425, -118.408056   # Record: latitude, longitude
immutable_list = 10, 'alpha', (1, 2)     # Only immutable items inside
mutable_tuple = 10, 'alpha', [1, 2]      # Holds a list, which can still change

# === Cartesian Products ===
colors = ['black', 'white']
sizes = ['S', 'M', 'L']

# Eager: all six pairs are stored in a list straight away
tshirts_list = list((color, size) for color in colors for size in sizes)

# Lazy: pairs are produced one at a time when the generator is iterated
tshirts_gen = ((color, size) for color in colors for size in sizes)

# === Pattern Matching with Sequences (Python 3.10+) ===
def handle_message(message):
    match message:
        case ['BEEPER', freq, times]: return f"Beep {times} times at {freq}Hz"
        case ['NECK', angle]: return f"Rotate neck to {angle}°"
        case ['LED', id, intensity]: return f"Set LED {id} brightness to {intensity}"
        case _: return "Unknown command"

# === Demo Execution ===
deck = FrenchDeck()
v1 = Vector(2, 4)
v2 = Vector(2, 1)

print("FrenchDeck Features:")
print(f"Random card: {choice(deck)}")
print(f"Top 3 cards: {deck[:3]}")

print("\nVector Operations:")
print(f"v1 + v2 = {v1 + v2}")
print(f"abs(v1) = {abs(v1)}")
print(f"v1 * 3 = {v1 * 3}")

print("\nTuples as Records:")
print("Coordinates:", lax_coordinates)
print("Immutable tuple:", immutable_list)
# Really mutate the list held by the tuple instead of printing a freshly
# built look-alike: the tuple keeps the same list object, which is now longer.
mutable_tuple[2].append(99)
print("Mutable tuple (after modification):", mutable_tuple)

print("\nCartesian Product:")
print("List comprehension:", tshirts_list)
print("Generator expression (converted):", list(tshirts_gen))

print("\nPattern Matching Examples:")
print(handle_message(['BEEPER', 440, 3]))
print(handle_message(['NECK', 45]))
print(handle_message(['LED', 1, 0.7]))

# === Memory Comparison ===
# sys.getsizeof measures the container object only.
list_size = sys.getsizeof([1, 2, 3])
tuple_size = sys.getsizeof((1, 2, 3))
print(f"\nMemory Usage (bytes): List={list_size}, Tuple={tuple_size}")

"""
KEY TAKEAWAYS:
1. **List Comprehensions**:
   - `[]` builds lists immediately (good for small datasets).
   - Use for filtering/transforming sequences (e.g., `[x**2 for x in range(5)]`).

2. **Generator Expressions**:
   - `()` yields items lazily (memory-efficient for large datasets).
   - Ideal for streaming or one-time iteration (e.g., `sum((x**2 for x in range(1000000)))`).

3. **Tuples**:
   - **As Records**: Fixed-size, ordered data (e.g., coordinates, database rows).
   - **As Immutable Lists**: Hashable if all elements are immutable (can be dict keys).
   - **Caveat**: Tuples with mutable elements (e.g., lists) can change indirectly.

4. **Special Methods**:
   - `__add__`, `__mul__` enable operator overloading for custom types.
   - `__repr__` provides unambiguous string representation for debugging.

5. **Pattern Matching (Python 3.10+)**:
   - `match/case` simplifies complex conditionals with destructuring (e.g., handling nested data structures).

6. **Performance**:
   - Tuples use ~40% less memory than lists for small data.
   - Generators avoid memory overhead for large Cartesian products.
"""

In [None]:
# Fluent Python: Sequences, Pattern Matching, and Slicing Examples
import sys

# 1. Pattern Matching with Sequences (Python 3.10+)
def handle_command(command):
    match command:
        case ['BEEPER', freq, times]: return f"Beep {times}x at {freq}Hz"
        case ['NECK', angle]: return f"Rotate neck to {angle}°"
        case ['LED', id, intensity]: return f"Set LED {id} brightness to {intensity}"
        case _: return "Unknown command"

# 2. Cartesian Products with List Comprehensions
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts_list = [(c, s) for c in colors for s in sizes]  # List comprehension
tshirts_gen = ((c, s) for c in colors for s in sizes)   # Generator expression

# 3. Slicing Examples
s = 'bicycle'
slice_examples = {
    "Basic slice": s[2:5],     # 'cyc'
    "Full step": s[::3],       # 'bye' (every third character)
    "Reverse": s[::-1],        # 'elcycib'
    "Custom slice": s[1:6:2],  # 'iyl'
}

# 4. Named Slice Objects (Invoice Example)
# SKU and PRICE address fixed character columns, so every invoice line must be
# padded to the same layout: SKU in columns [0:6], unit price in [40:52].
# With whitespace collapsed, line[PRICE] fell past the data and came out empty.
invoice = """\
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                 $4.95    2     $9.90"""
SKU = slice(0, 6)
PRICE = slice(40, 52)

# 5. Tuples as Records
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32_450, 0.66, 8014)

# Demo Execution
print("Pattern Matching Examples:")
for command in (['BEEPER', 440, 3], ['LED', 1, 0.7]):
    print(handle_command(command))

print("\nCartesian Product (List vs Generator):")
# Each (label, value) pair is printed exactly like print(label, value).
for label, value in (
    ("List:", tshirts_list),
    ("Generator (converted):", list(tshirts_gen)),
):
    print(label, value)

print("\nSlicing Examples:")
for name, result in slice_examples.items():
    print(f"{name}: {result}")

print("\nNamed Slice Invoice Parsing:")
# The named slices pick the same character columns out of every line.
for line in invoice.splitlines():
    print(f"SKU: {line[SKU]} | Price: {line[PRICE]}")

print("\nTuple Unpacking:")
print(f"Coordinates: {lax_coordinates}")
print(f"Tokyo Data: {city}, {year}, {pop}, {chg}, {area}")

"""
KEY TAKEAWAYS:
1. **Pattern Matching**:
   - Use `match/case` for structural pattern matching (Python 3.10+).
   - Destructures sequences; a `case` may also carry a guard (e.g., `case [x, y] if x > y:`) for extra conditions.

2. **Cartesian Products**:
   - Listcomps build full products: `[(c, s) for c in colors for s in sizes]`
   - Generators save memory: `((c, s) for c in colors for s in sizes)`

3. **Slicing**:
   - `s[start:stop:step]` supports negative indices and steps.
   - Use `slice()` objects for named, reusable slices in complex data parsing.

4. **Tuples**:
   - Immutable records with fixed-size, ordered fields.
   - Unpack tuples for clean variable assignment: `city, year, pop = ...`

5. **Performance**:
   - Generators avoid memory overhead for large datasets.
   - Slicing a list, tuple, or str builds a new object (a shallow copy); use `memoryview` or NumPy arrays when you need zero-copy views.
"""

In [None]:
# Fluent Python: Sequences, Generators, Tuples, and Pattern Matching
import collections
import math
import sys
from random import choice

# === FrenchDeck: Emulating Built-in Sequences ===
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A 52-card deck exposed as a read-only sequence.

    ``len()`` and indexing/slicing are forwarded to an internal list of
    ``Card`` tuples that is built once in ``__init__``.
    """

    # '2' through '10', followed by jack, queen, king and ace.
    ranks = [*map(str, range(2, 11)), *'JQKA']
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # The suit loop is the outer one, so cards end up grouped by suit.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        # ``position`` may be an int or a slice; the list handles both.
        return self._cards[position]

# === Vector: Emulating Numeric Types ===
class Vector:
    """Two-dimensional vector with repr, magnitude, addition and scaling."""

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        return f'Vector({self.x!r}, {self.y!r})'

    def __abs__(self):
        # Magnitude of the vector as computed by math.hypot.
        magnitude = math.hypot(self.x, self.y)
        return magnitude

    def __add__(self, other):
        # Component-wise sum; ``other`` only needs ``x`` and ``y`` attributes.
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        x, y = self.x * scalar, self.y * scalar
        return Vector(x, y)

# === List Comprehensions vs Generator Expressions ===
symbols = '$¢£¥€¤'
codes_list = list(map(ord, symbols))          # Materialized immediately
codes_gen = (ord(char) for char in symbols)   # Evaluated lazily on iteration

# === Tuples as Records and Immutable Lists ===
lax_coordinates = 33.9425, -118.408056   # Record: latitude, longitude
immutable_tuple = 10, 'alpha', (1, 2)    # Only immutable items inside
mutable_tuple = 10, 'alpha', [1, 2]      # Holds a list, which can still change

# === Pattern Matching with Sequences (Python 3.10+) ===
def handle_command(command):
    match command:
        case ['BEEPER', freq, times]: return f"Beep {times}x at {freq}Hz"
        case ['NECK', angle]: return f"Rotate neck to {angle}°"
        case ['LED', id, intensity]: return f"Set LED {id} brightness to {intensity}"
        case _: return "Unknown command"

# === Slicing Examples ===
s = 'bicycle'
slice_examples = {
    "Basic slice": s[2:5],     # 'cyc'
    "Full step": s[::3],       # 'bye' (every third character)
    "Reverse": s[::-1],        # 'elcycib'
    "Custom slice": s[1:6:2],  # 'iyl'
}

# === Named Slice Objects (Invoice Example) ===
# SKU and PRICE address fixed character columns, so every invoice line must be
# padded to the same layout: SKU in columns [0:6], unit price in [40:52].
# With whitespace collapsed, line[PRICE] fell past the data and came out empty.
invoice = """\
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                 $4.95    2     $9.90"""
SKU = slice(0, 6)
PRICE = slice(40, 52)

# === Cartesian Products ===
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts_list = [(c, s) for c in colors for s in sizes]  # List comprehension
tshirts_gen = ((c, s) for c in colors for s in sizes)   # Generator expression

# === Augmented Assignment Gotcha with Tuples ===
t = (1, 2, [30, 40])
try:
    # The list is extended in place first; only the final step, storing the
    # result back into the tuple, raises TypeError. So the exception is
    # raised AND t[2] has already grown to [30, 40, 50, 60].
    t[2] += [50, 60]
except TypeError as e:
    print(f"TypeError: {e}")

# === Demo Execution ===
deck = FrenchDeck()
v1 = Vector(2, 4)
v2 = Vector(2, 1)

print("FrenchDeck Features:")
print(f"Random card: {choice(deck)}")
print(f"Top 3 cards: {deck[:3]}")

print("\nVector Operations:")
print(f"v1 + v2 = {v1 + v2}")
print(f"abs(v1) = {abs(v1)}")
print(f"v1 * 3 = {v1 * 3}")

print("\nTuples as Records:")
print("Coordinates:", lax_coordinates)
print("Immutable tuple:", immutable_tuple)
# Really mutate the list held by the tuple instead of printing a freshly
# built look-alike: the tuple keeps the same list object, which is now longer.
mutable_tuple[2].append(99)
print("Mutable tuple (after modification):", mutable_tuple)

print("\nCartesian Product:")
print("List comprehension:", tshirts_list)
print("Generator expression (converted):", list(tshirts_gen))

print("\nPattern Matching Examples:")
print(handle_command(['BEEPER', 440, 3]))
print(handle_command(['LED', 1, 0.7]))

print("\nSlicing Examples:")
for name, result in slice_examples.items():
    print(f"{name}: {result}")

print("\nNamed Slice Invoice Parsing:")
for line in invoice.split('\n'):
    print(f"SKU: {line[SKU]} | Price: {line[PRICE]}")

print("\nTuple Mutation Gotcha:")
# t was already changed by the failed `t[2] += ...` earlier in this cell, so
# it is labelled as the post-failure state rather than the "original".
print("Tuple t after the failed +=:", t)
print("Modified inner list:", t[2])

# === Memory Comparison ===
# sys.getsizeof measures the container object only.
list_size = sys.getsizeof([1, 2, 3])
tuple_size = sys.getsizeof((1, 2, 3))
print(f"\nMemory Usage (bytes): List={list_size}, Tuple={tuple_size}")

"""
KEY TAKEAWAYS:
1. **Dunder Methods**:
   - `__len__` and `__getitem__` make classes behave like sequences (e.g., `len(deck)`, `deck[0]`).

2. **List Comprehensions vs Generators**:
   - Use `[]` for immediate computation, `()` for lazy evaluation.
   - Generators save memory for large datasets (e.g., `array.array('d', (ord(s)...)`).

3. **Tuples**:
   - **As Records**: Fixed-size, ordered fields (e.g., coordinates, city data).
   - **Caveat**: Tuples with mutable elements (e.g., lists) can change indirectly.

4. **Pattern Matching (Python 3.10+)**:
   - `match/case` simplifies complex conditionals with destructuring (e.g., handling nested data structures).

5. **Slicing**:
   - `s[start:stop:step]` supports negative indices and steps.
   - Use `slice()` objects for named, reusable slices in complex data parsing.

6. **Performance**:
   - Tuples use ~40% less memory than lists for small data.
   - Listcomps are faster than `map/filter` for Python-level code.

7. **Gotchas**:
   - Augmented assignment (`+=`) on tuples with mutable elements raises `TypeError` but modifies the inner list.
   - `a * n` with mutable items creates multiple references to the same object (e.g., ` [[]] * 3`).

8. **Design Patterns**:
   - Prefer tuples for data that shouldn't change (e.g., configuration, constants).
   - Use listcomps for small datasets; generators for streaming/large data.
"""

In [None]:
# Fluent Python: Advanced Sequences, Memoryviews, and Pattern Matching
import collections
import array
import numpy as np

# === Deque Operations (collections.deque) ===
dq = collections.deque(range(10), maxlen=10)
dq.rotate(3)          # Move right items to left
dq.appendleft(-1)     # Bounded deque discards from right
dq.extend([11, 22])   # Full deque demonstration

# === Memoryview Manipulation ===
numbers = array.array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
memv_oct = memv.cast('B')  # View as bytes
memv_oct[5] = 4            # Modify byte directly

# === NumPy Array Basics ===
np_array = np.arange(12).reshape(3, 4)  # 3x4 matrix
row = np_array[2]                     # Row access
element = np_array[2, 1]              # Element access
column = np_array[:, 1]               # Column access
transposed = np_array.T               # Transpose

# === Sorting: list.sort() vs sorted() ===
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted_fruits = sorted(fruits)        # Returns new list
fruits.sort(reverse=True)             # In-place sort

# === Structural Pattern Matching (Python 3.10+) ===
def handle_command(cmd):
    match cmd:
        case ['BEEPER', freq, times]: return f"Beep {times}x at {freq}Hz"
        case ['LED', id, *rest]: return f"LED {id} with {rest}"
        case _: return "Unknown command"

# === Demo Execution ===
print("Deque Operations:")
print("Original deque:", dq)
print("After rotation and appends:", dq)

print("\nMemoryview Modification:")
print("Original array:", numbers)
print("Modified array:", array.array('h', memv.tolist()))

print("\nNumPy Array Manipulation:")
print("Original array:\n", np_array)
print("Row [2]:", row)
print("Element [2,1]:", element)
print("Transposed array:\n", transposed)

print("\nSorting Examples:")
print("sorted() result:", sorted_fruits)
print("In-place sorted list:", fruits)

print("\nPattern Matching Example:")
print(handle_command(['BEEPER', 440, 3]))
print(handle_command(['LED', 1, 'ON', 50]))

"""
KEY TAKEAWAYS:
1. **Deque Efficiency**:
   - Use `collections.deque` for O(1) appends/pops from both ends.
   - `maxlen` creates bounded deques that discard old items when full.

2. **Memoryviews**:
   - Zero-copy views of array data (`memoryview(array)`).
   - Cast to different data types (`cast('B')` for bytes).
   - Direct byte manipulation without copying underlying data.

3. **NumPy Arrays**:
   - Vectorized operations avoid explicit loops (e.g., `np.arange()`, `reshape()`).
   - Efficient multidimensional indexing and transposing.
   - Ideal for numerical data processing vs. native Python lists.

4. **Sorting**:
   - `list.sort()` sorts in-place and returns `None`.
   - `sorted()` returns a new sorted list.
   - Use `key=` and `reverse=` parameters for custom sorting.

5. **Pattern Matching**:
   - `match/case` simplifies complex conditionals (Python 3.10+).
   - Destructures sequences with guards and wildcards (`_`).

6. **Mutable vs Immutable**:
   - `+=`/`*=` behavior differs: 
     - Lists (mutable): in-place modification
     - Tuples (immutable): creates new objects
   - Augmented assignment can have side effects with nested mutable elements.
"""

### CHAPTER 3: Dictionaries and Sets


In [None]:
# Fluent Python: Dictionaries, Dict Comprehensions, and Pattern Matching

# === 1. Dict Comprehensions ===
# (dial code, country) pairs
dial_codes = [
    (880, 'Bangladesh'), (55, 'Brazil'),
    (86, 'China'), (91, 'India'),
    (62, 'Indonesia'), (81, 'Japan'),
    (234, 'Nigeria'), (92, 'Pakistan'),
    (7, 'Russia'), (1, 'United States'),
]

# Swap each pair so the country name becomes the key
country_dial = dict((name, prefix) for prefix, name in dial_codes)
# Walk the countries in alphabetical order, keep codes below 70,
# and key the result by code with the name upper-cased
filtered_dial = dict(
    (prefix, name.upper())
    for name, prefix in sorted(country_dial.items())
    if prefix < 70
)

# === 2. Unpacking Mappings (PEP 448) ===
def dump(**kwargs):
    """Return the keyword arguments received by this call as a dict."""
    received = kwargs
    return received

# Several ** mappings may be unpacked in one call; keys must not repeat.
unpacked_func = dump(**dict(x=1), y=2, **dict(z=3))

# In a dict literal ** may also appear several times; a repeated key keeps
# its original position but takes the last value seen (x ends up as 4).
unpacked_dict = {'a': 0, **dict(x=1), 'y': 2, **dict(z=3, x=4)}

# === 3. Merging Mappings with | (Python 3.9+) ===
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)

# `|` builds a brand-new mapping; on shared keys the right operand wins.
merged_dict = d1 | d2  # {'a': 2, 'b': 4, 'c': 6}

# `|=` mutates the left operand instead of creating a new mapping.
d1 |= d2

# === 4. Pattern Matching with Mappings (Python 3.10+) ===
def get_creators(record):
    """Extracts creator names from various media records using pattern matching"""
    match record:
        case {'type': 'book', 'api': 2, 'authors': [*names]}:
            return names
        case {'type': 'book', 'api': 1, 'author': name}:
            return [name]
        case {'type': 'book'}:
            raise ValueError(f"Invalid 'book' record: {record!r}")
        case {'type': 'movie', 'director': name}:
            return [name]
        case _:
            raise ValueError(f'Invalid record: {record!r}')

# === Demo Execution ===
print("Dict Comprehensions:")
print("Country to dial code mapping:", country_dial)
print("Filtered dial codes (<70):", filtered_dial)

print("\nUnpacking Mappings:")
print("Function unpacking result:", unpacked_func)
print("Dict literal unpacking result:", unpacked_dict)

print("\nMerging Mappings:")
print("Original d1:", {'a': 1, 'b': 3})
print("d2:", d2)
print("d1 | d2 (merged):", merged_dict)
print("d1 after |= d2:", d1)

print("\nPattern Matching Examples:")
# Book with API 1
b1 = dict(api=1, author='Douglas Hofstadter', type='book', title='Gödel, Escher, Bach')
print("Book (API 1) creators:", get_creators(b1))

# Book with API 2
from collections import OrderedDict
b2 = OrderedDict(api=2, type='book', title='Python in a Nutshell', 
                authors='Martelli Ravenscroft Holden'.split())
print("Book (API 2) creators:", get_creators(b2))

# Movie example
m1 = {'type': 'movie', 'director': 'Christopher Nolan'}
print("Movie creators:", get_creators(m1))

# Extra key handling example
food = dict(category='ice cream', flavor='vanilla', cost=199)
match food:
    case {'category': 'ice cream', **details}:
        print(f"\nIce cream details (capturing extra keys): {details}")

"""
KEY TAKEAWAYS:
1. **Dict Comprehensions**:
   - Format: `{key: value for item in iterable}`
   - Can include filtering (`if` clause) and transformations
   - More readable than `dict()` constructor for complex transformations

2. **Unpacking Mappings** (PEP 448):
   - Use `**` to unpack multiple dictionaries in function calls and literals
   - In function calls: duplicate keys cause errors (no duplicates allowed)
   - In dict literals: later keys overwrite earlier ones (e.g., `{'x':1, **{'x':4}}` → `{'x':4}`)

3. **Merging Mappings** (Python 3.9+):
   - `|` creates a new merged dictionary (right-side keys overwrite left-side)
   - `|=` updates a dictionary in-place with another's contents
   - Also implemented by other standard-library mappings (e.g. `OrderedDict`, `defaultdict`, `ChainMap`), not just `dict`

4. **Pattern Matching with Mappings** (Python 3.10+):
   - Mapping patterns match by key presence, not order (unlike sequence patterns)
   - Partial matches succeed (extra keys are ignored)
   - Use `**details` to capture extra key-value pairs
   - Ideal for processing semi-structured data (JSON, API responses)

5. **Best Practices**:
   - Include type/version fields in data records for robust pattern matching
   - Always include validation/catch-all cases in pattern matching
   - Dict comprehensions improve readability when transforming data
   - Use mapping merging operators for cleaner dictionary operations

6. **Important Notes**:
   - Pattern matching uses `.get()` internally, so it doesn't trigger missing-key handlers
   - Mapping patterns are more flexible than sequence patterns (order-independent)
   - Dict comprehensions are evaluated immediately (unlike generator expressions)
"""

In [None]:
# Fluent Python: Mapping Types, Hashability, and Advanced Dictionary Features

import collections
import sys
from collections.abc import Mapping, MutableMapping

# === 1. Hashability Examples ===
# Hashable objects have a hash code that never changes during their lifetime
# and can be compared to other objects (__hash__ and __eq__ methods)

# Hashable examples
tt = (1, 2, (30, 40))  # Tuple with immutable elements
tf = (1, 2, frozenset([30, 40]))  # Tuple with frozenset

# Unhashable example
try:
    tl = (1, 2, [30, 40])
    hash(tl)
except TypeError as e:
    unhashable_error = str(e)

# === 2. Standard Mapping Types ===
# Check if objects are instances of Mapping ABCs
standard_dict = {}
is_mapping = isinstance(standard_dict, Mapping)
is_mutable_mapping = isinstance(standard_dict, MutableMapping)

# === 3. dict.setdefault() vs dict.get() for updating mutable values ===
# Example: Building a word index (mapping word -> list of occurrences)

# Suboptimal approach using get()
index_get = {}
words = "Python is great. Python is powerful. Python is everywhere.".split()
for position, word in enumerate(words):
    # Inefficient: always two lookups per word (get() plus the assignment), and the write-back is redundant when the list already exists
    occurrences = index_get.get(word, [])
    occurrences.append(position)
    index_get[word] = occurrences

# Optimal approach using setdefault()
index_setdefault = {}
for position, word in enumerate(words):
    # Efficient: single lookup
    index_setdefault.setdefault(word, []).append(position)

# === 4. defaultdict for Automatic Default Values ===
# Better solution for the word index problem
index_defaultdict = collections.defaultdict(list)
for position, word in enumerate(words):
    index_defaultdict[word].append(position)

# === 5. Custom Mapping with __missing__ Method ===
class StrKeyDict0(dict):
    """dict subclass that retries a failed lookup with `str(key)`.

    Keys are expected to be stored as strings; `d[k]`, `d.get(k)` and
    `k in d` also accept non-string keys and look them up via `str(k)`.
    """

    def __missing__(self, key):
        # Reached only when the normal dict lookup failed. A str key has no
        # further fallback; raising here also stops the retry from recursing.
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        # Route through self[key] so the __missing__ fallback applies.
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def __contains__(self, key):
        # Test against the keys view: `key in self` would call this method
        # again and recurse.
        stored_keys = self.keys()
        return key in stored_keys or str(key) in stored_keys

# Test the custom dictionary
str_key_dict = StrKeyDict0([('2', 'two'), ('4', 'four')])

# === 6. Merging Dictionaries (Python 3.9+) ===
d1 = {'a': 1, 'b': 2}
d2 = {'b': 3, 'c': 4}
merged_dict = d1 | d2  # Creates new dict with d2 values overwriting d1
d1 |= d2  # In-place update

# === 7. Pattern Matching with Mappings (Python 3.10+) ===
def get_media_type(record):
    """Extract media type using pattern matching"""
    match record:
        case {'type': 'book', 'api': 2, 'authors': [*names]}:
            return f"Book (API 2) by {', '.join(names)}"
        case {'type': 'book', 'api': 1, 'author': name}:
            return f"Book (API 1) by {name}"
        case {'type': 'movie', 'director': name}:
            return f"Movie directed by {name}"
        case _:
            return "Unknown media type"

# === Demo Execution ===
print("=== HASHABILITY EXAMPLES ===")
print(f"Hashable tuple: hash({tt}) = {hash(tt)}")
print(f"Frozenset-based tuple: hash({tf}) = {hash(tf)}")
print(f"Unhashable error: {unhashable_error}\n")

print("=== MAPPING TYPE CHECKS ===")
print(f"Is standard dict a Mapping? {is_mapping}")
print(f"Is standard dict a MutableMapping? {is_mutable_mapping}\n")

print("=== WORD INDEX EXAMPLES ===")
print("Using get():", index_get)
print("Using setdefault():", index_setdefault)
print("Using defaultdict:", dict(index_defaultdict), "\n")

print("=== CUSTOM DICTIONARY EXAMPLE ===")
print(f"str_key_dict['2'] = {str_key_dict['2']}")
print(f"str_key_dict[4] = {str_key_dict[4]}")
try:
    print(str_key_dict[1])
except KeyError as e:
    # str(e) for a KeyError is already the quoted key, so wrapping it in
    # hand-written quotes printed KeyError(''1''); the repr prints KeyError('1').
    print(f"str_key_dict[1] raises: {e!r}")
print(f"str_key_dict.get(4) = {str_key_dict.get(4)}")
print(f"2 in str_key_dict? {2 in str_key_dict}\n")

print("=== DICTIONARY MERGING ===")
print(f"Original d1: {{'a': 1, 'b': 2}}")
print(f"d2: {{'b': 3, 'c': 4}}")
print(f"d1 | d2: {merged_dict}")
print(f"d1 after |= d2: {d1}\n")

print("=== PATTERN MATCHING EXAMPLES ===")
book_api2 = {'type': 'book', 'api': 2, 'authors': ['Martelli', 'Ravenscroft', 'Holden']}
book_api1 = {'type': 'book', 'api': 1, 'author': 'Hettinger'}
movie = {'type': 'movie', 'director': 'Nolan'}
print(f"Book (API 2): {get_media_type(book_api2)}")
print(f"Book (API 1): {get_media_type(book_api1)}")
print(f"Movie: {get_media_type(movie)}")

"""
KEY TAKEAWAYS:
1. **Hashability**:
   - An object is hashable if it has a hash code that never changes during its lifetime (__hash__ method) 
     and can be compared to other objects (__eq__ method).
   - Numeric types and immutable flat types (str, bytes) are hashable.
   - Container types are hashable only if they are immutable and all contained objects are hashable.
   - Tuples are hashable only if all items are hashable (e.g., (1, 2, [30, 40]) is unhashable).

2. **Mapping ABCs**:
   - Use `isinstance(obj, Mapping)` to check if an object behaves as a mapping (supports key lookups).
   - Use `isinstance(obj, MutableMapping)` to check if an object supports in-place modifications.
   - These ABCs help write more generic code that works with any mapping type.

3. **Efficient Mutable Value Updates**:
   - Use `dict.setdefault(key, []).append(value)` for single-lookup updates to mutable values.
   - Avoid the inefficient pattern: `d[key] = d.get(key, []) + [value]` (multiple lookups).

4. **defaultdict**:
   - Automatically creates default values for missing keys using a factory function (e.g., `defaultdict(list)`).
   - Only triggers for `d[key]` lookups, not for `d.get(key)` or `key in d`.

5. **__missing__ Method**:
   - Custom mappings can implement `__missing__` to handle missing keys in `__getitem__`.
   - Not automatically called by `get()` or `__contains__`, so those methods need custom implementations.

6. **Dictionary Merging** (Python 3.9+):
   - `|` creates a new merged dictionary (right-side keys overwrite left-side).
   - `|=` updates a dictionary in-place with another's contents.

7. **Pattern Matching** (Python 3.10+):
   - Mapping patterns match by key presence (order-independent).
   - Partial matches succeed (extra keys are ignored).
   - Use `**details` to capture extra key-value pairs.

8. **Best Practices**:
   - Prefer `collections.UserDict` over direct `dict` subclassing for custom mappings.
   - Use `defaultdict` when you need automatic default values for missing keys.
   - Use `setdefault()` for efficient updates of mutable values in standard dictionaries.
"""

In [None]:
# Fluent Python: __missing__ Method Behavior & Dictionary Variations

import collections
from types import MappingProxyType
import sys

# === 1. Inconsistent __missing__ Behavior Across Dictionary Implementations ===

# Case 1: Subclassing dict (only __missing__ implemented)
class DictSubclass(dict):
    """dict subclass that customizes nothing except `__missing__`."""

    def __missing__(self, key):
        return "dict subclass: default for {}".format(key)


d_dict = DictSubclass()
# One print call, one argument per output line; the trailing "\n" on the
# last argument yields the blank separator line.
print(
    "=== dict Subclass ===",
    f"d_dict['x'] (uses __missing__): {d_dict['x']}",
    f"d_dict.get('y') (ignores __missing__): {d_dict.get('y')}",
    f"'z' in d_dict (ignores __missing__): {'z' in d_dict}\n",
    sep="\n",
)

# Case 2: Subclassing UserDict (only __missing__ implemented)
class UserDictSubclass(collections.UserDict):
    """UserDict subclass that customizes nothing except `__missing__`."""

    def __missing__(self, key):
        return f"UserDict subclass: default for {key}"

d_userdict = UserDictSubclass()
print("=== UserDict Subclass ===")
print(f"d_userdict['x'] (uses __missing__): {d_userdict['x']}")
# get() is version-dependent: up to Python 3.11 it is implemented via d[k]
# and therefore reaches __missing__; from 3.12 UserDict.get() tests
# membership first and returns the default instead.
print(f"d_userdict.get('y') (uses __missing__ only on Python < 3.12): {d_userdict.get('y')}")
# UserDict.__contains__ consults only the underlying `data` dict, so the
# `in` operator never reaches __missing__ (this prints False).
print(f"'z' in d_userdict (ignores __missing__): {'z' in d_userdict}\n")

# Case 3: Minimal Mapping subclass with __missing__ but no __getitem__ call
import collections.abc  # `import collections` alone does not guarantee the `abc` submodule is loaded


class MinimalMapping(collections.abc.Mapping):
    """Read-only mapping backed by a plain dict in `self.data`.

    `__getitem__` delegates straight to the inner dict, so the
    `__missing__` method defined here is never consulted.
    """

    def __init__(self, *args, **kwargs):
        self.data = dict(*args, **kwargs)

    def __getitem__(self, key):
        return self.data[key]  # Doesn't call __missing__

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def __missing__(self, key):
        # Present only to show that nothing calls it.
        return f"MinimalMapping: default for {key}"

d_minimal = MinimalMapping()
print("=== Minimal Mapping Subclass ===")
try:
    print(f"d_minimal['x'] (no __missing__): {d_minimal['x']}")
except KeyError as e:
    # The repr prints KeyError('x'); str(e) inside hand-written quotes
    # would print doubled quotes.
    print(f"d_minimal['x'] raises: {e!r}")
# Mapping supplies get() as a mixin built on __getitem__: the KeyError is
# turned into the default (None), so no AttributeError can occur here.
print(f"d_minimal.get('y') (Mapping mixin, ignores __missing__): {d_minimal.get('y')}")
# The mixin __contains__ likewise relies on __getitem__ raising KeyError.
print(f"'z' in d_minimal (ignores __missing__): {'z' in d_minimal}\n")

# === 2. StrKeyDict: Subclassing UserDict (Better Approach) ===
class StrKeyDict(collections.UserDict):
    """UserDict whose keys are stored as `str`.

    Keys are converted with `str()` when set; lookups and membership tests
    accept non-string keys and fall back to their string form.
    """

    def __missing__(self, key):
        # A str key that is missing has no fallback left.
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        # Every stored key is a str, so one check on the inner dict suffices.
        as_text = str(key)
        return as_text in self.data

    def __setitem__(self, key, item):
        as_text = str(key)
        self.data[as_text] = item

# Test StrKeyDict
str_key_dict = StrKeyDict([('2', 'two'), ('4', 'four')])
print("=== StrKeyDict (UserDict Subclass) ===")
print(f"str_key_dict['2']: {str_key_dict['2']}")
print(f"str_key_dict[4]: {str_key_dict[4]}")
print(f"str_key_dict.get(4): {str_key_dict.get(4)}")
print(f"2 in str_key_dict: {2 in str_key_dict}")
str_key_dict[5] = 'five'  # Key automatically converted to string
print(f"After str_key_dict[5] = 'five', keys: {list(str_key_dict.keys())}\n")

# === 3. Dictionary Variations in Standard Library ===
# OrderedDict
ordered_dict = collections.OrderedDict([('a', 1), ('b', 2), ('c', 3)])
ordered_dict.move_to_end('a')  # Move to end
print("=== OrderedDict ===")
print(f"OrderedDict after move_to_end: {list(ordered_dict.items())}")
print(f"Equality with regular dict: {ordered_dict == {'a': 1, 'b': 2, 'c': 3}}\n")

# ChainMap
d1 = {'a': 1, 'b': 3}
d2 = {'b': 2, 'c': 4}
chain = collections.ChainMap(d1, d2)
print("=== ChainMap ===")
print(f"chain['a']: {chain['a']}")  # From d1
print(f"chain['c']: {chain['c']}")  # From d2
chain['d'] = 5  # Only affects d1
print(f"After chain['d'] = 5, d1: {d1}\n")

# Counter
counter = collections.Counter('abracadabra')
counter.update('aaaaazzz')
print("=== Counter ===")
print(f"Counter after updates: {counter}")
print(f"Most common 3 items: {counter.most_common(3)}\n")

# Immutable Mapping (MappingProxyType)
original_dict = {'read': 'only', 'this': 'cannot', 'be': 'changed'}
read_only = MappingProxyType(original_dict)
print("=== Immutable Mapping (MappingProxyType) ===")
print(f"read_only['read']: {read_only['read']}")
try:
    read_only['new'] = 'value'
except TypeError as e:
    print(f"Attempt to modify read_only: {e}")
original_dict['new'] = 'value'  # Original can still be modified
print(f"After modifying original, read_only['new']: {read_only['new']}\n")

# === 4. Memory Comparison of Dictionary Types ===
standard_dict = sys.getsizeof({})
ordered_dict_size = sys.getsizeof(collections.OrderedDict())
user_dict_size = sys.getsizeof(collections.UserDict())

print("=== Memory Usage (bytes) ===")
print(f"Standard dict: {standard_dict}")
print(f"OrderedDict: {ordered_dict_size}")
print(f"UserDict: {user_dict_size}")

"""
KEY TAKEAWAYS:
1. **__missing__ Inconsistency**:
   - In `dict` subclasses: Only triggered by `d[k]`, not `d.get(k)` or `k in d`
   - In `UserDict` subclasses: Triggered by `d[k]`; `d.get(k)` reaches it only on Python < 3.12; `k in d` never does (`__contains__` checks the inner `data` dict)
   - In custom `Mapping` subclasses: Depends on implementation of `__getitem__`

2. **Subclassing Strategy**:
   - Prefer `collections.UserDict` over `dict` for custom mappings
   - `UserDict` uses composition (internal `data` dict) rather than inheritance
   - Avoids recursion issues and simplifies implementation

3. **Dictionary Variations**:
   - `OrderedDict`: Preserves insertion order, supports `move_to_end()`
   - `ChainMap`: Searches multiple mappings as one (updates affect only first mapping)
   - `Counter`: Specialized for counting hashable objects (multiset functionality)
   - `MappingProxyType`: Creates read-only, dynamic proxy of a dictionary

4. **Best Practices**:
   - Implement `__setitem__` when subclassing to handle key transformations
   - Use `MappingProxyType` for immutable mappings (dynamic but read-only)
   - `UserDict` provides better inheritance model for custom mappings than `dict`
   - Always consider whether you need the specific features of a specialized mapping

5. **Important Notes**:
   - `setdefault()` and `update()` behavior depends on underlying key lookup logic
   - Equality between two `OrderedDict`s is order-sensitive; comparing an `OrderedDict` with a regular `dict` ignores order
   - `ChainMap` is ideal for nested scope implementations (e.g., language interpreters)
   - `Counter` implements `+`, `-`, and `most_common()` for tally operations
"""

In [None]:
# Fluent Python: Dictionary Views, Sets, and Advanced Operations

# === 1. Dictionary Views ===
d = dict(a=10, b=20, c=30)
keys_view = d.keys()
values_view = d.values()
items_view = d.items()

print("=== DICTIONARY VIEWS ===")
print(f"Original dictionary: {d}")
print(f"Keys view: {keys_view}")
print(f"Values view: {values_view}")
print(f"Items view: {items_view}")

# Demonstrate dynamic nature of views (changes reflect automatically)
d['z'] = 99
print(f"\nAfter adding 'z': {d}")
print(f"Keys view (updated): {keys_view}")
print(f"Values view (updated): {values_view}")
print(f"Items view (updated): {items_view}")

# Views are not subscriptable
try:
    print(f"Attempting to index view: {values_view[0]}")
except TypeError as e:
    print(f"Error when indexing view: {e}")

# === 2. Set Operations on Dictionary Views ===
d1 = dict(a=1, b=2, c=3, d=4)
d2 = dict(b=20, d=40, e=50)

print("\n=== SET OPERATIONS ON DICT VIEWS ===")
print(f"d1: {d1}")
print(f"d2: {d2}")

# Intersection of keys (common keys)
common_keys = d1.keys() & d2.keys()
print(f"Common keys (d1 & d2): {common_keys}")

# Keys unique to d1
d1_only = d1.keys() - d2.keys()
print(f"Keys in d1 but not d2: {d1_only}")

# Union of keys
all_keys = d1.keys() | d2.keys()
print(f"All keys (union): {all_keys}")

# Symmetric difference (keys in either dict but not both)
sym_diff = d1.keys() ^ d2.keys()
print(f"Symmetric difference: {sym_diff}")

# Set operations with regular sets
s = {'a', 'e', 'i'}
print(f"\nRegular set: {s}")
print(f"d1.keys() & s: {d1.keys() & s}")
print(f"d1.keys() | s: {d1.keys() | s}")

# Note: dict_items works as a set only if all values are hashable
items_intersect = d1.items() & d2.items()
print(f"\nd1.items() & d2.items(): {items_intersect}")

# === 3. Sets and Set Operations ===
print("\n=== SETS AND SET OPERATIONS ===")
# Set literals (note: empty set requires set(), not {})
s1 = {1, 2, 3, 4}
s2 = {3, 4, 5, 6}
print(f"Set s1: {s1}")
print(f"Set s2: {s2}")

# Basic set operations
print(f"Union (s1 | s2): {s1 | s2}")
print(f"Intersection (s1 & s2): {s1 & s2}")
print(f"Difference (s1 - s2): {s1 - s2}")
print(f"Symmetric difference (s1 ^ s2): {s1 ^ s2}")

# Set predicates
print(f"\ns1 is subset of {s1 | s2}? {s1 <= (s1 | s2)}")
print(f"s1 is proper subset of {s1 | s2}? {s1 < (s1 | s2)}")
print(f"Are s1 and {s2 - s1} disjoint? {s1.isdisjoint(s2 - s1)}")

# Set comprehensions
from unicodedata import name
sign_chars = {chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}
print(f"\nSet comprehension (Unicode SIGN characters): {sign_chars}")

# === 4. Practical Applications ===
print("\n=== PRACTICAL APPLICATIONS ===")
# Removing duplicates while preserving order (Python 3.7+)
items = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']
unique_ordered = list(dict.fromkeys(items))
print(f"Original list: {items}")
print(f"Unique items (preserving order): {unique_ordered}")

# Efficient membership testing
needles = {'spam', 'eggs', 'ham'}
haystack = {'spam', 'eggs', 'bacon', 'toast'}
found = len(needles & haystack)
print(f"\nNeedles: {needles}")
print(f"Haystack: {haystack}")
print(f"Found {found} needles in haystack (using set intersection)")

# Counting distinct values by converting the list to a set
fruits = ['apple', 'banana', 'apple', 'orange', 'banana', 'apple']
fruit_set = set(fruits)
print(f"\nFruits list: {fruits}")
print(f"Fruit types: {fruit_set}")
print(f"Count of unique fruits: {len(fruit_set)}")

"""
KEY TAKEAWAYS:
1. **Dictionary Views**:
   - `dict.keys()`, `dict.values()`, `dict.items()` return dynamic views that update automatically when the dictionary changes
   - Views are read-only projections of dictionary internals (avoid memory overhead of copying data)
   - `dict_values` supports only iteration and length checking (no set operations)
   - `dict_keys` and `dict_items` support full set operations (intersection, union, etc.)
   - Views are not subscriptable (`view[0]` raises TypeError)

2. **Set Operations on Dict Views**:
   - Use `&` to find common keys: `d1.keys() & d2.keys()`
   - Use `-` to find keys unique to one dictionary: `d1.keys() - d2.keys()`
   - Dict views work seamlessly with regular sets: `d1.keys() & {'a', 'e', 'i'}`
   - `dict_items` only works as a set if all values are hashable
   - These operations are highly efficient (O(1) or O(n) time complexity)

3. **Sets**:
   - Create sets with `{}` (non-empty) or `set()` (empty); `{}` creates a dict
   - Set operations: union (`|`), intersection (`&`), difference (`-`), symmetric difference (`^`)
   - Set predicates: subset (`<=`), proper subset (`<`), disjoint (`isdisjoint()`)
   - Set comprehensions: `{expr for item in iterable if condition}` (e.g., Unicode character filtering)
   - Set literals (`{1, 2, 3}`) are faster than `set([1, 2, 3])` due to optimized bytecode

4. **Practical Applications**:
   - Remove duplicates while preserving order: `list(dict.fromkeys(seq))` (Python 3.7+)
   - Efficient membership testing: `if x in my_set` is O(1) regardless of set size
   - Count how many needles occur in the haystack with intersection: `len(needles & haystack)`
   - Use set operations to simplify complex conditional logic and eliminate loops
   - Pattern matching with dictionary views enables clean processing of structured data

5. **Implementation Details**:
   - Sets and dict views use hash tables for O(1) average-case membership testing
   - Set elements and dict keys must be hashable (a stable `__hash__` consistent with `__eq__`; in practice usually immutable objects)
   - Dictionary key ordering is preserved (officially guaranteed since Python 3.7)
   - Set operations on views avoid creating intermediate lists, saving memory and time
"""

### CHAPTER 4: Unicode Text Versus Bytes

In [None]:
# Fluent Python: Unicode Text vs Bytes - Comprehensive Guide

# === 1. Character Issues: Code Points vs Bytes ===
# Unicode separates character identity (code point) from byte representation (encoding)
cafe = 'café'  # 4 Unicode characters (code points)
print(f"Original string: '{cafe}' | Length: {len(cafe)}")

# Encode to bytes using different encodings
utf8_bytes = cafe.encode('utf-8')
utf16_bytes = cafe.encode('utf-16')
latin1_bytes = cafe.encode('latin-1')

print(f"\nUTF-8 encoding: {utf8_bytes} | Length: {len(utf8_bytes)}")
print(f"UTF-16 encoding: {utf16_bytes} | Length: {len(utf16_bytes)}")
print(f"LATIN-1 encoding: {latin1_bytes} | Length: {len(latin1_bytes)}")

# Decode back to string
print(f"Decoded from UTF-8: {utf8_bytes.decode('utf-8')}")
print(f"Decoded from UTF-16: {utf16_bytes.decode('utf-16')}")

# === 2. Byte Essentials: bytes and bytearray ===
# Creating bytes objects
# The escape pins the precomposed é (U+00E9), so the UTF-8 bytes below are
# b'caf\xc3\xa9' regardless of how an editor normalized this source file.
cafe_bytes = bytes('caf\u00e9', encoding='utf_8')
print(f"\nBytes object: {cafe_bytes}")
print(f"First byte (as int): {cafe_bytes[0]}")
print(f"First byte (as slice): {cafe_bytes[:1]}")  # Returns bytes, not str

# Creating and modifying bytearray
cafe_bytearray = bytearray(cafe_bytes)
print(f"\nBytearray: {cafe_bytearray}")
# In UTF-8, é is b'\xc3\xa9' and © is b'\xc2\xa9': only the lead byte
# differs. Writing 0xa9 into the last byte would change nothing.
cafe_bytearray[-2] = 0xc2  # Change é to ©
print(f"Modified bytearray: {cafe_bytearray}")
print(f"Decoded modified bytearray: {cafe_bytearray.decode('utf-8')}")

# Using string methods on binary sequences
print(f"\nBytes startswith 'caf': {cafe_bytes.startswith(b'caf')}")
# Computed outside the f-string: a backslash inside a replacement field is
# a SyntaxError before Python 3.12.
replaced_bytes = cafe_bytes.replace(b'\xc3\xa9', b'i')
print(f"Bytes replace é with i: {replaced_bytes}")

# Building bytes from hex
hex_bytes = bytes.fromhex('41 42 43 44 45')
print(f"\nBytes from hex: {hex_bytes} | Decoded: {hex_bytes.decode('ascii')}")

# === 3. Encoding Examples with Different Codecs ===
city = 'São Paulo'
print(f"\n\nEncoding '{city}' with different codecs:")
print(f"UTF-8: {city.encode('utf-8')}")
print(f"UTF-16: {city.encode('utf-16')}")
print(f"LATIN-1: {city.encode('latin-1')}")
try:
    print(f"CP437: {city.encode('cp437')}")
except UnicodeEncodeError as e:
    print(f"CP437 error: {e}")

# === 4. Error Handling During Encoding ===
print("\nEncoding error handling:")
print(f"Ignore errors: {city.encode('cp437', errors='ignore')}")
print(f"Replace errors: {city.encode('cp437', errors='replace')}")
print(f"XML charref: {city.encode('cp437', errors='xmlcharrefreplace')}")

# === 5. Error Handling During Decoding ===
octets = b'Montr\xe9al'
print(f"\nDecoding b'Montr\\xe9al' with different codecs:")
print(f"CP1252: {octets.decode('cp1252')}")  # Correct for French
print(f"ISO-8859-7: {octets.decode('iso8859_7')}")  # Greek encoding (incorrect)
print(f"KOI8-R: {octets.decode('koi8_r')}")  # Russian encoding (incorrect)
try:
    print(f"UTF-8: {octets.decode('utf-8')}")
except UnicodeDecodeError as e:
    print(f"UTF-8 error: {e}")
print(f"UTF-8 with replace: {octets.decode('utf-8', errors='replace')}")

# === 6. Practical Text Handling Examples ===
# Checking if string is pure ASCII
ascii_check = "Hello, World!"
non_ascii = "Héllö, Wørld!"
print(f"\nIs '{ascii_check}' ASCII? {ascii_check.isascii()}")
print(f"Is '{non_ascii}' ASCII? {non_ascii.isascii()}")

# Using memoryview for efficient byte manipulation
import array
numbers = array.array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
print(f"\nArray: {numbers}")
print(f"Memoryview as bytes: {bytes(memv)}")
print(f"Memoryview as list: {memv.tolist()}")

# === 7. Unicode Normalization Example ===
from unicodedata import normalize, combining, name
# Two ways to represent 'é': U+00E9 (single code point) or U+0065 + U+0301 (e + acute)
# Both forms are written as escapes: the two look identical on screen, and
# an editor that re-normalizes the file could otherwise turn the precomposed
# literal into two code points, which makes ord(e_acute1) below raise.
e_acute1 = '\u00e9'  # LATIN SMALL LETTER E WITH ACUTE (precomposed)
e_acute2 = 'e\u0301'  # e + COMBINING ACUTE ACCENT

print(f"\nTwo representations of 'é':")
print(f"e_acute1: '{e_acute1}' | Length: {len(e_acute1)} | Code point: U+{ord(e_acute1):04X}")
print(f"e_acute2: '{e_acute2}' | Length: {len(e_acute2)} | Code points: U+{ord(e_acute2[0]):04X}, U+{ord(e_acute2[1]):04X}")
print(f"Are they equal? {e_acute1 == e_acute2}")
# NFC composes base + combining mark; NFD decomposes the precomposed form.
print(f"Normalized to NFC: {normalize('NFC', e_acute2) == e_acute1}")
print(f"Normalized to NFD: {normalize('NFD', e_acute1) == e_acute2}")

"""
KEY TAKEAWAYS:
1. **Character vs Bytes**:
   - Unicode characters have code points (U+XXXX), but their byte representation depends on encoding
   - Encoding: str → bytes (text to bytes)
   - Decoding: bytes → str (bytes to text)
   - UTF-8 is the most common encoding (97% of websites as of 2021)

2. **Byte Essentials**:
   - `bytes` is immutable, `bytearray` is mutable
   - Each item in bytes/bytearray is an integer (0-255), not a character
   - Slicing bytes produces bytes, not str (unlike Python 2 str behavior)
   - Binary sequences support most string methods (except formatting and Unicode-specific ones)

3. **Encoding/Decoding Errors**:
   - `UnicodeEncodeError`: When converting str to bytes with unsupported characters
   - `UnicodeDecodeError`: When converting bytes to str with invalid byte sequences
   - Error handlers: 'strict' (default), 'ignore', 'replace', 'xmlcharrefreplace'

4. **Best Practices**:
   - Always specify encoding when opening files: `open('file.txt', encoding='utf-8')`
   - Use UTF-8 whenever possible for maximum compatibility
   - Check if text is ASCII with `str.isascii()` (Python 3.7+)
   - Normalize Unicode text (NFC/NFD) for reliable comparisons
   - Use memoryview for efficient byte manipulation without copying

5. **Unicode Features**:
   - Use `unicodedata.normalize()` for consistent text comparison
   - `unicodedata.name()` helps identify characters by name
   - Case folding (`str.casefold()`) is better than `str.lower()` for Unicode text
   - For proper sorting, use locale-aware methods or pyuca library

6. **Critical Insight**:
   - Python 3's strict separation of str and bytes prevents silent data corruption
   - Never assume encoding - always specify it explicitly
   - Not all bytes are valid text - be cautious when decoding binary data as text
"""

In [None]:
# Fluent Python: Unicode Normalization, Case Folding & Sorting

import unicodedata
import locale
import sys
from collections import Counter

# === 1. Unicode Normalization for Reliable Comparisons ===
# Unicode has multiple ways to represent the same visual character
s1 = 'café'  # Precomposed 'é' (U+00E9)
s2 = 'cafe\u0301'  # 'e' + COMBINING ACUTE ACCENT (U+0301)
print("=== UNICODE NORMALIZATION ===")
print(f"Original strings: '{s1}' (len={len(s1)}), '{s2}' (len={len(s2)})")
print(f"Direct comparison: s1 == s2? {s1 == s2}")

# Normalize using different forms
nfc_s1 = unicodedata.normalize('NFC', s1)
nfc_s2 = unicodedata.normalize('NFC', s2)
nfd_s1 = unicodedata.normalize('NFD', s1)
nfd_s2 = unicodedata.normalize('NFD', s2)

print(f"\nNFC normalization: '{nfc_s1}' (len={len(nfc_s1)}), '{nfc_s2}' (len={len(nfc_s2)})")
print(f"NFC comparison: {nfc_s1 == nfc_s2}")
print(f"NFD normalization: '{nfd_s1}' (len={len(nfd_s1)}), '{nfd_s2}' (len={len(nfd_s2)})")
print(f"NFD comparison: {nfd_s1 == nfd_s2}")

# Compatibility normalization (NFKC/NFKD)
half = '\N{VULGAR FRACTION ONE HALF}'  # '½'
four_squared = '4²'
micro = 'µ'

print(f"\nCompatibility normalization examples:")
print(f"'½' (NFKC): {unicodedata.normalize('NFKC', half)}")
print(f"'4²' (NFKC): {unicodedata.normalize('NFKC', four_squared)}")
print(f"'µ' (NFKC): {unicodedata.normalize('NFKC', micro)}")

# === 2. Case Folding vs Lowercase ===
print("\n=== CASE FOLDING ===")
eszett = 'ß'  # German Eszett
micro = 'µ'   # Micro sign

print(f"German 'ß' (casefold): {eszett.casefold()} (vs lower: {eszett.lower()})")
print(f"Micro sign 'µ' (casefold): {micro.casefold()} (vs lower: {micro.lower()})")

# === 3. Utility Functions for Text Matching ===
print("\n=== TEXT MATCHING UTILITIES ===")

def nfc_equal(str1, str2):
    """Return True when both strings are equal after NFC normalization (case-sensitive)."""
    left, right = (unicodedata.normalize('NFC', text) for text in (str1, str2))
    return left == right

def fold_equal(str1, str2):
    """Return True when both strings match after NFC normalization and case folding."""
    def _folded(text):
        # Normalize first, then fold case.
        return unicodedata.normalize('NFC', text).casefold()

    return _folded(str1) == _folded(str2)

# Test the functions
s1 = 'café'
s2 = 'cafe\u0301'
s3 = 'Straße'
s4 = 'strasse'

print(f"nfc_equal('café', 'cafe\\u0301'): {nfc_equal(s1, s2)}")
print(f"nfc_equal('Straße', 'strasse'): {nfc_equal(s3, s4)}")
print(f"fold_equal('Straße', 'strasse'): {fold_equal(s3, s4)}")
print(f"fold_equal('café', 'cafe\\u0301'): {fold_equal(s1, s2)}")

# === 4. Removing Diacritics ===
print("\n=== DIACRITIC REMOVAL ===")

def shave_marks(txt):
    """Return `txt` with every combining mark removed, recomposed to NFC."""
    # Decompose so each diacritic becomes a separate combining character.
    decomposed = unicodedata.normalize('NFD', txt)
    # combining() returns 0 for characters that are not combining marks.
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return unicodedata.normalize('NFC', ''.join(base_chars))

def shave_marks_latin(txt):
    """Return *txt* with combining marks stripped only after ASCII Latin letters.

    Marks attached to any other base character (Greek, Cyrillic, ...) are
    kept. The text is processed in NFD form and recomposed to NFC at the end.
    """
    ascii_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    kept = []
    after_latin = False  # True while the most recent base character is an ASCII letter

    for ch in unicodedata.normalize('NFD', txt):
        if unicodedata.combining(ch):
            # A mark is kept only when its base character is not Latin
            if not after_latin:
                kept.append(ch)
        else:
            # Any non-combining character starts a new base
            kept.append(ch)
            after_latin = ch in ascii_letters

    return unicodedata.normalize('NFC', ''.join(kept))

# Sample inputs: a mostly-Latin sentence with typographic symbols, and a
# mixed Greek/Latin string to contrast the two shaving functions.
order = '“Herr Voß: • ½ cup of Œtker™ caffè latte • bowl of açaí.”'
greek = 'Ζέφυρος, Zéfiro'

for _label, _text in (
    ("Original", order),
    ("Shaved marks", shave_marks(order)),
    ("Shaved Latin marks only", shave_marks_latin(order)),
    ("Greek text (full shaving)", shave_marks(greek)),
):
    print(f"{_label}: {_text}")

# === 5. Unicode Sorting ===
print("\n=== UNICODE SORTING ===")

fruits = ['caju', 'atemoia', 'cajá', 'açaí', 'acerola']
print("Standard sort: {}".format(sorted(fruits)))

# Locale-aware collation: only works when the OS provides the pt_BR locale
try:
    locale.setlocale(locale.LC_COLLATE, 'pt_BR.UTF-8')
    collation_key = locale.strxfrm
    sorted_fruits = sorted(fruits, key=collation_key)
    print("Locale sort (pt_BR.UTF-8): {}".format(sorted_fruits))
except (locale.Error, ValueError):
    print("Locale 'pt_BR.UTF-8' not available on this system")

# Alternative that does not depend on OS locales: pyuca (optional dependency)
try:
    from pyuca import Collator
    coll = Collator()
    collation_key = coll.sort_key
    sorted_fruits = sorted(fruits, key=collation_key)
    print("pyuca sort: {}".format(sorted_fruits))
except ImportError:
    print("pyuca library not installed. Install with: pip install pyuca")

# === 6. Unicode Database Exploration ===
print("\n=== UNICODE DATABASE ===")

def find_chars(query_words, start=0x20, stop=0x110000):
    """Find Unicode characters whose official name contains every query word.

    Args:
        query_words: Iterable of words; matching is case-insensitive and
            works on whole words of the character name. A single string is
            split on whitespace into words.
        start: First code point to examine (inclusive). Defaults to U+0020.
        stop: Code point at which the scan stops (exclusive). The default
            covers everything up to U+10FFFF.

    Returns:
        A list of ``(code, char, name)`` tuples in code point order, where
        ``code`` is formatted like ``'U+2323'``.

    Raises:
        ValueError: If the range extends beyond what ``chr()`` accepts.
    """
    if isinstance(query_words, str):
        # Iterating a bare string would yield single letters, not words
        query_words = query_words.split()
    query = {w.upper() for w in query_words}
    results = []

    for code in range(start, stop):
        char = chr(code)
        # With a default, name() returns None for unnamed code points
        # instead of raising, so no exception handling is needed here.
        name = unicodedata.name(char, None)
        if name and query.issubset(name.split()):
            results.append((f'U+{code:04X}', char, name))

    return results

# Characters whose name contains the word SMILE
smileys = find_chars(['SMILE'])
print("First 5 smiley characters:")
for code, char, name in smileys[:5]:
    print(code, char, name, sep="\t")

# Characters whose name contains both CIRCLED and NUMBER
circled = find_chars(['CIRCLED', 'NUMBER'])
print("\nFirst 5 circled numbers:")
for code, char, name in circled[:5]:
    print(code, char, name, sep="\t")

# Look up the general category code and name of a single character
char = 'A'
category = unicodedata.category(char)
print("\nCategory of '{}': {} ({})".format(char, category, unicodedata.name(char)))

"""
KEY TAKEAWAYS:

1. **Unicode Normalization**:
   - Use `unicodedata.normalize('NFC', text)` for most applications
   - NFC (Normalization Form C): Composes characters to shortest equivalent
   - NFD (Normalization Form D): Decomposes into base + combining characters
   - NFKC/NFKD: Stronger forms for compatibility characters (use only for search/indexing)
   - Always normalize before comparing Unicode strings

2. **Case Folding**:
   - Prefer `str.casefold()` over `str.lower()` for case-insensitive comparisons
   - Handles special cases like German ß → "ss" and µ → "μ"
   - Essential for reliable internationalized string comparisons

3. **Diacritic Removal**:
   - `shave_marks()`: Removes all diacritics (may affect non-Latin characters)
   - `shave_marks_latin()`: Only removes diacritics from Latin characters
   - Useful for creating ASCII-friendly URLs or search indexes
   - Use cautiously as it changes word meanings in some languages

4. **Unicode Sorting**:
   - Standard sorting by code points produces incorrect results for non-ASCII text
   - Two main approaches:
     * `locale.strxfrm`: OS-dependent, requires proper locale configuration
     * `pyuca.Collator`: Pure Python implementation of Unicode Collation Algorithm
   - For proper internationalized sorting, always use a specialized collator

5. **Unicode Database**:
   - Access character metadata with `unicodedata` module
   - `unicodedata.name(char)`: Get official Unicode character name
   - `unicodedata.category(char)`: Get character category code
   - Build utilities to search characters by name (like the `find_chars` function)

6. **Best Practices**:
   - Normalize text to NFC before storing (W3C recommendation)
   - Use NFC + case folding for case-insensitive comparisons
   - Never use NFKC/NFKD for permanent storage (data loss)
   - For sorting, prefer pyuca over locale for cross-platform consistency
   - When removing diacritics, consider language-specific rules

7. **Critical Insight**:
   - Unicode string processing requires careful handling of normalization
   - What looks identical visually may have different code point representations
   - Always normalize before comparing, sorting, or indexing Unicode text
"""

In [None]:
# Fluent Python: Unicode Numeric Characters & Dual-Mode APIs

import unicodedata
import re
import os
import sys
import locale
from pathlib import Path

# === 1. Numeric Meaning of Unicode Characters ===
print("=== NUMERIC MEANING OF UNICODE CHARACTERS ===")

# Characters with a numeric meaning, in order: ASCII digit one, superscript
# two, vulgar fraction one third, Devanagari digit three (U+0969), Roman
# numeral twelve (U+216B), circled digit seven (U+2466) and circled
# ideograph six (U+3285).
sample = '1²⅓\u0969\u216b\u2466\u3285'

print(f"Sample string: {sample}")
print(f"{'Code Point':<10} | {'Char':<5} | {'isdecimal':<10} | {'isnumeric':<10} | {'Numeric Value':<15} | {'Name'}")

# One table row per character: code point, glyph, the two predicate results,
# the numeric value (when the character has one) and the official name.
for char in sample:
    has_value = char.isnumeric()
    columns = [
        format(f'U+{ord(char):04X}', '<10'),
        format(char, '<5'),
        format('✓' if char.isdecimal() else '✗', '<10'),
        format('✓' if has_value else '✗', '<10'),
        format(unicodedata.numeric(char) if has_value else 'N/A', '<15'),
        unicodedata.name(char, 'UNKNOWN'),
    ]
    print(' | '.join(columns))

# The same \d+ pattern compiled as a str pattern and as a bytes pattern
print("\nRegex comparison for digits:")
re_digits_str, re_digits_bytes = re.compile(r'\d+'), re.compile(rb'\d+')

for _label, _matches in (
    ("String regex (\\d+)", re_digits_str.findall(sample)),
    ("Bytes regex (rb\\d+)", re_digits_bytes.findall(sample.encode('utf-8'))),
):
    print(f"{_label} matches: {_matches}")

# Function to extract numeric values from text
def extract_numeric_values(text):
    """Collect every numeric character of *text* with its value and name.

    Returns a list of ``(char, value, name)`` tuples in order of appearance,
    where ``value`` comes from ``unicodedata.numeric`` and ``name`` from
    ``unicodedata.name``. Characters for which either lookup raises
    ``TypeError`` or ``ValueError`` are skipped.
    """
    found = []
    for ch in text:
        if not ch.isnumeric():
            continue
        try:
            entry = (ch, unicodedata.numeric(ch), unicodedata.name(ch))
        except (TypeError, ValueError):
            continue
        found.append(entry)
    return found

# Show the (character, value, name) triple for each numeric character in sample
numeric_chars = extract_numeric_values(sample)
print("\nExtracted numeric values:")
for char, value, name in numeric_chars:
    print("Character: {}, Value: {}, Name: {}".format(char, value, name))

# === 2. Dual-Mode APIs: str vs bytes ===
print("\n=== DUAL-MODE APIs: STR VS BYTES ===")

# Regular expressions with str vs bytes
print("Regular expressions behavior:")
text_str = "Ramanujan saw 1729 = 1³ + 12³ = 9³ + 10³."
text_bytes = text_str.encode('utf-8')

# String pattern: a decimal number, or digits followed by superscript digits,
# or a plain run of digits. Alternatives are tried left to right and the
# first one that matches wins, so the longer forms must come before the bare
# \d+ or they would never get a chance to match.
re_str = re.compile(r'\d+\.\d+|\d+[\u00B2\u00B3\u00B9\u2070\u2074-\u2079]+|\d+')
# Bytes pattern - \d matches only ASCII digits
re_bytes = re.compile(rb'\d+')

print(f"Original text: {text_str}")
print(f"String regex matches: {re_str.findall(text_str)}")
print(f"Bytes regex matches: {[m.decode('utf-8') for m in re_bytes.findall(text_bytes)]}")

# OS functions with str vs bytes
print("\nOS functions with str vs bytes:")
# Scratch file with a non-ASCII name, created in the current directory
test_filename = "digits-of-π.txt"
try:
    Path(test_filename).touch()
    try:
        # List directory with str argument -> names come back as str
        str_listing = os.listdir('.')
        print(f"os.listdir('.') with str: {str_listing}")

        # List directory with bytes argument -> names come back as bytes
        bytes_listing = os.listdir(b'.')
        print(f"os.listdir(b'.') with bytes: {bytes_listing}")

        # Convert between str and bytes representations of the filename
        encoded = os.fsencode(test_filename)
        decoded = os.fsdecode(encoded)
        print(f"os.fsencode('{test_filename}'): {encoded}")
        print(f"os.fsdecode({encoded}): '{decoded}'")
    finally:
        # Remove the scratch file even when a step above failed, so the
        # demo never leaves it behind in the working directory.
        Path(test_filename).unlink()
except (OSError, UnicodeError) as e:
    # OSError: filesystem operations refused; UnicodeError: the filename or
    # output could not be encoded in this environment.
    print(f"OS operations error: {e}")
    print("Note: Some OS operations may fail in restricted environments like notebooks")

# === 3. Practical Unicode Processing Utilities ===
print("\n=== PRACTICAL UNICODE PROCESSING UTILITIES ===")

def normalize_and_extract_numbers(text):
    """Return the numeric value of every numeric character in NFKD-normalized *text*.

    The text is first normalized to NFKD, which replaces compatibility
    characters by their decompositions: for example '½' becomes '1⁄2' and
    therefore contributes 1.0 and 2.0 (not 0.5), and '²' becomes '2'.
    Values are returned as floats, in order of appearance.
    """
    values = []
    for ch in unicodedata.normalize('NFKD', text):
        if not ch.isnumeric():
            continue
        try:
            values.append(unicodedata.numeric(ch))
        except (TypeError, ValueError):
            # Character reported as numeric but without a usable value: skip it
            continue
    return values

def remove_diacritics(text):
    """Remove diacritics from Latin characters.

    The text is decomposed (NFD) so diacritics become separate combining
    marks; every mark whose base character is an ASCII Latin letter is
    dropped, and the result is recomposed (NFC). Marks on non-Latin base
    characters (e.g. Greek) and non-decomposable symbols are left untouched.

    Args:
        text: The string to process.

    Returns:
        The text, NFC-normalized, without diacritics on Latin letters.
    """
    latin_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    kept = []
    latin_base = False  # whether the most recent base character is Latin

    for c in unicodedata.normalize('NFD', text):
        if unicodedata.combining(c):
            # A combining mark is never itself a Latin letter, so the decision
            # must depend on the base character it is attached to.
            if latin_base:
                continue
        else:
            latin_base = c in latin_letters
        kept.append(c)

    return unicodedata.normalize('NFC', ''.join(kept))

# Run both utilities on one sentence mixing accents, symbols and numbers
sample_text = "Café ½ cup of Œtker™ at 100€, São Paulo, 4²=16"
print("Original text: {}".format(sample_text))
print("Numeric values: {}".format(normalize_and_extract_numbers(sample_text)))
print("Without diacritics: {}".format(remove_diacritics(sample_text)))

# === 4. Unicode Sorting Examples ===
print("\n=== UNICODE SORTING EXAMPLES ===")

fruits = ['caju', 'atemoia', 'cajá', 'açaí', 'acerola']
print("Standard sort: {}".format(sorted(fruits)))

# Locale-based collation; needs the pt_BR locale to be installed on the OS
try:
    locale.setlocale(locale.LC_COLLATE, 'pt_BR.UTF-8')
    collation_key = locale.strxfrm
    sorted_fruits = sorted(fruits, key=collation_key)
    print("Locale sort (pt_BR.UTF-8): {}".format(sorted_fruits))
except (locale.Error, ValueError):
    print("Locale 'pt_BR.UTF-8' not available on this system")

# pyuca collation; used only when the optional package is importable
try:
    from pyuca import Collator
    coll = Collator()
    collation_key = coll.sort_key
    sorted_fruits = sorted(fruits, key=collation_key)
    print("pyuca sort: {}".format(sorted_fruits))
except ImportError:
    print("pyuca library not installed. Install with: pip install pyuca")

"""
KEY TAKEAWAYS:

1. **Numeric Characters in Unicode**:
   - Unicode contains many numeric characters beyond standard digits (0-9)
   - `str.isdecimal()` checks for decimal digits of any script (ASCII 0-9, Devanagari ३, ...), not only 0-9
   - `str.isnumeric()` checks for any numeric characters (including fractions, superscripts)
   - `unicodedata.numeric(char)` returns the numeric value of a character
   - Regex `\\d` matches any Unicode decimal digit in str patterns but only ASCII 0-9 in bytes patterns

2. **Dual-Mode APIs**:
   - Many standard library functions work with both str and bytes
   - `re` module: str patterns match Unicode characters, bytes patterns match ASCII only
   - `os` module: accepts str or bytes for filenames/pathnames
   - `os.fsencode()`/`os.fsdecode()` convert between str and bytes representations
   - Use bytes mode when dealing with problematic filenames that can't be decoded

3. **Practical Processing**:
   - Always normalize text (NFC/NFKC) before processing
   - Use `unicodedata.normalize('NFKD', text)` to decompose characters for diacritic removal
   - For numeric extraction, combine normalization with `isnumeric()` checks
   - Case folding (`str.casefold()`) is better than `str.lower()` for Unicode text

4. **Sorting Unicode Text**:
   - Standard sorting by code points produces incorrect results for non-ASCII text
   - Two main approaches:
     * `locale.strxfrm`: OS-dependent, requires proper locale configuration
     * `pyuca.Collator`: Pure Python implementation of Unicode Collation Algorithm
   - For proper internationalized sorting, always use a specialized collator

5. **Critical Insights**:
   - Unicode numeric values aren't always integers (e.g., fractions like ½ = 0.5)
   - Bytes regex patterns only match ASCII digits (0-9), not Unicode numeric characters
   - OS filename handling requires special care for non-decodable byte sequences
   - Different languages have different sorting rules (e.g., German Ä vs Swedish Ä)

6. **Best Practices**:
   - Use explicit encoding/decoding everywhere (never rely on defaults)
   - Normalize text to NFC before storing (W3C recommendation)
   - For numeric processing, consider what "number" means in your context
   - When removing diacritics, consider language-specific rules
   - For international applications, use pyuca for reliable cross-platform sorting

7. **Debugging Tips**:
   - Use `unicodedata.name(char)` to identify mysterious characters
   - Check character categories with `unicodedata.category(char)`
   - When debugging encoding issues, print code points: `[ord(c) for c in text]`
   - For file issues, check `sys.getfilesystemencoding()` and locale settings
"""

### CHAPTER 5: Data Class Builders
