# PCEP-30-02 3.4 Operate with Strings

In [None]:
# Strings are one of the most fundamental data types in Python. This section will cover:
# - Constructing strings
# - Indexing, slicing, and immutability
# - Escaping characters using \
# - Quotes and apostrophes inside strings
# - Multi-line strings
# - Basic string functions and methods

### Ways to Create Strings

In [None]:
# Single and double quotes
s1 = 'Hello'

s2 = "World"

# Triple quotes for multi-line strings
s3 = '''This is
a multi-line
string.'''

s4 = """Another
multi-line string."""


### Strings: Indexing

In [None]:
text = "Python"
print(text[0])                    # <-- grabs the first character 'P' at index 0
print(text[3])                    # <-- grabs the 4th character 'h' at index 3
print(text[-1])                   # <-- grabs the last character 'n' (index -1 counts from the end)

### Strings: Slicing

In [None]:
#            str[:]  -->  full shallow copy of string

#       str[start:]  -->  from 'start' to 'end', by 'one'

#        str[:stop]  -->  from 0 to (stop-1)

#       str[::step]  -->  from 0 to last index, by 'step'

#   str[start:stop]  -->  from 'start' to 'stop-1', by 'one'

#  str[start::step]  -->  from 'start' to last index, by 'step'

#   str[:stop:step]  -->  from 0 to (stop-1), by 'step'

#         str[::-1]  -->  from '-1' to '0', by -1


In [None]:
text = "Python"
print(text[0:4])                               # <-- starts at index 0, and stops at index 3
print(text[:3])                                # <-- starts at index 0, and stops at index 2
print(text[2:])                                # <-- starts at index 2, and stops at last index
print(text[::2])                               # <-- starts at index 0, stops at last index, taking every other character
print(text[::-1])                              # <-- starts at the last index and steps backwards to index 0 (reverses the string)
print(text[-6:] == text[::1] == text[:])       # <-- all three slices are the full string, so this prints True

In [None]:
# Start: if 'start' is omitted and 'step' is negative, Python defaults to 'start=-1' (the last character)
# Stop:  if 'stop' is omitted ('None') and 'step' is negative, slicing runs through the beginning of the string
# Step:  -6, means it moves backwards by 6 places each time


 #  P   y   t   h   o   n
 #  0   1   2   3   4   5
 # -6  -5  -4  -3  -2  -1


text = "Python"
print(text[::-6]) # <-- 'start' and 'stop' are 'None', so it starts at -1 and takes 'n', moves back 6 places, finds nothing, then gracefully ends: "n"
print(text[None:None:-6] == text[::-6])
print(text[::-5]) # <-- 'start' and 'stop' are 'None', so it starts at -1 and takes 'n', moves back 5 places, and takes the 'P': "nP"
print(text[None:None:-5] == text[::-5])
print(text[::-2]) # <-- starts at -1, takes every other character, moving right-to-left: "nhy"
print(text[None:None:-2] == text[::-2])
print(text[100::-1]) # <-- if start is out of range Python clamps it to the last index automatically: "nohtyP"

### Strings: Immutability

In [None]:
# Strings in Python are immutable. This means we cannot change them once they are created

In [None]:
# Cannot perform this operation on strings, therefore a 'TypeError' is raised 

text = "Python"

text[0] = 'y'

### Strings: Escape Characters

In [None]:
# - single quote:        \'
# - double quote:        \"
# - backslash:           \\
# - newline:             \n
# - tab:                 \t
# - carriage return:     \r           <-- moves cursor to start of the line
# - backspace:           \b           <-- moves cursor back one character (the next printed character overwrites it)
# - page-break (rare)    \f
# - vertical tab         \v           <-- similar to newline, but vertical spacing
# - octal value          \ooo         <-- \141 = 'a' , Octal -> Ascii
# - hexadecimal          \xhh         <-- \x61 = 'a' , Hex -> Ascii
# - unicode (name)       \N{name}     <-- \N{GREEK CAPITAL LETTER DELTA} = Δ
# - unicode 16-bit       \u03a9       <-- Greek Omega: Ω
# - unicode 32-bit       \U0001F600   <-- emoji: 😀

In [None]:
print('Single quote: \'')
print("Double quote: \"")
print("Backslash: \\")
print("Newline:\nNext line")
print("Tab:\tIndented text")
print("Carriage return: Hello\rWorld")                       # <-- in a terminal "\r" returns to the start of the line, so "World" overwrites the first 5 characters ("Carri")
print("Backspace: Hello\b World")                            # <-- in a terminal "\b" steps back over the "o" of "Hello" and the space overwrites it: "Hell World"
print("Unicode (Ω): \u03A9")
print("Emoji 😀: \U0001F600")

In [None]:
# Carriage Return
# "\r" returns the cursor to the start of the line; the text after it overwrites
# what was printed before it, character by character.
print("Hello\rWorld")  # output: World (overwrites "Hello")


# Use Case:

import time

for i in range(0, 101, 10):
    print(f"\rProgress: {i}%", end="", flush=True)
    time.sleep(0.5)


In [None]:
# Octal & Hexadecimal
print("\141")  # output: a  (Octal for 'a')
print("\x61")  # output: a  (Hex for 'a')

In [None]:
# Raw Strings

# print("C:\Users\Documents\file.txt")     # <-- produces error because Python interprets \ as escape characters

# fixes:

# Use escape characters
print("C:\\Users\\Documents\\file.txt")

# Use a raw string
print(r"C:\Users\Documents\file.txt")  

### Strings: Methods & Functions

#### Replacing Substrings

In [None]:
text = "I love Python"
print(text.replace("Python", "Java"))     # <-- output: I love Java, replaces whole words or single letters

#### Search for SubStrings

In [None]:
# How .find() and .index() work:
# They search for a substring (a sequence of characters).
# They return the starting index of the first occurrence.
# They do not check if it is a separate word.

In [None]:
text = 'Find P string in Python'

print(text.find('in'))                    # <-- note: output: 1, the 'in' inside "Find" is matched first (substring match, not whole word)
                                          # <-- note: if the substring is not found, returns -1
                                          # <-- note: .find() is case sensitive: 'P' is diff than 'p'. Good to use in conjunction with .lower()

print("aaaa".find("aa"))                  # <-- note: output: 0, finds first occurrence of substring
print("aaaa".rfind("aa"))                 # <-- note: output: 2, finds last occurrence of substring

print(text.index('in'))                   # <-- same as find but raises ValueError if not found


In [None]:
text = "hello.txt"
print(text.startswith("hello"))          # <-- output: True
print(text.endswith(".txt"))             # <-- output: True

#### Splitting Strings

In [None]:
text = "apple,banana,grape"
words = text.split(",")                  # <-- the separator itself is removed from the result
print(words)                             # <-- output: splits into a list ['apple', 'banana', 'grape']

#### Joining Strings

In [None]:
words = ["apple", "banana", "grape"]
text = ", ".join(words)
print(text)                              # <-- output: joins the elements of list together into one string:  "apple, banana, grape"

### Edge Cases: Strings, String Operators

In [None]:
s = "Hello" + str(5)                     # <-- Works fine: "Hello5"
print(s)
s = "Hello" + 5                          # <-- TypeError: can only concatenate str (not "int") to str

In [None]:
# Concatenating empty strings

print("Hello" + "")                      # <-- both do nothing and does not error. output: "Hello"
print("" + "World")                      # <-- does nothing and does not error output: "World"


In [None]:
# Multiplying strings

print("Hello" * 0)                       # output: "" (empty string)
print("Hello" * -7)                      # output: "" (empty string)
print("Hello" * -0)                      # output: "" (empty string)
print("Hello" * True)
print("Hello"*int(float(2)))

In [None]:
print(" " in "Hello World")              # <-- note: output: True

In [None]:
print("" in "Hello World")               # <-- note: output: True

In [None]:
# Slicing: Edge Cases

text = 'Python'
print(text[0:100])         # <-- no index error is raised. Python handles them gracefully for strings
print(text[-100:])         # <-- no index error is raised. Python handles them gracefully for strings

In [None]:
# Case: start > stop with a + step

text = "Python"
print(text[4:2])           # <-- output: "" (empty string), a positive step can never travel from index 4 back to index 2

In [None]:
# Case: start < stop with a - step

text = "Python"
print(text[2:4:-1])        # <-- output: "" (empty string), a negative step can never travel from index 2 up to index 4

In [None]:
# Case: omitting start or stop

text = "Python"
print(text[:])             # <-- output: "Python" (full string)
print(text[None:])         # <-- output: "Python" (same as above)
print(text[:None])         # <-- output: "Python" (same as above)

In [None]:
# Case: setting step=0

text = "Python"
print(text[::0])           # <-- ValueError: slice step cannot be zero

In [None]:
# Case: large negative step

text = "Python"
print(text[::-6])          # <-- output: "n", starting from 'n' going backward there are not -6 indexes, so the first is only returned

In [None]:
# Case: slicing an empty string

empty = ""
print(empty[:])            # <-- output: "" (empty string)
print(empty[::-1])         # <-- output: "" (still empty)

In [None]:
# Case: Step skips over stop

text = "Python"
print(text[0:5:2])         # <-- output: "Pto", includes indexes 0, 2 and 4; the next step (index 6) would pass stop=5, so slicing ends

## String Methods: Mutation

### Method: .upper()

In [None]:
# - .upper()
# - takes: no args
# - returns: a new string with all characters converted to uppercase
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("   hello world   ".upper())               # <-- note: output: "   HELLO WORLD   " (surrounding whitespace is preserved)

### Method: .lower()

In [None]:
# - .lower()
# - takes: no args
# - returns: a new string with all characters converted to lowercase
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("   HELLO WORLD   ".lower())               # <-- note: output: "   hello world   " (surrounding whitespace is preserved)

### Method: .title()

In [None]:
# - .title()
# - takes: no args
# - returns: a new string where the first character of each word is capitalized
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("hello world".title())                     # <-- note: output: "Hello World"

In [None]:
print("HELLO WORLD".title())                     # <-- note: output: "Hello World"

In [None]:
print("hello2world".title())                     # <-- note: output: "Hello2World", a digit acts as a word boundary, so the letter after it is capitalized

In [None]:
print("#hello world".title())                    # <-- note: output: "#Hello World"


In [None]:
print("o'reilly's book".title())                 # <-- note: output: "O'Reilly'S Book"

In [None]:
print("self-learning ai".title())                # <-- note: output: "Self-Learning Ai"

### Method: .capitalize()

In [None]:
# - .capitalize()
# - takes: no args
# - returns: a new string with the first character capitalized and the rest in lowercase
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("hello world".capitalize())              # <-- note: output: "Hello world"

In [None]:
print("Hello world".capitalize())              # <-- note: output: "Hello world"

In [None]:
print("HELLO WORLD".capitalize())              # <-- note: output: "Hello world"

In [None]:
print("123hello world".capitalize())           # <-- note: output: "123hello world", returns input string

In [None]:
print("#hello world".capitalize())             # <-- note: output: "#hello world", special chars prevent leading letter capitalization

In [None]:
print("ábc".capitalize())                      # <-- note: output: "Ábc", works with non-ASCII characters

### Method: .strip()

In [None]:
# - .strip()
# - takes: one optional arg (a string of characters to remove from both sides, defaults to whitespace)
# - returns: a new string with the specified characters removed from both the left and right sides
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)
# - functionality:  removes leading (left) and trailing (right) characters from a string (whitespace by default). Can strip different chars

In [None]:
print("   hello world   ".strip())                 # <-- note: output: "hello world"

In [None]:
print("---hello---".strip("-"))                    # <-- note: output: "hello"

In [None]:
# .strip() only removes from the ends not the middle

print("xxhelloxx".strip("x"))                      # <-- note: output: "hello"

In [None]:
print("hello".strip(""))                           # <-- note: output: "hello", no chars to strip

In [None]:
print("abcHelloXYZabc".strip("abcXYZ"))            # <-- note: output: "Hello", the arg is a set of characters: any of them is removed from either end, in any order

In [None]:
print("hello".strip("xyz"))                        # <-- note: output: "hello", no matching chars returns input string

In [None]:
print("xxxxxx".strip("x"))                         # <-- note: output: ""

In [None]:
print("\n\t hello world \n\t".strip())             # <-- note: output: "hello world", removes tabs and newlines

In [None]:
print("\u2003Hello\u2003".strip())                 # <-- note: output: "Hello" (.strip() also removes Unicode whitespace such as EM SPACE, not just ASCII whitespace)

### Method: .lstrip()

In [None]:
# - .lstrip()
# - takes: one optional arg (a string of characters to remove from the left, defaults to whitespace)
# - returns: a new string with the specified characters removed from the left
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("   hello world   ".lstrip())               # <-- note: output: "hello world   "

### Method: .rstrip()

In [None]:
# - .rstrip()
# - takes: one optional arg (a string of characters to remove from the right, defaults to whitespace)
# - returns: a new string with the specified characters removed from the right
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
print("   hello world   ".rstrip())               # <-- note: output: "   hello world" (the three leading spaces are kept)

### Method: .replace()

In [None]:
# - .replace()
# - takes: two required positional args (old substring, new substring), one optional arg (count, number of replacements)
# - returns: a new string with occurrences of the old substring replaced by the new substring
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)

In [None]:
text = "banana banana banana"
print(text.replace("banana", "apple"))           # <-- note: normal functioning

In [None]:
text = "banana banana banana"
print(text.replace("banana", "apple", 2))        # <-- note: with optional 'num of replacements' arg, only the first 2 matches are replaced
# output: "apple apple banana"


In [None]:
print("hello world".replace("z", "X"))           # <-- note: output: "hello world" (unchanged)

In [None]:
print("hello world".replace("o", ""))            # <-- note: output: "hell wrld"

In [None]:
print("hello".replace("", "-"))                  # <-- note: output: "-h-e-l-l-o-" (inserts between each character)

In [None]:
print("hello".replace("", " "))                                      # <-- note: output: " h e l l o "  (a space is inserted before, between and after every character)

In [None]:
print("banana banana".replace("banana", "apple", 10))                # <-- note: output: "apple apple"

In [None]:
print("hello world".replace("o", "X", 0))                            # <-- note: output: "hello world"

In [None]:
print("hello world".replace("world", "world"))                       # <-- note: output: "hello world"

In [None]:
print("aaaa".replace("aa", "b"))                                     # <-- note: output: "bb"

### Method: .translate()

In [None]:
# - .translate(table)
# - takes: one arg (a translation table, created using str.maketrans())
# - returns: a new string with characters replaced based on the translation table
# - exceptions: 'TypeError' if the argument is not a valid translation table
# - modifies: does not modify the original string (strings are immutable)

In [None]:
# Create a translation table
table = str.maketrans("abc", "123")                 # <-- note: maps 'a' → '1', 'b' → '2', 'c' → '3'

# Apply translation
text = "banana"
print(text.translate(table))                        # <-- note: output: "21n1n1" ('b' → '2' and every 'a' → '1'; 'n' is not in the table)

# Handling removal
remove_table = str.maketrans("", "", "aeiou")       # <-- note: the third arg lists characters to delete: 'a', 'e', 'i', 'o', 'u' each map to None
print("hello world".translate(remove_table))        # <-- note: output: "hll wrld"


### Method: .casefold()

In [None]:
# - .casefold()
# - takes: no args
# - returns: a new string with all characters aggressively converted to lowercase (for case-insensitive comparisons)
# - exceptions: no exceptions
# - modifies: does not modify the original string (strings are immutable)
# - NOTE: is a one-way conversion designed for case-insensitive comparisons, not reversible transformations 
# -       use .lower() for english and .casefold() for unicode characters

In [None]:
print("HELLO WORLD".casefold())                     # <-- note: output: "hello world"

In [None]:
print("straße".lower())                             # <-- note: output: "straße"

print("straße".casefold())                          # <-- note: output: "strasse"


In [None]:
print("Μικρά".lower())                              # <-- note: output: "μικρά"

print("Μικρά".casefold())                           # <-- note: output: "μικρά" (same as .lower() here; casefold does not strip accents)

In [None]:
str1 = "HELLO"
str2 = "hello"

print(str1.lower() == str2.lower())                 # <-- note: output: True

print(str1.casefold() == str2.casefold())           # <-- note: output: True

In [None]:
print("İstanbul".lower())                           # <-- note: output: "i̇stanbul"  (keeps the dot!)

print("İstanbul".casefold())                        # <-- note: output: "i̇stanbul"  (also keeps the dot: 'İ' folds to 'i' + combining dot above)

## String Methods: Search & Identification

### Method: .maketrans()

In [None]:
# - .maketrans()
# - takes: either (1) three separate strings (`old`, `new`, `delete`), or (2) a dictionary mapping characters to replacements
# - returns: a translation table that can be used with `.translate()`
# - exceptions: ValueError if `old` and `new` have different lengths (in string form)
# - modifies: does not modify the original string (creates a mapping for later use)

In [None]:
table = str.maketrans("abc", "123")            # <-- note: replace 'a' → '1', 'b' → '2', 'c' → '3'
print("banana".translate(table))  
# output: "21n1n1" ('b' → '2', every 'a' → '1')

# Removing characters
remove_table = str.maketrans("", "", "aeiou")  # <-- note: remove vowels
print("hello world".translate(remove_table))   # <-- note: output: "hll wrld"

### Method: .isidentifier()

In [None]:
# - .isidentifier()
# - takes: no args
# - returns: True if the string is a valid Python identifier (variable name), otherwise False
# - exceptions: no exceptions
# - modifies: does not modify the original string (only checks validity)

In [None]:
print("variable_name".isidentifier())          # <-- note: output: True (valid identifier)

print("2nd_variable".isidentifier())           # <-- note: output: False (cannot start with a number)

print("class".isidentifier())                  # <-- note: output: True (but is a Python keyword!)

print("hello world".isidentifier())            # <-- note: output: False (contains a space)

print("___".isidentifier())                    # <-- note: output: True (all underscores)

### Method: .isalpha()

In [None]:
# - .isalpha()
# - takes: no args
# - returns: True if the string contains only letters (no numbers or symbols)
# - exceptions: no exceptions
# - modifies: does not modify the original string (only checks validity)

In [None]:
print("HelloWorld".isalpha())                   # <-- note: output: True (only letters)

print("Hello123".isalpha())                     # <-- note: output: False (contains numbers)

print("Hello!".isalpha())                       # <-- note: output: False (contains punctuation)

### Method: .isdigit()

In [None]:
# - .isdigit()
# - takes: no arguments
# - returns: 'True' if the string is non-empty and every character is a digit ('0-9', plus Unicode digits such as superscript '²'), otherwise 'False'
# - exceptions: no exceptions
# - modifies: does not modify the original string (returns a boolean)
# - NOTE: does not recognize negative numbers, decimal points, or fractions as digits

In [None]:
print("1245".isdigit())                        # <-- note: output: True, only contains digits between 0-9

print("abc123".isdigit())                      # <-- note: output: False

In [None]:
print("-12345".isdigit())                      # <-- note: output: False, doesn't recognize negative digits

### Method: .isalnum()

In [None]:
# - .isalnum()
# - takes: no args
# - returns: True if the string contains only letters and/or digits (no spaces or symbols)
# - exceptions: no exceptions
# - modifies: does not modify the original string (only checks validity)

In [None]:
print("Hello123".isalnum())                     # <-- note: output: True (only letters and numbers)

print("Hello 123".isalnum())                    # <-- note: output: False (contains a space)

print("Hello!".isalnum())                       # <-- note: output: False (contains a special character)

print("".isalnum())                             # <-- note: output: False (an empty string has no characters to check)

### Method: .isspace()

In [None]:
# - .isspace()
# - takes: no arguments
# - returns: `True` if the string contains only whitespace characters, otherwise `False`
# - exceptions: no exceptions
# - modifies: does not modify the original string (returns a boolean)
# - NOTE: recognizes spaces, tabs, newlines, and other Unicode whitespace characters

In [None]:
print("   ".isspace())                          # <-- note: output: True
print("hello world".isspace())                  # <-- note: output: False

In [None]:
print("\t".isspace())                           # <-- note: output: True
print("\n".isspace())                           # <-- note: output: True
print("\r".isspace())                           # <-- note: output: True

In [None]:
print("".isspace())                             # <-- note: output: False
print("\u2003".isspace())                       # <-- note: output: True


### Method: .isnumeric()

In [None]:
# - .isnumeric()
# - takes: no arguments
# - returns: `True` if the string contains only numeric characters (including fractions, Roman numerals, and Unicode digits)
# - exceptions: no exceptions
# - modifies: does not modify the original string (returns a boolean)
# - NOTE: recognizes fractions (`½`), superscripts (`²`), and full-width numbers (`１２３`)

In [None]:
print("12345".isnumeric())      # <-- note: output: True
print("٣٤٥".isnumeric())        # <-- note: output: True (Arabic digits)
print("Ⅻ".isnumeric())         # <-- note: output: True (Roman numeral for 12)
print("²".isnumeric())         # <-- note: output: True (Superscript 2)
print("½".isnumeric())         # <-- note: output: True (Fraction one-half)
print("-123".isnumeric())      # <-- note: output: False
print("3.14".isnumeric())      # <-- note: output: False
print("".isnumeric())          # <-- note: output: False

### Method: .isdecimal()

In [None]:
# - .isdecimal()
# - takes: no arguments
# - returns: `True` if the string contains only decimal digits (`0-9`) or Unicode decimal numbers
# - exceptions: no exceptions
# - modifies: does not modify the original string (returns a boolean)
# - NOTE: stricter than `.isnumeric()` (does NOT allow fractions, superscripts, or Roman numerals)

In [None]:
print("12345".isdecimal())       # <-- note: output: True
print("٣٤٥".isdecimal())         # <-- note: output: True (Arabic digits)
print("Ⅻ".isdecimal())         # <-- note: output: False (Roman numeral)
print("²".isdecimal())          # <-- note: output: False (Superscript 2)
print("½".isdecimal())          # <-- note: output: False (Fraction one-half)
print("-123".isdecimal())       # <-- note: output: False
print("3.14".isdecimal())       # <-- note: output: False
print("".isdecimal())           # <-- note: output: False

## String Methods: Splitting & Joining

### Method: .split()

In [None]:
# - .split(sep=None, maxsplit=-1)
# - takes: optional separator (string), optional maxsplit (int)
# - returns: a list of substrings
# - exceptions: none
# - modifies: does not modify the original string
# - iteration considerations: if using `maxsplit`, ensure your loop accounts for varying list lengths

In [None]:
s = "Python is great"
print(s.split())                     # <-- note: ['Python', 'is', 'great'] (splits on whitespace)

In [None]:
s = "Python   is    great"           # <-- note: when 'sep=None', .split() treats consecutive spaces as one separator
print(s.split())                     # <-- note: ['Python', 'is', 'great'] (extra spaces are ignored)



s = "Python   is    great"           # <-- note: when sep=" ", .split() treats consecutive spaces as empty strings
print(s.split(sep=' '))              # <-- note: ['Python', '', '', 'is', '', '', '', 'great'] (extra spaces are not ignored)

In [None]:
s = "Python is great"
print(s.split(maxsplit=1))           # <-- note: ['Python', 'is great'] (only splits once)


s = "Python is great"
print(s.split(maxsplit=0))           # <-- note: puts whole string (including whitespaces into a list as one element)
                                     #           turns off splitting entirely. Good for dynamically switching on & off in algos


In [None]:
## Splitting Empty Strings ##

s = ""
print(s.split())                     # <-- note: [] (empty list)
print(s.split(","))                  # <-- note: [''] (list with one empty string)
print(s.split("!"))                  # <-- note: [''] (list with one empty string)
print(s.split("?"))                  # <-- note: [''] (list with one empty string) etc...

In [None]:
## Splitting When Separator is Not Found ##

s = "Python is great"
print(s.split(","))                  # <-- note: ['Python is great'] puts whole string (including whitespaces into a list as one element)

In [None]:
## Splitting on Non-Space Characters ##

s = "apple,banana,,grape"
print(s.split(","))                  # <-- note: ['apple', 'banana', '', 'grape'] (keeps empty string)
                                     # <-- note: preserves empty sections between separators

In [None]:
## Trailing Separators ##

s = "apple,banana,grape,"
print(s.split(","))                  # <-- note: ['apple', 'banana', 'grape', ''] (trailing comma creates an empty string)

In [None]:
## Splitting on Multi-Character Separators ##

s = "hello<>world<>Python"
print(s.split("<>"))                 # <-- note: ['hello', 'world', 'Python'] (splits correctly)

In [None]:
## Overlapping Separators ##

s = "hello<<>>world<<>>Python"
print(s.split("<<>>"))               # <-- note: ['hello', 'world', 'Python'] (splits correctly)
print(s.split("<<>"))                # <-- note: ['hello', '>world', '>Python'] (not what you expect!)

In [None]:
## Unicode Characters ##

s = "apple🍎banana🍎grape"
print(s.split("🍎"))                 # <-- note: ['apple', 'banana', 'grape'] (splits correctly)

### Method: .rsplit()

In [None]:
# - .rsplit()
# - takes: one optional arg (`sep`), one optional arg (`maxsplit`, default `-1` meaning no limit)
# - returns: a list of substrings split from the right, based on `sep`
# - exceptions: no exceptions (works even if `sep` is not found)
# - modifies: does not modify the original string (returns a new list)
# - NOTE: works like `.split()`, but starts splitting from the right instead of the left
# -       Difference between rsplit() and split() is seen only when 'maxsplit' parameter is used

In [None]:
print("apple banana cherry".rsplit())  # <-- note: output: ['apple', 'banana', 'cherry']
                                       # <-- note: output: just like .split(), splits on whitespace by default

In [None]:
print("apple,banana,cherry".rsplit(","))  # <-- note: output: ['apple', 'banana', 'cherry']
                                          # <-- note: same result as .split(",") because no maxsplit is given

In [None]:
print("apple,banana,cherry".rsplit(",", 1))   # <-- note: output: ['apple,banana', 'cherry'] (one split, counted from the right)

print("apple,banana,cherry".split(",", 1))    # <-- note: output: ['apple', 'banana,cherry'] (one split, counted from the left)

### Method: .partition()

In [None]:
# - .partition()
# - takes: one required arg (`sep`), the separator to split on
# - returns: a tuple (`before`, `sep`, `after`), where:
#     - `before` → the part before `sep`
#     - `sep` → the separator itself
#     - `after` → the part after `sep`
# - exceptions: no exceptions (works even if `sep` is not found)
# - modifies: does not modify the original string (returns a tuple)
# - NOTE: always returns a **3-element tuple**, even if `sep` is missing

In [155]:
print("apple-banana-cherry".partition("-"))  # <-- note: output: ('apple', '-', 'banana-cherry')
                                             # <-- note: partitions on the first occurence of the separator
                                             # <-- note: always returns a 3tuple with string before "-" sep at 3tuple[0], sep at 3tuple[1]
                                             #           and the rest of the string at 3tuple[2]

('apple', '-', 'banana-cherry')


In [158]:
print("".partition("-"))                     # <-- note: output: ('', '', '')

('', '', '')


In [156]:
print("apple banana cherry".partition(","))  # <-- note: output: ('apple banana cherry', '', '')

('apple banana cherry', '', '')


In [None]:
print("-apple-banana".partition("-"))        # <-- note: output: ('', '-', 'apple-banana')

In [157]:
print("apple-banana-".partition("-"))        # <-- note: output: ('apple', '-', 'banana-')

('apple', '-', 'banana-')


In [None]:
print("apple banana cherry".partition(" "))  # <-- note: output: ('apple', ' ', 'banana cherry')

In [None]:
print("   ".partition(" "))                  # <-- note: output: ('', ' ', '  ')

### Method: .rpartition()

In [None]:
# - .rpartition()
# - takes: one required arg (`sep`), the separator to split on
# - returns: a tuple (`before`, `sep`, `after`), where:
#     - `before` → the part before the **last occurrence** of `sep`
#     - `sep` → the separator itself
#     - `after` → the part after the **last occurrence** of `sep`
# - exceptions: no exceptions (works even if `sep` is not found)
# - modifies: does not modify the original string (returns a tuple)
# - NOTE: always returns a **3-element tuple**, even if `sep` is missing

In [159]:
print("".partition("-"))                     # <-- note: Output: ('', '', '')
print("".rpartition("-"))                    # <-- note: Output: ('', '', '')

('', '', '')
('', '', '')


In [160]:
print("apple banana cherry".partition(","))  # <-- note: Output: ('apple banana cherry', '', '')
print("apple banana cherry".rpartition(",")) # <-- note: Output: ('', '', 'apple banana cherry')

('apple banana cherry', '', '')
('', '', 'apple banana cherry')


In [161]:
print("apple,,banana,,cherry".partition(","))  # <-- note: Output: ('apple', ',', ',banana,,cherry') (splits at the first ',')
print("apple,,banana,,cherry".rpartition(",")) # <-- note: Output: ('apple,,banana,', ',', 'cherry') (splits at the last ',')

('apple', ',', ',banana,,cherry')
('apple,,banana,', ',', 'cherry')


### Method: .join()

In [None]:
# - .join(iterable)
# - takes: an iterable of strings
# - returns: a single string with elements joined by the separator
# - exceptions: raises 'TypeError' if any element is not a string
# - modifies: does not modify the original iterable
# - iteration considerations: ensure all elements are strings before joining

In [None]:
## Basic Functionality ##

words = ["Python", "is", "awesome"]
result = " ".join(words)                  # <-- note: joins with a space
print(result)                             # <-- note: separator goes where the commas are in the list

In [None]:
## Joining Empty List ##

empty_list = []
print(",".join(empty_list))               # <-- note: returns an '' (empty string, not None or [])

In [None]:
## Joining a List with a Single Element ##

single = ["Python"]
print("H".join(single))          # <-- note: 'Python' (no separator added). sep='H' proves nothing is appended at the end

In [None]:
## Joining with Different Separators ##

words = ["Python", "is", "awesome"]
print(" | ".join(words))                 # <-- note: 'Python | is | awesome'
print("".join(words))                    # <-- note: 'Pythonisawesome' (no separator)
print("\n".join(words))                  # <-- note: multi-line output. separator can be anything!

In [None]:
## .join() Only Works on Strings ##

# The error is caught and printed (like the other error demos in this notebook)
# so that running all cells does not stop here.
numbers = [1, 2, 3]
try:
    print(",".join(numbers))             # <-- note: raises TypeError because the items are ints, not strings
except TypeError as e:
    print(repr(e))                       # <-- note: output: TypeError('sequence item 0: expected str instance, int found')

In [None]:
## Fix: Convert to Strings First ##

numbers = [1, 2, 3]
print(",".join(map(str, numbers)))       # <-- note: '1,2,3'. ",".join(...) says: join these strings together with "," between them

print(", ".join(map(str, numbers)))      # <-- note: '1, 2, 3'

In [None]:
## Unicode in .join() ##

words = ["Python", "rocks"]
print("🔥".join(words))                 # <-- note: 'Python🔥rocks'

In [None]:
## Joining Self-Referencing Lists ##

lst = ["a", "b"]
lst.append(lst)                          # <-- note: self-reference; lst is now ['a', 'b', [...]]
try:
    # Join the list itself. Wrapping it in str() would NOT raise: it would
    # join the individual characters of the text "['a', 'b', [...]]" instead.
    print(",".join(lst))                 # <-- note: item 2 is the list itself, not a string
except TypeError as e:
    print(repr(e))                       # <-- note: output: TypeError('sequence item 2: expected str instance, list found')

In [None]:
## Joining with Dictionaries ##

d = {"name": "Alice", "age": "25", "city": "NY"}

print(", ".join(d))                     # <-- note: 'name, age, city' (only joins keys) if all are strings


In [None]:

d = {"name": "Alice", "age": "25", "city": "NY"}
print(", ".join(d.values()))  # <-- note: 'Alice, 25, NY'


## String Methods: Formatting

### Method: .center()

In [None]:
# - .center()
# - takes: one required arg ('width'), one optional arg ('fillchar', default is " ")
# - returns: a new string centered in a field of given width, padded with 'fillchar'
# - exceptions: TypeError if `fillchar` is more than one character
# - modifies: does not modify the original string (returns a new one)

In [None]:
print("Python".center(10))                    # <-- note: output: "  Python  " (centered with spaces)

# width is 10 here so the result matches the documented output below
print("Python".center(10, "-"))               # <-- note: output: "--Python--" (centered with "-")

# Error Case:
try:
    print("Python".center(10, "**"))          # <-- note: 'fillchar' must be exactly one character long
except TypeError as e:
    print(repr(e))                            # <-- note: output: TypeError('The fill character must be exactly one character long')


### Method: .ljust()

In [None]:
# - .ljust()
# - takes: one required arg ('width'), one optional arg ('fillchar', default " ")
# - returns: a new string left-aligned within `width` characters, padded on the right with 'fillchar'
# - exceptions: TypeError if 'fillchar' is more than one character
# - modifies: does not modify the original string (returns a new one)
# - NOTE: commonly used for aligning text in tables, formatting reports, or creating fixed-width output
# -       .rjust(width, "0") is the general form of .zfill() (any fill character, but no special handling of a leading +/- sign); .ljust() is the mirror image, padding on the right instead of the left

In [None]:
print("Python".ljust(10))            # <-- note: output: "Python    "
print("Python".ljust(10, "-"))       # <-- note: output: "Python----"
print("Python".ljust(6))             # <-- note: output: "Python" , if width <= len(str), python returns the string as is
print("Python".ljust(3))             # <-- note: output: "Python" , width <= len(str)
print("Python".ljust(10, "0"))

In [None]:
print("".ljust(5, "*"))                # <-- note: output: "*****"
print("你好".ljust(6, "。"))           # <-- note: output: "你好。。。。" (width counts characters: 6 - 2 = 4 fill characters)
print("Price:".ljust(10) + "$5.00")   # <-- note: output: "Price:    $5.00"

In [None]:
try:
    print("Python".ljust(10, "--"))  # <-- note: TypeError: The fill character must be exactly one character long
except TypeError as e:
    print(repr(e))

### Method: .zfill()

In [None]:
# - .zfill()
# - takes: one required arg (`width`)
# - returns: a new string padded with leading zeros (`0`) to ensure it is at least `width` characters long
# - exceptions: TypeError if `width` is not an integer
# - modifies: does not modify the original string (returns a new one)
# - NOTE: commonly used for formatting numbers, padding numeric strings, or ensuring fixed-length output
# -       zeros are only added when width > len(str). The # of 0's padded on the left = width - len(str)
# -       If there is a +, or - sign at index 0, then the + or - sign comes before the 0's

In [None]:
print("42".zfill(5))                 # <-- note: output: "00042"
print("hello".zfill(10))             # <-- note: output: "00000hello"
print("Python".zfill(6))             # <-- note: output: "Python", width <= len(str), string is returned as is
print("Python".zfill(3))             # <-- note: output: "Python", if width <= len(str), then Python returns the string as is

In [None]:
print("".zfill(5))                   # <-- note: output: "00000"
print("-42".zfill(6))                # <-- note: output: "-00042"
print("+42".zfill(6))                # <-- note: output: "+00042"
print("你好".zfill(6))               # <-- note: output: "0000你好" (6 - 2 = 4 zeros; width counts characters)

### Method: .format()

In [None]:
# - .format()
# - takes: multiple positional (`{}`) and/or keyword (`{name}`) arguments
# - returns: a new string with placeholders replaced using the provided arguments
# - exceptions: IndexError if a positional placeholder is missing, KeyError if a keyword argument is missing
# - modifies: does not modify the original string (returns a formatted copy)
# - NOTE: replaced by f-strings (`f"{var}"`) in Python 3.6+, but still useful for complex formatting cases
# -       positional & keyword arguments can be combined

In [None]:
print("Hello, {}!".format("Alice"))                   # <-- note: output: "Hello, Alice!"

In [None]:
print("{} {} {}".format("red", "green", "blue"))      # <-- note: output: "red green blue"

In [None]:
print("{1} {0} {2}".format("one", "two", "three"))    # <-- note: output: "two one three"

In [None]:
print("Hello, {name}!".format(name="Alice"))          # <-- note: output: "Hello, Alice!"

In [None]:
print("{first} + {second} = {result}".format(first=5, second=3, result=8))         # <-- note: output: "5 + 3 = 8"

In [None]:
print("My name is {0} and I am {age} years old.".format("Alice", age=25))          # <-- note: output: "My name is Alice and I am 25 years old."

In [None]:
print("The number {0} appears twice: {0}".format(42))                              # <-- note: output: "The number 42 appears twice: 42"

In [None]:
data = {"name": "Alice", "age": 25}
print("Hello, {name} & {age}!".format(**data))         # <-- note: ** unpacks the dict into keyword arguments (name="Alice", age=25). output: "Hello, Alice & 25!"

In [None]:
data = {"name": "Alice", "age": 25}
print("Hello, {name}! You are {age} years old.".format(name=data["name"], age=data["age"]))  # <-- note: output: "Hello, Alice! You are 25 years old."


In [None]:
try:
    print("Hello, {name}!".format(age=30))             # <-- note: missing 'name' keyword argument
except KeyError as e:
    print(repr(e))                                     # <-- note: output: KeyError('name')

In [None]:
try:
    print("Hello, {0}, {4}, {1}!".format("index", "error"))  # <-- note: only indexes 0 and 1 exist, so {4} fails
except IndexError as e:
    print(repr(e))                                     # <-- note: output: IndexError('Replacement index 4 out of range for positional args tuple') (exact wording may vary by Python version)

### Method: .format_map()

In [None]:
# - .format_map()
# - takes: one required arg ('mapping'), a dictionary containing values to substitute
# - returns: a new string with placeholders replaced using the given dictionary
# - exceptions: KeyError if a placeholder is missing in the dictionary
# - modifies: does not modify the original string (returns a formatted copy)
# - NOTE: the mapping is used directly (not copied, as with .format(**mapping)), so a dict subclass such as collections.defaultdict can supply a default value for missing keys

In [None]:
data = {"name": "Alice", "age": 25}
print("Hello, my name is {name} and I am {age}.".format_map(data))  # <-- note: output: "Hello, my name is Alice and I am 25."

In [None]:
from collections import defaultdict

try:
    data = defaultdict(lambda: "Unknown", {"name": "Alice"})
    print("{name} is {age} years old.".format_map(data))             # <-- note: output: "Alice is Unknown years old." (no KeyError: the defaultdict supplies "Unknown" for the missing 'age')
except KeyError as e:
    print(repr(e))                                                   # <-- note: not reached here; a plain dict without 'age' would raise KeyError('age')