# 1.

In [1]:
# String Methods:

# The str class provides a rich set of methods for working with strings. These methods allow you to perform various operations
# on strings, including:
    
# a) String modification (limited): While strings are immutable, some methods create a new string object with the modification.
#     Examples include upper(), lower(), strip(), etc.
# b) Searching: Methods like find(), index(), and rfind() help you locate substrings within a string.
# c) Splitting and joining: You can split strings into lists using split() and join elements of a list into a string using join().
# d) Formatting: String formatting with the format() method or f-strings (f literals) allows you to create dynamic strings
#     with variables.
# e) Regular expressions: The re module provides powerful tools for advanced pattern matching and string manipulation using 
#     regular expressions
    
# some important methods:
# 1. capitalize(): Converts the first character of the string to uppercase and the rest to lowercase.
# 2. casefold(): Returns a casefolded copy of the string (similar to lowercase but more aggressive).
# 3. center(width, fillchar): Returns a centered string of length width with the original string centered and padded 
#     with fillchar if necessary.
# 4. count(sub[, start[, end]]): Returns the number of non-overlapping occurrences of substring sub in the string.
# 5. endswith(suffix[, start[, end]]): Returns True if the string ends with the specified suffix, otherwise False.
# 6. find(sub[, start[, end]]): Returns the lowest index of substring sub in the string, or -1 if not found.

# 2.

In [2]:
# Here's an overview of the differences:

# a) Single-Quoted Strings ('...'):

# 1. Single-quoted strings are enclosed in single quotes.
# 2. Escape sequences such as \n, \t, and \\ are interpreted within single-quoted strings.
# 3. To include a single quote within a single-quoted string, you can either escape it with a backslash (\') or use double 
#     quotes instead.
# Example: 'Hello, World!'

# b) Double-Quoted Strings ("..."):

# 1. Double-quoted strings are enclosed in double quotes.
# 2. Like single-quoted strings, escape sequences such as \n, \t, and \\ are interpreted within double-quoted strings.
# 3. To include a double quote within a double-quoted string, you can either escape it with a backslash (\") or use single 
#     quotes instead.
# Example: "Hello, World!"
                                                                                                       
# c) Triple-Quoted Strings ('''...''' or """..."""):

# 1. Triple-quoted strings are enclosed in three single quotes ('''...''') or three double quotes ("""...""").
# 2. These strings can span multiple lines, making them suitable for multiline strings.
# 3. Escape sequences are interpreted in triple-quoted strings, similar to single- and double-quoted strings.
# 4. Triple-quoted strings are often used for docstrings (documentation strings) and multiline text.

# Example:
# '''
# This is a 
# multiline
# string.
# '''

# 3.

In [4]:
# There are several ways to include non-ASCII Unicode characters in a string:

# a) Using a 16-bit Unicode escape sequence:
# \uXXXX inserts the character whose code point is given by exactly four hexadecimal digits XXXX
# (so it covers U+0000 through U+FFFF).
s_short_escape = '\u00e9'  # 'é' (U+00E9)

# b) Using the Unicode character directly:
# If the source file is saved as UTF-8 (the default source encoding in Python 3) or declares another encoding that
# contains the character, the non-ASCII character can be typed straight into the literal.
s_literal = 'é'  # 'é'

# c) Using a 32-bit Unicode escape sequence:
# \UXXXXXXXX takes exactly eight hexadecimal digits and can therefore express any code point, including those
# above U+FFFF that \uXXXX cannot reach.
s_long_escape = '\U000000e9'  # 'é'

# All three spellings produce the same one-character string; fail loudly if that ever stops being true.
assert s_short_escape == s_literal == s_long_escape

# Keep the name `s` bound to the same final value the cell has always produced.
s = s_long_escape

# 4.

In [5]:
# Differences between text-mode and binary-mode files:

# a) Text-Mode Files:

# 1. Text-mode files are opened using the 't' mode flag (or no mode flag, as 'r' or 'w' implicitly open files in text mode).
# 2. These files are used for reading and writing text data, such as strings of characters.
# 3. Text-mode files handle line endings automatically: when reading, '\n', '\r\n', and '\r' are all translated to '\n';
#     when writing, '\n' is translated to the platform's line separator (Unix, Windows, etc.).
# 4. When reading from a text-mode file, Python decodes the bytes to Unicode strings using the specified encoding (if none
#     is specified, the default is the platform's locale encoding, which is often but not always UTF-8).
# 5. When writing to a text-mode file, Python encodes Unicode strings into bytes using the specified encoding before writing
#     them to the file.

# b) Binary-Mode Files:

# 1. Binary-mode files are opened using the 'b' mode flag.
# 2. These files are used for reading and writing raw binary data, such as images, audio files, or any file where you don't
#     want automatic encoding/decoding.
# 3. Binary-mode files treat data as sequences of bytes and do not perform any automatic encoding/decoding or line ending 
#     conversions.
# 4. When reading from a binary-mode file, data is read as-is without any interpretation or decoding, and you get bytes objects.
# 5. When writing to a binary-mode file, you must provide data as bytes objects, and they are written directly to the file 
#     without any encoding.
    
# example:
# Text-mode file (reading and writing strings).
# The encoding is passed explicitly so the bytes on disk do not depend on the platform's default locale encoding.
with open('text_file.txt', 'wt', encoding='utf-8') as f:
    f.write('Hello, world!\n')

with open('text_file.txt', 'rt', encoding='utf-8') as f:
    data = f.read()
    # The string already ends with '\n' and print() adds another, so a blank line follows the text.
    print(data)  # Outputs: Hello, world!

# Binary-mode file (reading and writing bytes).
# Binary mode takes no encoding argument: the bytes are written and read back exactly as given.
with open('binary_file.bin', 'wb') as f:
    # These are the ASCII bytes of 'Hello, world!\n'.
    f.write(b'\x48\x65\x6c\x6c\x6f\x2c\x20\x77\x6f\x72\x6c\x64\x21\x0a')

with open('binary_file.bin', 'rb') as f:
    data = f.read()
    print(data)  # Outputs: b'Hello, world!\n'

Hello, world!

b'Hello, world!\n'


# 5.

In [7]:
# If you have a Unicode text file that is encoded in a different encoding than your platform's default, you can interpret it correctly by explicitly specifying the encoding when reading the file. In Python, you can do this using the open() function with the appropriate encoding parameter.

# Here's how you can interpret a Unicode text file encoded in a different encoding:

# a) Identify the Encoding: First, you need to know the encoding used to save the text file. Common encodings include UTF-8, 
#     UTF-16, ISO-8859-1 (Latin-1), etc.

# b) Open the File with the Correct Encoding: Use the open() function with the 'r' mode for reading and specify the encoding 
#     parameter to match the actual encoding of the file.

# c) Read and Decode the File Content: When you read from the file, Python will automatically decode the bytes into Unicode 
#     strings using the specified encoding.
#     Example: with open('latin1_file.txt', 'r', encoding='iso-8859-1') as f: text = f.read()


# 6.

In [9]:
# The best way to create a Unicode text file in a specific encoding format depends on the tools and programming languages you
# are using. In Python, for example, you can use the open() function with the appropriate encoding parameter to write Unicode 
# text to a file in a specific encoding format.

# example:
# The text we want to store on disk
text_content = "This is a Unicode text file."

# Create (or overwrite) the file, naming the target encoding explicitly (UTF-8 here)
with open(file='unicode_file.txt', mode='w', encoding='utf-8') as out_file:
    out_file.write(text_content)

print("Unicode text file created successfully.")

Unicode text file created successfully.


# 7.

In [10]:
# ASCII (American Standard Code for Information Interchange) text can be considered a form of Unicode text under certain 
# conditions:

# a) Compatibility: ASCII text is a subset of Unicode. This means that any ASCII text is inherently Unicode-compatible because
#     Unicode includes the ASCII character set in its encoding.

# b) Encoding: Unicode supports multiple encoding formats, such as UTF-8, UTF-16, and UTF-32. ASCII text encoded using UTF-8,
#     for example, is Unicode text because UTF-8 is a Unicode encoding that represents characters using a variable number of 
#     bytes, where ASCII characters are represented using a single byte that matches their ASCII code.

# c) Interoperability: Since ASCII is a subset of Unicode, ASCII text can be seamlessly used and interchanged with Unicode text
#     in applications and systems that support Unicode encoding. This allows ASCII text to coexist and interact with other 
#     Unicode text without any issues.

# d) Character Representation: In Unicode, ASCII characters are mapped to the same code points as in ASCII. For example, 
#     the ASCII character 'A' (U+0041) is also represented by the same code point in Unicode. This ensures that ASCII text
#     represented in Unicode retains its original character meanings and properties.

# 8.

In [None]:
# The change in string types in Python 3.x, specifically the introduction of Unicode as the default string type, has several 
# effects on code compared to earlier versions of Python (Python 2.x). Here are some notable effects:

# a) Default Unicode Behavior:

# 1. In Python 3.x, strings are Unicode by default. This means that strings can represent a wider range of characters, including
#     non-ASCII characters from different languages and symbols.
# 2. In Python 2.x, strings were represented as bytes by default, and Unicode strings required a 'u' prefix.

# b) Encoding and Decoding:

# 1. In Python 3.x, explicit encoding and decoding are necessary when working with bytes and strings. This ensures clarity and
#     helps prevent encoding-related bugs.
# 2. Python 2.x had implicit conversion between byte strings and Unicode strings, which could lead to encoding errors if not
#     handled carefully.

# c) Print Statement:

# 1. In Python 3.x, the print statement was replaced by the print() function, which requires parentheses. This change facilitates 
#     consistency with other function calls.
# 2. Python 2.x used the print statement without parentheses (e.g. print "hello"), which is a syntax error in Python 3.x.

# d) str vs bytes Distinction:

# 1. Python 3.x distinguishes clearly between text (Unicode) strings (str type) and byte strings (bytes type). This distinction 
#     helps in writing code that handles text and binary data appropriately.
# 2. In Python 2.x, the str type was a byte string that was commonly used for both text and binary data (alongside a separate
#     unicode type), leading to potential confusion and bugs.

# e) Unicode Literals:

# 1. In Python 3.x, every string literal is a Unicode string by default, so no prefix is needed. The 'u' prefix comes from
#     Python 2.x; Python 3.3+ still accepts it to ease porting, but it has no effect.
# 2. Python 2.x treated string literals as byte strings by default unless they were marked with a 'u' prefix.

# f) String Formatting:

# 1. Python 3.6 introduced f-strings (f"Hello {name}"), which are not available in Python 2.x. The str.format() method 
#     exists in both Python 2.6+ and Python 3.x, and % formatting still works in Python 3.x; f-strings and str.format()
#     generally provide more flexibility and readability than % formatting.