# Python String Variables

In [None]:
# By default a notebook cell only displays the value of its last expression.
# Setting ast_node_interactivity to "all" makes the notebook display the
# value of every expression in a cell instead.


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Let's talk about data in memory on our computer.  It's all binary. But we can look at or "interpret" the data in different ways.

In [None]:
# Six raw bytes in memory.  The b prefix matters: int.from_bytes() needs a
# bytes-like object and .decode() only exists on bytes, not on str.
x = b"\x70\x79\x74\x68\x6f\x6e"


#As binary (6 bytes = 48 bits, zero padded)
format(int.from_bytes(x, byteorder="big"), "048b")

#As hex (6 bytes = 12 hex digits, zero padded)
format(int.from_bytes(x, byteorder="big"), "012x")

#As decimal numbers (iterating over bytes yields ints, so convert each to str)
print(",".join(str(num) for num in x))

#As characters
print(x.decode())

#But also as code!
print("Print is just bytes in memory", print)

#And EXE's and DLL's, and JPEG (Images) and PDF's etc,etc

There are a couple of different ways that strings store text.

ASCII

https://www.asciitable.com/

2-Byte Unicode 

UTF-8 Unicode

https://www.lookuptables.com/text/unicode-characters

# Python String Types

Python has several String Type Variables.  Here are some of the most commonly used ones and their purposes:

str - This is the most common string type in Python, and it is used for working with Unicode text.  When a str is converted to bytes with encode() (or bytes are converted back with decode()), UTF-8 is the default encoding.

bytes - Every 8 bits are stored as a value between 0 and 255. No interpretation is done on them unless they are printed or viewed. When printed or viewed, Python shows each byte that is a printable ASCII character as that character and shows a hexadecimal escape (such as \xff) for every other byte. This is used for working with byte data, such as images, audio, and binary file formats.

bytearray - This is similar to bytes, but it is "mutable", meaning that you can change its contents.


## String Methods

In [None]:
# Sample text padded with three spaces on each side
s = "   hello world!   "

# capitalize(): upper-cases the first character and lower-cases the rest
t = s.capitalize()
print(f"Capitalized string: {t}")

# upper(): every letter becomes uppercase
t = s.upper()
print(f"Uppercase string: {t}")

# lower(): every letter becomes lowercase
t = s.lower()
print(f"Lowercase string: {t}")

# strip(): drops the whitespace at both ends
t = s.strip()
print(f"Stripped string: {t}")

# replace(): swaps every "l" for an "x"
t = s.replace("l", "x")
print(f'Replacing "l" with "x": {t}')

# split(): with no argument, breaks the text apart on whitespace
t = s.split()
print(f"Split string: {t}")

# join(): glues the words from the previous step back together with commas
t = ",".join(t)
print(f"Joined string: {t}")

# startswith(): tested against the original, still-padded string
print(
    "The string starts with 'hello'"
    if s.startswith("hello")
    else "The string does not start with 'hello'"
)

# endswith(): also tested against the original, still-padded string
print(
    "The string ends with 'world'"
    if s.endswith("world")
    else "The string does not end with 'world'"
)



It is important to recognize that these methods DO NOT change the value in the variable. Instead they produce NEW strings that show the changes.

In [None]:
# Start with a padded, lowercase string
x = "    hello world    "
# The method hands back a NEW string; here that result is not stored anywhere
x.upper()

# x still holds the original text
print(x)

# To change what `x` holds, assign the method's result back to `x`.
x = x.upper()
print(x)

# Every "immutable" type behaves this way: methods never update the variable in place.

## String Slicing

Python Slicing operations allow us to extract a substring from a larger string.

string_object[start:stop:step]

start, stop and step are optional values.
If no start is provided it is the same as starting at 0.
If no stop is provided it will go all the way to the end of the string.
If no step is provided it will assume a step of 1.
If you specify a stop it must come after the first colon.
If you specify a step it must come after the second colon.
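When you place a negative value in step the string is walked backwards: a missing start means "begin at the last character" and a missing stop means "go all the way back through the first character". A stop that you do specify is still "up to, not including".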


In [None]:
# Text used for every slicing example below
s = "Hello, World!"

# Index 0 is the first character
t = s[0]
print(f"First character: {t}")

# Index -1 is the last character
t = s[-1]
print(f"Last character: {t}")

# Characters at index 2 up to, not including, index 6
t = s[2:6]      # same as s[2:6:1]
print(f"Substring [2:6]: {t}")

# From index 2 through the end of the string
t = s[2:]      # same as s[2:len(s):1]
print(f"Substring [2:]: {t}")

# From the beginning up to, not including, index 6
t = s[:6]    # same as s[0:6:1]
print(f"Substring [:6]: {t}")

# The whole string, beginning to end
t = s[:]    # same as s[0:len(s):1]
print(f"Substring [:]: {t}")

# Every second character, beginning at index 0
t = s[::2]    # same as s[0:len(s):2]
print(f"Every other character [::2]: {t}")

# Every second character, beginning at index 1
t = s[1::2]      # same as s[1:len(s):2]
print(f"Every other character [1::2]: {t}")

# The whole string backwards
t = s[::-1]      # starts at the last character and walks back through index 0
print(f"Reversed string [::-1]: {t}")

# Every second character, walking backwards from the last one
t = s[::-2]      # starts at the last character, stepping back two at a time
print(f"Every other character in reverse order [::-2]: {t}")

# From index -6 up to, not including, index -1
t = s[-6:-1]    # same as s[-6:-1:1]
print(f"Substring [-6:-1]: {t}")

# From index -6 through the end of the string
t = s[-6:]
print(f"Substring [-6:]: {t}")

# From the beginning up to, not including, index -7
t = s[:-7]
print(f"Substring [:-7]: {t}")



## Converting Integers into Strings

The `format()` function is a useful way to convert integers into strings that show the value in other bases (base2, base16).

In [None]:
# Convert integer to 8 bit binary with leading zeros
x = 132
print("132 as binary with format", format(x, "08b"))

# This is often better than simply using bin() because of leading zeros
x = 132
print("132 as binary with bin", bin(x))

# Convert integer to 16 bit binary with leading zeros
x = 132
print("132 as binary 16 characters wide", format(x, "016b"))

# Convert integer to 4 hexadecimal digits with leading zeros
# (the label and the value are separate print() arguments, joined by a comma)
x = 255
print("255 as hex 4 characters wide with format", format(x, "04x"))

# Often better than using hex() because of leading zeros
x = 255
print("255 as hex using hex function", hex(x))

# Convert integer to 4 UPPERCASE hexadecimal digits with leading zeros
x = 255
print("255 as hex with capital letters", format(x, "04X"))

### Basic String Operations

Here are some examples of common operations you might perform on strings

In [None]:
# The text most of the examples below work on
s = "Hello, World!"

# len() counts the characters
n = len(s)
print(f"Length of string: {n}")

# + glues strings together
s1, s2 = "Hello", "World"
s3 = s1 + " " + s2
print(f"Concatenated string: {s3}")

# * repeats a string
s4 = "Ha" * 4
print(f"Repeated string: {s4}")

# replace() swaps one substring for another
s5 = s.replace("World", "Universe")
print(f"Replaced string: {s5}")

# str() turns a number into text
x = 42
s6 = str(x)
print(f"Converted number to string: {s6}")

# float() turns text into a floating point number
s7 = "3.14159"
x = float(s7)
print(f"Converted string to a float: {x}")

# title() upper-cases the first letter of every word
s8 = "hello world"
s9 = s8.title()
print(f"Title-cased string: {s9}")

# upper() upper-cases every letter
s10 = "hello world"
s11 = s10.upper()
print(f"Uppercase string: {s11}")

# lower() lower-cases every letter
s12 = "HELLO WORLD"
s13 = s12.lower()
print(f"Lowercase string: {s13}")

# "in" tests whether a substring occurs anywhere in the string
s14 = "Hello, World!"
if "World" in s14:
    print("Substring found!")

# find() reports the index where a substring first occurs
s15 = "Hello, World!"
i = s15.find("World")
print(f"Index of first occurrence of substring: {i}")

# strip() trims whitespace from both ends
s16 = "  Hello, World!  "
s17 = s16.strip()
print(f"Stripped string: {s17}")

# split() breaks the text on a separator; the three pieces are unpacked into three names
s18 = "apple,banana,orange"
s19, s20, s21 = s18.split(",")
print(f"The third value upper case is : {s21.upper()}")

# join() is the reverse: it glues a list of strings together with a separator
l = ["apple", "banana", "orange"]
s22 = ",".join(l)
print(f"Joined string: {s22}")



### Examples of fstrings

F-Strings allow you to place variable names and/or expressions inside of your strings, inside of curly brackets.

{ variable : format-specifier}

Example:
```
x = "slim shady"
print(f"My name is {x:*^10}")
```

https://docs.python.org/3/library/string.html#formatspec

In [None]:
# values used by the examples below
name, age = "Alice", 30
height, weight = 1.75, 65.4

# plain substitution of a variable
print(f"My name is {name}.")

# integer zero-padded to at least 2 digits
print(f"I am {age:02d} years old.")

# float rounded to 2 decimal places
print(f"My height is {height:.2f} meters.")

# float in exponent (scientific) notation
print(f"My weight is {weight:e} kilograms.")

# text centered in a 20 character field, padded with "-"
message = "Hello"
print(f"{message:-^20}")

# text right-aligned in a 20 character field, padded with "-"
message = "World"
print(f"{message:->20}")

# any expression may go inside the braces, including a call to str()
x, y = 42, 3.14
print(f"x = {str(x)}")
print(f"y = {str(y)}")

# a string built with join() and then embedded
words = ["hello", "world"]
sentence = " ".join(words)
print(f"sentence = {sentence}")

# a string changed with replace() ("fox" becomes "cat") and then embedded
text = "The quick brown fox jumps over the lazy dog."
new_text = text.replace("fox", "cat")
print(f"new_text = {new_text}")



In [None]:
# zero-pad an integer to at least 3 digits
x = 42
leading_zeroes = format(x, "03d")
print(f"integer formatting with leading zeroes: {leading_zeroes}")

# the same integer written in base 2, base 8 and base 16
x = 42
binary, octal, hexadecimal = format(x, "b"), format(x, "o"), format(x, "x")
print(f"binary formatting: {binary}")
print(f"octal formatting: {octal}")
print(f"hexadecimal formatting: {hexadecimal}")

# a float with 2 decimal places, and in exponent notation
y = 3.14159
precision, exponent = format(y, ".2f"), format(y, "e")
print(f"float formatting with precision: {precision}")
print(f"float formatting with exponent notation: {exponent}")

# text centered in a 20 character wide field
message = "Hello"
padding = format(message, "^20")
print(f"string padding with center alignment: {padding}")

# str() gives the plain text form of a number
x, y = 42, 3.14
x_string, y_string = str(x), str(y)
print(f"converting integer to string: {x_string}")
print(f"converting float to string: {y_string}")

# base conversion once more, this time labelled as conversions
x = 42
binary, octal, hexadecimal = format(x, "b"), format(x, "o"), format(x, "x")
print(f"converting integer to binary: {binary}")
print(f"converting integer to octal: {octal}")
print(f"converting integer to hexadecimal: {hexadecimal}")




In [None]:
# Create a byte object with an initial value
x = b"Hello, World!"

# Decode bytes into a string
print("Decode bytes into a string:", x.decode())

# Encode string into bytes using ASCII encoding
s = "Hello, World!"
x = s.encode("ascii")
print("Encode string into bytes using ASCII encoding:", x)

# Get the length of the byte object
print("Get the length of the byte object:", len(x))

# Convert byte object to a list of integers
print("Convert byte object to a list of integers:", list(x))

# Get the index of the first occurrence of a byte in the byte object
print("Get the index of the first occurrence of a byte in the byte object:", x.index(b"o"))

# Count the number of occurrences of a byte in the byte object
print("Count the number of occurrences of a byte in the byte object:", x.count(b"o"))

# Replace all occurrences of a byte with another byte in the byte object
print("Replace all occurrences of a byte with another byte in the byte object:", x.replace(b"o", b"i"))

# Convert all bytes to lowercase
print("Convert all bytes to lowercase:", x.lower())

# Convert all bytes to uppercase
print("Convert all bytes to uppercase:", x.upper())

# Check if all bytes are alphanumeric
print("Check if all bytes are alphanumeric:", x.isalnum())

# Check if all bytes are alphabetical
print("Check if all bytes are alphabetical:", x.isalpha())

# Check if all bytes are digits
print("Check if all bytes are digits:", x.isdigit())

# Check if all bytes are printable characters
# (bytes has no isprintable() method, so decode to str first; x was
# produced by encode("ascii") above, so decoding as ASCII cannot fail)
print("Check if all bytes are printable characters:", x.decode("ascii").isprintable())

# Check if all bytes are whitespace characters
print("Check if all bytes are whitespace characters:", x.isspace())

# Return a new byte object with the bytes reversed
print("Return a new byte object with the bytes reversed:", x[::-1])



Not all byte sequences can be interpreted as strings.

Example: x =  b'\xf0\x9f\x90\x8d'

In [None]:
# Four raw bytes, none of which is in the ASCII range
x = b"\xf0\x9f\x90\x8d"

# Show each byte as 8 binary digits, separated by two spaces
print("  ".join(f"{byte:08b}" for byte in x))

# Interpret the bytes as text (decode() uses UTF-8 unless told otherwise)
x.decode()