In [None]:
# we do a LOT with strings 
# we're going to learn some new tricks with working with them

In [1]:
# two sample strings that the rest of the notebook keeps coming back to
palindrome, breakfast = "a man a plan a canal panama", "toast"

In [3]:
palindrome[0] # strings are indexed like lists

'a'

In [5]:
# there are negative indexes
palindrome[-1]

'a'

In [7]:
palindrome[-2]
# negative indexes go from right to left starting at the last letter
# in these examples, -1 is a, -2 is m, et cetera

'm'

In [8]:
# what's the first letter of the variable breakfast using negative indexes?
breakfast[-5]

't'

In [10]:
# slice notation with strings?  you betcha.
breakfast[1:]

'oast'

In [12]:
breakfast[::2]

'tat'

In [13]:
palindrome[::3]

'aaal capa'

In [14]:
# strings are immutable. this means that they cannot be modified.
# palindrome = "racecar" # this just sets palindrome equal to a new thing
try:
    palindrome[0] = "!"
except TypeError as err:
    # the error is the whole point of this cell, so catch it and show it;
    # that way "Restart & Run All" keeps going past this demonstration
    print("TypeError: {0}".format(err))

TypeError: 'str' object does not support item assignment

In [15]:
# you could cast a string to a list and then replace the letter you want to replace
# and then cast it back to a string.  that's a lot of extra steps.
paliList = list(palindrome)
print(paliList)

['a', ' ', 'm', 'a', 'n', ' ', 'a', ' ', 'p', 'l', 'a', 'n', ' ', 'a', ' ', 'c', 'a', 'n', 'a', 'l', ' ', 'p', 'a', 'n', 'a', 'm', 'a']


In [16]:
paliList[0] = "!"
editedPaliString = "".join(paliList)

In [17]:
editedPaliString

'! man a plan a canal panama'

In [18]:
# python has better ways of doing this sort of thing
easierPaliEdit = palindrome.replace("a","!")
print(easierPaliEdit)
# the problem with this is that replace replaces every instance of the thing you specify
# (pass a count as a third argument to limit it, e.g. palindrome.replace("a","!",1))

! m!n ! pl!n ! c!n!l p!n!m!


In [20]:
# .replace doesn't change the variable
palindrome

'a man a plan a canal panama'

In [21]:
palindrome.replace("a","!")
# .replace returns a new string

'! m!n ! pl!n ! c!n!l p!n!m!'

In [22]:
# replace can replace multiple characters at a time
palindrome.replace("panama","new york city")

'a man a plan a canal new york city'

In [23]:
# if you need to find the occurrence of a substring in a string
# use .find
palindrome.find("a")
# .find returns the index of the first instance that you're looking for

0

In [24]:
palindrome.find("!")
# .find returns a -1 if the thing you're looking for isn't there

-1

In [26]:
palindrome.find("man")
# the number .find returns is where the string you're looking for starts

2

In [27]:
# you can also use in instead of find
# want to know if a letter is a vowel?
"a" in "aeiouAEIOU"

True

In [28]:
"aeiouAEIOU".find("a")

0

In [29]:
"man" in palindrome

True

In [30]:
"mna" in palindrome
# the in keyword is letting us check if the string mna is in palindrome, not if the letters
# m, n, and a are all in palindrome

False

In [32]:
# attributes and the string module
import string

In [33]:
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [34]:
string.ascii_uppercase

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [35]:
string.ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [36]:
string.digits

'0123456789'

In [37]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [39]:
string.whitespace

' \t\n\r\x0b\x0c'

In [40]:
print(string.whitespace)

 	



In [41]:
# whitespace characters
# ' ' - space
# '\t' - tab
# '\n' - newline
# '\r' - a return
# '\x0b' - vertical tab (also '\v') - was used for printing, not really used now
# '\x0c' - a form feed or a page break

# notice that all of them except the space start with a backslash. this indicates one of
# two things: either that what comes after is a special character or you're escaping a
# character that normally means something

In [43]:
myString = "hello my name is pickles"
print(myString)

hello my name is pickles


In [44]:
print("hello my name is \"pickles\"") # the backslash escapes the quote character and python
# treats it like a regular old character instead of the end of the string

hello my name is "pickles"


In [45]:
stringWithSpecialChars = "hello my name is \"pickles\""
repr(stringWithSpecialChars)
# repr returns a string representation of its argument - quotes and special characters
# are shown as escape sequences instead of being printed

'\'hello my name is "pickles"\''

In [46]:
stringWithBreaks = "\n\nHowdy\nHey"
print(stringWithBreaks)



Howdy
Hey


In [47]:
repr(stringWithBreaks)

"'\\n\\nHowdy\\nHey'"

In [48]:
# trick for finding whitespace in something:
# walk the string one character at a time and test membership in string.whitespace
print(palindrome)
for letter in palindrome:
    letterIsWhitespace = letter in string.whitespace
    report = letter + " is whitespace? " + str(letterIsWhitespace)
    print(report)

a man a plan a canal panama
a is whitespace? False
  is whitespace? True
m is whitespace? False
a is whitespace? False
n is whitespace? False
  is whitespace? True
a is whitespace? False
  is whitespace? True
p is whitespace? False
l is whitespace? False
a is whitespace? False
n is whitespace? False
  is whitespace? True
a is whitespace? False
  is whitespace? True
c is whitespace? False
a is whitespace? False
n is whitespace? False
a is whitespace? False
l is whitespace? False
  is whitespace? True
p is whitespace? False
a is whitespace? False
n is whitespace? False
a is whitespace? False
m is whitespace? False
a is whitespace? False


In [2]:
# concatenation
palindrome+palindrome

'a man a plan a canal panamaa man a plan a canal panama'

In [3]:
breakfast+breakfast

'toasttoast'

In [4]:
# multiplication?
breakfast*5

'toasttoasttoasttoasttoast'

In [6]:
# order of operations holds even with string concatenation
print(breakfast+' '*3+breakfast)

toast   toast


In [7]:
print((breakfast+' ')*3)

toast toast toast 


In [8]:
# upper / lower case
"YES!".lower()

'yes!'

In [9]:
"noooooooOOOOOOO".upper()

'NOOOOOOOOOOOOOO'

In [10]:
palindrome.title()

'A Man A Plan A Canal Panama'

In [11]:
palindrome

'a man a plan a canal panama'

In [12]:
"nOoOoOoOooooo".swapcase()

'NoOoOoOoOOOOO'

In [13]:
stringWithSpace = "\t\t\t\t\n\n     \nsomewords surrounded by whitespace\n\n\t\n  "
print(stringWithSpace)

				

     
somewords surrounded by whitespace

	
  


In [14]:
repr(stringWithSpace)

"'\\t\\t\\t\\t\\n\\n     \\nsomewords surrounded by whitespace\\n\\n\\t\\n  '"

In [15]:
# remove whitespace from end of string: .rstrip()
# remove whitespace from beginning of string: .lstrip()
print(stringWithSpace.rstrip())


				

     
somewords surrounded by whitespace


In [16]:
print(stringWithSpace.lstrip())

somewords surrounded by whitespace

	
  


In [17]:
# .strip removes whitespace from both sides of the string
print(stringWithSpace.strip())
# important! .strip does not remove whitespace within the string

somewords surrounded by whitespace


In [18]:
# to get individual words from a string, use .split
# what kind of data type does split return?
words = stringWithSpace.split()
words
# there's no whitespace in the list. by default, split splits on whitespace

['somewords', 'surrounded', 'by', 'whitespace']

In [1]:
weirdSplit = stringWithSpace.split("o")
weirdSplit
# the parameter passed to split is the delimiter

['\t\t\t\t\n\n     \ns', 'mew', 'rds surr', 'unded by whitespace\n\n\t\n  ']

In [20]:
# how would we put this back together?
# we can use join
"o".join(weirdSplit)

'\t\t\t\t\n\n     \nsomewords surrounded by whitespace\n\n\t\n  '

In [22]:
# formatting strings
# if we want to print out numbers and strings together, what do we normally do?
# we normally cast the number as a string and concatenate

# today we'll learn the format string method to make concatenation and printing easier/faster
# /better/et cetera

# four pedal names, reused by the formatting examples that follow
pedalOne, pedalTwo = "Wah", "Fuzz"
pedalThree, pedalFour = "More Fuzz", "Phaser"

# the "old" way: glue everything together with +
print(
    "The order of the pedals is "
    + pedalOne + ", "
    + pedalTwo + ", "
    + pedalThree + ", and "
    + pedalFour
)

The order of the pedals is Wah, Fuzz, More Fuzz, and Phaser


In [23]:
print("The order of the pedals is {0}, {1}, {2}, {3}".format(pedalOne,pedalTwo,pedalThree,pedalFour))

The order of the pedals is Wah, Fuzz, More Fuzz, Phaser


In [24]:
# you can change the order around as you see fit
print("The order of the pedals is {1}, {0}, {3}, {2}".format(pedalOne,pedalTwo,pedalThree,pedalFour))

The order of the pedals is Fuzz, Wah, Phaser, More Fuzz


In [25]:
pedalFive = 19
print("The order of the pedals is {0}, {1}, {2}, {3}".format(pedalOne,pedalTwo,pedalThree,pedalFive))

The order of the pedals is Wah, Fuzz, More Fuzz, 19


In [26]:
# you don't have to use every argument in the .format method
print("The order of the pedals is {1}, {1}, {2}, {1}".format(pedalOne,pedalTwo,pedalThree,pedalFour))

The order of the pedals is Fuzz, Fuzz, More Fuzz, Fuzz


In [27]:
# range(5) counts 0, 1, 2, 3, 4 - the start of 0 is implied
for i in range(5):
    print("The current number is {0}".format(i))

The current number is 0
The current number is 1
The current number is 2
The current number is 3
The current number is 4


In [29]:
# you can name the parameters too
# each {name} is filled in by the keyword argument with that name; the order the
# keyword arguments are listed in does not matter
# NOTE(review): pTwo is given pedalThree and pThree is given pedalTwo, so "More Fuzz"
# prints before "Fuzz" - confirm the swap is intentional
print("The order of the pedals is {pOne}, {pTwo}, {pThree}, {pFour}"
      .format(pOne=pedalOne,pThree=pedalTwo,pTwo=pedalThree,pFour=pedalFour))

The order of the pedals is Wah, More Fuzz, Fuzz, Phaser


In [31]:
# this one might hurt your brains a little:
# the keys of the dictionary line up with the {names} in the format string
pedalDictionary = {
    "pOne": "Wah",
    "pTwo": "Fuzz",
    "pThree": "More Fuzz",
    "pFour": "Phaser",
}
pedalTemplate = "The order of the pedals is {pOne}, {pTwo}, {pThree}, {pFour}"
print(pedalTemplate.format(**pedalDictionary))

The order of the pedals is Wah, Fuzz, More Fuzz, Phaser


In [None]:
# the ** operator allows us to take a dictionary of key-value pairs and "unpack" it into a
# comma-separated list of named arguments in a function call. for example,
# keyName1=keyName1Value,keyName2=keyName2Value, et cetera. in our example, we get
# pOne='Wah',pTwo='Fuzz',pThree='More Fuzz',pFour='Phaser'

# this is very useful to use with .format

In [32]:
pedalDictionary.keys()

dict_keys(['pOne', 'pTwo', 'pThree', 'pFour'])

In [35]:
try:
    print("Pedals: {0},{1},{2},{3}".format(**pedalDictionary))
except IndexError as err:
    # the error is the demonstration: catch it and show it so the rest of the
    # notebook still runs
    print("IndexError: {0}".format(err))
# this doesn't work because {0},{1},{2},{3} ask for positional arguments, and
# **pedalDictionary only supplies named (keyword) arguments - no positional
# arguments were passed at all, so there is nothing at position 0

IndexError: tuple index out of range

In [37]:
# you can do more complex stuff when you have more complex dictionaries, e.g., nested elements
# one entry per pedal; each entry is itself a dictionary of details
pedals = {
    "pedalOne": {"Manufacturer": "DOD", "Model": "Carcosa"},
}
# {pedalOne[Model]} looks up the "Model" key inside the pedalOne argument
print("This pedal, the {pedalOne[Model]}, is made by {pedalOne[Manufacturer]}".format(**pedals))

This pedal, the Carcosa, is made by DOD


In [2]:
# let's make things easier to read
# some big numbers that are hard to read without thousands separators
twitterDeets = {
    "favorites_count": 10,
    "followers_count": 84814791,
    "friends_count": 631357,
}
print("Followers {followers_count:,}, Friends Count: {friends_count:,}".format(**twitterDeets))

Followers 84,814,791, Friends Count: 631,357


In [None]:
# a colon after the variable name signals formatting. whatever is after the colon
# specifies the kind of formatting. in this case, the comma puts commas into numbers where
# we would expect them (which is every three digits)

In [3]:
try:
    print("What happens if we try to put commas in a string? {0:,}".format("format 12345"))
except ValueError as err:
    # the error is the demonstration: catch it and show it so the rest of the
    # notebook still runs
    print("ValueError: {0}".format(err))
# this produces an error that means you can't use the , with a string

ValueError: Cannot specify ',' with 's'.

In [4]:
try:
    print("What happens if we try to put commas in a string? {0:,}".format("12345"))
except ValueError as err:
    # the error is the demonstration: catch it and show it so the rest of the
    # notebook still runs
    print("ValueError: {0}".format(err))
# same thing even when the string really looks like a number

ValueError: Cannot specify ',' with 's'.

In [5]:
print("What happens if we try to put commas in a string? {0:,}".format(int("12345")))

What happens if we try to put commas in a string? 12,345


In [6]:
import math
print(math.pi)

3.141592653589793


In [7]:
# more number formatting
sortOfPi = 22/7
sortOfPi

3.142857142857143

In [8]:
# .4f means fixed-point notation with four digits after the decimal point
print("four decimal places: {0:.4f}".format(sortOfPi))

two decimal places: 3.1429


In [9]:
print("scientific notation: {0:.3e}".format(3450000))

scientific notation: 3.450e+06


In [12]:
famousPeople = 604174 / 7059837187  # as of jan 2013
# the % format type multiplies by 100 and adds a percent sign;
# the number after the dot is how many decimal places to keep
for template in (
    "Percentage of famous people in the world: {0:.2%}",
    "Percentage of famous people in the world: {0:.8%}",
):
    print(template.format(famousPeople))

Percentage of famous people in the world: 0.01%
Percentage of famous people in the world: 0.00855790%


In [15]:
# align some text
# < pads on the right, > pads on the left, ^ pads on both sides; 30 is the total width
for template, sample in (
    ("This is an example of {:<30}", "left aligned"),
    ("This is an example of {:>30}", "right aligned"),
    ("This is an example of {:^30}", "centered aligned"),
):
    print(template.format(sample))

This is an example of left aligned                  
This is an example of                  right aligned
This is an example of        centered aligned       


In [16]:
print("This is an example of {:*^30}".format("left aligned"))
# the character after the colon but before the alignment character (<, >, ^) is the
# fill character: it is used instead of spaces to pad the text out to the width (30)
# note: the sample text says "left aligned", but ^ centers it

This is an example of *********left aligned*********


In [21]:
print("This is an example of {:*<15}".format("left aligned"))

This is an example of left aligned***


In [23]:
# multiple numeric systems
# binary
# 0000 = 0
# 0001 = 1
# 0010 = 2
# 0011 = 3
# 0100 = 4
# each digit represents a bit
# 0 means nothing from that bit
# 1 means something from that bit
# going from right to left, the first bit is 2^0 = 1
# the second bit is 2^1 = 2
# the third bit is 2^2 = 4
# the fourth bit is 2^3 = 8

# 0010 = 0*2^3 + 0*2^2 + 1*2^1 + 0*2^0 = 2

# hexadecimal: base 16
# html colors are rgb, e.g., #ffffff for white, #000000 for black
# these are three pairs of numbers, #ff, #ff, #ff for white, #00, #00, #00 for black
# they represent the r, g, b: red, green, blue

print("int: {0:d}, hex: {0:x}, oct: {0:o}, bin: {0:b}".format(42))
# d: decimal (base 10)
# x: hex (base 16)
# o: octal (base 8)
# b: binary (base 2)

int: 42, hex: 2a, oct: 52, bin: 101010


In [None]:
# encoding and decoding
# from wikipedia: character encoding is used to represent a repertoire of characters by some
# kind of encoding system
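# for example, us-ascii uses 7 bits per character
# utf-8: 8-bit code units, 1 to 4 of them (1-4 bytes) per character
# utf-16: 16-bit code units, 1 or 2 of them (2 or 4 bytes) per character
# utf-32: one 32-bit code unit (4 bytes) per character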

In [25]:
# what you need to know: different encoding systems exist and that you should use utf-8 
# whenever possible
# ascii characters are part of unicode

# unicode: (from the wikipedia page)
# utf-8 (u stands for unicode) is used pretty much all over the web (92%)
# uses one byte for the first 128 code points, up to 4 bytes for other characters
# the first 128 unicode numbers are the ascii characters

print("coffee" > "pancakes")

False


In [26]:
# what?  how can one string be greater than or lesser than another string?
# length?
print("tea" > "coffee")

True


In [27]:
# what's going on here?
# it turns out that each character has a number associated with it
# we find that number with ord, which gives us the unicode code point of a character
# (for ascii characters this is the same as the ascii value)
ord("t")

116

In [28]:
try:
    ord("tea") # only works for one character
except TypeError as err:
    # the error is the demonstration: catch it and show it so the rest of the
    # notebook still runs
    print("TypeError: {0}".format(err))

TypeError: ord() expected a character, but string of length 3 found

In [29]:
ord("T")

84