# Ch 8. Strings and Regular Expressions

## A String Is a Sequence

In [4]:
# select a character with the bracket operator;
# indices start at 0, so index 1 is the second character
fruit = "banana"
letter = fruit[1]
letter

'a'

In [6]:
# the index can be a variable
i = 1
fruit[i]

'a'

In [7]:
# the index can be an expression
fruit[i+1]

'n'

In [33]:
# the index has to be an integer
# fruit[1.5]  # raises TypeError (string indices must be integers)

In [11]:
# get the length of a string
n = len(fruit)
n

6

In [12]:
# indices start at zero, so the last character is at n-1, not n
fruit[n-1]

'a'

In [13]:
# but there's an easier way: negative indices count back from the end
fruit[-1]

'a'

## String Slices

In [17]:
# selecting a slice
fruit = "banana"
fruit[0:3]

'ban'

In [19]:
# start the slice at the beginning
fruit[:3]

'ban'

In [20]:
# slice goes to the end
fruit[3:]

'ana'

In [21]:
# start and stop are equal, so this makes an empty string
fruit[3:3]

''

In [23]:
# no index produces the entire string
fruit[:]

'banana'

## Strings Are Immutable

In [29]:
# strings cannot be modified in place
greeting = "Hello, world!"
# greeting[0] = 'J'  # raises TypeError (str does not support item assignment)

In [30]:
# do this instead
new_greeting = 'J' + greeting[1:]
new_greeting

'Jello, world!'

In [32]:
# original string intact
greeting

'Hello, world!'

## String Comparison

In [36]:
# relational operator on strings
word = "banana"

if word == "banana":
    print("All right, banana.")

All right, banana.


In [38]:
# other relational operators
def compare_word(word):
    """Print whether `word` sorts before, after, or equal to "banana".

    Uses the string relational operators, so uppercase letters sort
    ahead of lowercase ones.
    """
    reference = "banana"
    if word < reference:
        verdict = "comes before banana."
    elif word > reference:
        verdict = "comes after banana."
    else:
        # neither smaller nor larger: the word is the reference itself
        print("All right, banana.")
        return
    print(word, verdict)

compare_word("apple")

apple comes before banana.


In [45]:
def compare_words(word1, word2):
    """Print how `word1` is ordered relative to `word2`, ignoring case.

    Both words are lowercased before comparing, and the lowercased
    forms are what appear in the printed message.
    """
    word1, word2 = word1.lower(), word2.lower()
    if word1 < word2:
        message = f"{word1} comes before {word2}"
    elif word1 > word2:
        message = f"{word1} comes after {word2}"
    else:
        message = f"All right, {word1} is {word2}"
    print(message)

compare_words("apple", "banana")
compare_words("Banana", "apple")
compare_words("banana", "banana")

apple comes before banana
banana comes after apple
All right, banana is banana


## String Methods

In [46]:
# string methods
word = "banana"
new_word = word.upper()
new_word

'BANANA'

## Writing Files

In [63]:
def is_special_line(line):
    """Return True when `line` begins with three asterisks and a space."""
    marker = '*** '
    return line.startswith(marker)

In [64]:
# scan the whole file and show only the special marker lines
reader = open("pg345.txt")

for line in reader:
    if not is_special_line(line):
        continue
    # strip() drops the trailing newline so print() doesn't double-space
    print(line.strip())

reader.close()

*** START OF THE PROJECT GUTENBERG EBOOK DRACULA ***
*** END OF THE PROJECT GUTENBERG EBOOK DRACULA ***


In [73]:
# preview the first 11 lines of the raw file
# utf-8-sig decodes UTF-8 and drops the byte-order mark (U+FEFF) that
# otherwise shows up as an invisible character before the first line
reader = open("pg345.txt", encoding="utf-8-sig")

i = 0
for line in reader:
    # each line keeps its trailing newline, so print() double-spaces the output
    print(line)
    if i == 10:
        break
    i += 1

reader.close()

﻿The Project Gutenberg eBook of Dracula

    

This ebook is for the use of anyone anywhere in the United States and

most other parts of the world at no cost and with almost no restrictions

whatsoever. You may copy it, give it away or re-use it under the terms

of the Project Gutenberg License included with this ebook or online

at www.gutenberg.org. If you are not located in the United States,

you will have to check the laws of the country where you are located

before using this eBook.



Title: Dracula



In [81]:
# reader keeps track of where it is in the file
reader = open('pg345.txt')
# the output name has to match the file the later cells read back
writer = open('pg345_cleaned.txt', 'w')

# this first loop takes us up to the first special line
for line in reader:
    if is_special_line(line):
        break

# when the loop breaks, line contains the special line
print(line.strip())

# this second loop continues through the file to the next special line;
# it resumes where the first loop stopped because reader keeps its position
for line in reader:
    if is_special_line(line):
        break
    # write the body of the file to the new file
    writer.write(line)

print(line.strip())

reader.close()
writer.close()

*** START OF THE PROJECT GUTENBERG EBOOK DRACULA ***
*** END OF THE PROJECT GUTENBERG EBOOK DRACULA ***


In [84]:
# show the opening lines of the new file, up to the line ending in 'Stoker'
# note: the file object is never closed explicitly here
for line in open('pg345_cleaned.txt'):
    line = line.strip()
    # skip lines that are empty once whitespace is stripped
    if line != '':
        print(line)
    # stop after the line that ends with 'Stoker'
    if line.endswith('Stoker'):
        break

DRACULA
_by_
Bram Stoker


In [85]:
# a with statement closes the file automatically, even if we break out early
with open('pg345_cleaned.txt') as f:
    for line in f:
        line = line.strip()
        # skip lines that are empty once whitespace is stripped
        if line:
            print(line)
        # stop after the line that ends with 'Stoker'
        if line.endswith('Stoker'):
            break

DRACULA
_by_
Bram Stoker


## Find and Replace

In [86]:
# count the lines in the cleaned version: add one for every line read
with open('pg345_cleaned.txt') as f:
    total = sum(1 for _ in f)

total

15477

In [88]:
# count the lines that contain 'Jonathan' at least once
with open('pg345_cleaned.txt') as f:
    total = sum(1 for line in f if 'Jonathan' in line)

total

199

In [90]:
# use the count method to tally every instance of a string;
# a line that contains the name twice contributes two to the total
with open('pg345_cleaned.txt') as f:
    total = sum(line.count('Jonathan') for line in f)

total

200

In [93]:
# replace a word
writer = open('pg345_replaced.txt', 'w')

with open('pg345_cleaned.txt') as f:
    for line in f:
        # the search string must match the spelling in the text ('Jonathan');
        # replace() returns the line unchanged when nothing matches
        line = line.replace('Jonathan', 'Thomas')
        writer.write(line)

writer.close()

In [95]:
# rewritten to open multiple files with a single with statement;
# both files are closed when the block ends
with open('pg345_cleaned.txt') as reader, open('pg345_replaced.txt', 'w') as writer:
    for line in reader:
        # the search string must match the spelling in the text ('Jonathan');
        # replace() returns the line unchanged when nothing matches
        line = line.replace('Jonathan', 'Thomas')
        writer.write(line)

## Regular Expressions

In [None]:
# this