# Text

The objective for this part of the lecture is to explain how Python represents strings and how they can be used.

* What is a string
* How to manipulate and parse strings
* Printing out strings

In [1]:
"Dr. James"

'Dr. James'

In [2]:
dr='Dr. James'
dr

'Dr. James'

In [3]:
'Dr. James' + "1"

'Dr. James1'

In [4]:
'Dr. James' + str(1)

'Dr. James1'

In [5]:
'Dr. James' + ' and spouse'

'Dr. James and spouse'

In [6]:
'Dr. James\'s spouse'

"Dr. James's spouse"

In [7]:
"Dr. James's spouse"

"Dr. James's spouse"

In [8]:
len('Dr. James')

9

In [9]:
len('Dr. James\'s Spouse')

18

In [10]:
len("Dr. James's Spouse")

18

In [11]:
dr = 'Dr. James\'s "Spouse"'

In [12]:
dr

'Dr. James\'s "Spouse"'

In [13]:
"Dr. James's \"Spouse\""

'Dr. James\'s "Spouse"'

In [14]:
print(dr)

Dr. James's "Spouse"


In [15]:
len(dr)

20

In [16]:
seq = 'AGCCGAC'
seq[5]

'A'

In [17]:
# Slice syntax is [start_position:up_until_but_not_including]
# First character is in position 0
# The end index (8) is past the last character of the 7-character string;
# a slice simply stops at the end of the string instead of raising an error.
seq[5:8]

'AC'

In [18]:
start_position=0
number_of_characters=5
seq[start_position:start_position+number_of_characters]

'AGCCG'

In [19]:
txt = '''This
string
is
on
multiple
lines'''

In [20]:
txt

'This\nstring\nis\non\nmultiple\nlines'

In [21]:
print(txt)

This
string
is
on
multiple
lines


In [22]:
len(txt)

32

In [23]:
txt2 = 'This\nstring\nis\non\nmultiple\nlines'

In [24]:
print(txt2)

This
string
is
on
multiple
lines


In [25]:
tbl = "a1\ta2\nb10\tb2"
print(tbl)

a1	a2
b10	b2


# Parts of strings

In [26]:
name = "Paul E. Boal"

`[]` is used to "slice" a string

```
a[start:end] # characters start through end-1
a[start:]    # characters start through the end of the string
a[:end]      # characters from the beginning through end-1
a[:]         # the whole string
```

If `start` or `end` is a negative number, it is counted backward from the end of the string; for example, `name[-4:]` is the last four characters.

In [27]:
len(name)

12

In [28]:
name[0]

'P'

In [29]:
name[0:4]

'Paul'

In [30]:
name[-4:]

'Boal'

In [31]:
name[5:]

'E. Boal'

In [32]:
print(name)

Paul E. Boal


In [33]:
name.find(' ')

4

In [34]:
name[:name.find(' ')]

'Paul'

In [35]:
name[name.find(' ')+1:]

'E. Boal'

In [36]:
pos_of_middle_name = name.find(' ') + 1
middle_and_last = name[pos_of_middle_name:]
middle_and_last

'E. Boal'

In [37]:
# The middle name starts one character after the first space in the full name
pos_of_middle_name = name.find(' ') + 1

# Everything from the middle name onward (the full name with the first name removed)
middle_and_last = name[pos_of_middle_name:]

# The last name starts one character after the first space in the remaining text
pos_of_last = middle_and_last.find(' ') + 1

# Slice from that position to the end to keep only the last name
middle_and_last[pos_of_last:]

'Boal'

In [38]:
name

'Paul E. Boal'

In [39]:
name.split(' ')

['Paul', 'E.', 'Boal']

# Converting text to numbers

In [40]:
int('45')

45

In [41]:
float('45.12')

45.12

In [43]:
# Underscores between digits are accepted as visual separators (Python 3.6+) and are ignored
int('45_12')

4512

In [44]:
score='45.12'
type(score)

str

In [45]:
int(score[:score.find('.')])

45

In [47]:
int('1')

1

In [48]:
int('a',base=16)

10

In [51]:
# 10.3 + 4 already evaluates to a float, so float() returns the value unchanged here
float(10.3 + 4)

14.3

In [52]:
a="14.5"
float(a)

14.5

## Printing text

In [54]:
# Convert the text held in `a` (assigned in an earlier cell) to a number,
# then print it with two digits after the decimal point.
try:
    formatted_number = float(a)  # Attempt to convert to float; raises ValueError for non-numeric text
    # {:.2f} renders the float in fixed-point notation with 2 decimal places
    print('Put a formatted number here: {:.2f}'.format(formatted_number))
except ValueError:
    # Report the problem instead of letting the exception stop the notebook
    print("Error: The variable 'a' cannot be converted to a float.")

Put a formatted number here: 14.50


In [56]:
# "new" style formatting
def print_numbers(a,b):
    """Build a sentence that mentions two integers.

    Despite the name, nothing is printed: the formatted text is returned so
    the caller decides what to do with it.

    Args:
        a: First integer; rendered with the ``d`` (decimal integer) format code.
        b: Second integer; rendered the same way.

    Returns:
        str: The sentence with ``a`` and ``b`` substituted in order.
    """
    template = 'Here is {:d} and here is {:d}'
    sentence = template.format(a, b)
    return sentence

In [57]:
print(print_numbers(1,2))

Here is 1 and here is 2


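See https://docs.python.org/3/library/string.html#format-string-syntax for a good reference on the `{}`-style (`str.format`) formatting used above, and https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting for the older `%`-style formatting.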

In [58]:
physician = 'Dr. James'
patient = 'Jane Doe'
diagnosis = 'Right Ulnar Fracture'
print(physician, patient, diagnosis, sep=' ')

Dr. James Jane Doe Right Ulnar Fracture


## Splitting Strings

In [59]:
names="Paul,Sarahlynn,Ellie,Ada,Teddy"

In [60]:
names.split(',')

['Paul', 'Sarahlynn', 'Ellie', 'Ada', 'Teddy']

In [61]:
names="Paul, Sarahlynn, Ellie, Ada, Teddy"

In [62]:

names.split(',')

['Paul', ' Sarahlynn', ' Ellie', ' Ada', ' Teddy']

In [63]:
names.split(', ')

['Paul', 'Sarahlynn', 'Ellie', 'Ada', 'Teddy']

## Find the Nth Occurrence of a String

In [64]:
our_string = "love is love is love is love"

In [65]:
first_love = our_string.find("love")
first_love

0

In [66]:
second_love = our_string.find("love", first_love+1)
second_love

8

In [67]:
third_love = our_string.find("love", second_love+1)
third_love

16

In [68]:
def find_nth(str_to_search, for_str, nth):
    """Return the index where the nth occurrence of ``for_str`` begins.

    Occurrences are counted from 1 and may overlap, because each search
    resumes one character after the start of the previous match.

    Args:
        str_to_search: Text to search within.
        for_str: Substring to look for.
        nth: Which occurrence to locate (1 = first).

    Returns:
        int: Zero-based index of the nth occurrence, or -1 if there are fewer
        than ``nth`` occurrences or ``nth`` is less than 1.
    """
    # There is no "zeroth" or negative occurrence; without this guard the loop
    # below would not run at all and there would be no position to return.
    if nth < 1:
        return -1

    pos = -1
    start = 0
    for _ in range(nth):
        pos = str_to_search.find(for_str, start)
        if pos == -1:
            # Ran out of matches before reaching the nth one
            return -1
        # Resume the search just past the start of this match
        start = pos + 1

    return pos

In [69]:
find_nth(our_string, "love", 4)

24

In [70]:
find_nth(our_string, "is", 1)

5

In [71]:
for n in range(1,10):
    pos = find_nth(our_string, "love", n)
    print(pos)

0
8
16
24
-1
-1
-1
-1
-1


In [72]:
def find_nth(str_to_search, for_str, nth, base=0):
    """Recursively find the index where the nth occurrence of ``for_str`` begins.

    Each call locates the first occurrence in ``str_to_search``; if more
    occurrences are still needed, it recurses on the text that follows the
    start of that match. Matches may therefore overlap.

    NOTE: this definition reuses the name of the iterative ``find_nth`` from
    an earlier cell and replaces it once this cell runs.

    Args:
        str_to_search: Text to search within.
        for_str: Substring to look for.
        nth: Which occurrence to locate (1 = first).
        base: Offset of ``str_to_search`` within the original text. Used by
            the recursive calls to report positions relative to the original
            string; callers normally leave it at 0.

    Returns:
        int: ``base`` plus the index of the nth occurrence, or -1 if there are
        fewer than ``nth`` occurrences or ``nth`` is less than 1.
    """
    # There is no "zeroth" or negative occurrence; without this guard nth would
    # keep decreasing and never hit the nth == 1 base case.
    if nth < 1:
        return -1

    first = str_to_search.find(for_str)

    # No match in the remaining text: stop immediately rather than recursing
    # on the same text again.
    if first == -1:
        return -1

    if nth == 1:
        return first + base

    # Skip past the start of this match and look for the next occurrence,
    # carrying forward how many characters have been dropped so far.
    return find_nth(str_to_search[first+1:], for_str, nth-1, base+first+1)


In [73]:
for n in range(1,10):
    pos = find_nth("love is love is love is love", "love", n)
    print(pos)

0
8
16
24
-1
-1
-1
-1
-1
