# Strings
*You can "update" an existing string by (re)assigning a variable to another string.*

*Python has no separate character type: a single character is simply a string of length one, so it also counts as a substring.*

## **Read more!** [Strings in python](https://www.tutorialspoint.com/python/python_strings.htm)

In [None]:
print("My name is %s and weight is %d kg!" % ('Zara', 21))

My name is Zara and weight is 21 kg!


In [None]:
print(r'C:\\nowhere')

C:\\nowhere


In [None]:
# In Python 3 every str is a sequence of Unicode code points, so the u'' prefix
# is optional: it is accepted only for compatibility with Python 2 source code.
# (In Python 2, plain strings were byte strings and u'' created a separate
# unicode type.)
print(u'Hello, world!')

In [None]:
st = "hi this IS ME a strING with MulTI % characters and #s so let's start"

In [None]:
# Capitalizes first letter of string
st.capitalize()

"Hi this is me a string with multi % characters and #s so let's start"

In [None]:
st = 'wow!'
# center(width, fillchar) returns the string centered in a field of `width`
# characters, padded on both sides with `fillchar` (a space by default).
st.center(40, '.')

'..................wow!..................'

In [None]:
# ljust(width, fillchar) left-justifies the string in a field of `width`
# characters, padding on the right with `fillchar` (a space by default; '0' here).
# The variable is named `text` so that the builtin `str` is not shadowed.
text = "this is string example....wow!!!"
text.ljust(50, '0')

'this is string example....wow!!!000000000000000000'

In [None]:
# rjust(width, fillchar) right-justifies the string in a field of `width`
# characters, padding on the left with `fillchar` (a space by default; '0' here).
# The variable is named `text` so that the builtin `str` is not shadowed.
text = "this is string example....wow!!!"
text.rjust(50, '0')

'000000000000000000this is string example....wow!!!'

In [None]:
#  zfill() pads string on the left with zeros to fill width.
"this is string example....wow!!!".zfill(40)

'00000000this is string example....wow!!!'

In [None]:
st.count('w')

2

In [None]:
# encode() turns text into bytes using the named codec; decode() reverses it.
# 'strict' makes either call raise an error on characters/bytes the codec cannot handle.
text = "this is string example ワオ ワオ ....wow!!!"
Str = text.encode('shift_jis_2004', 'strict')

# repr() is used instead of str(): other cells in this notebook rebind the name
# `str` to a string, which makes a call to str(...) raise TypeError when the
# notebook is run from top to bottom. For bytes both give the same b'...' text.
print("Encoded String: " + repr(Str))
print("Decoded String: " + Str.decode('shift_jis_2004', 'strict'))

Encoded String: b'this is string example \x83\x8f\x83I \x83\x8f\x83I ....wow!!!'
Decoded String: this is string example ワオ ワオ ....wow!!!


In [None]:
st.endswith('!')

True

In [None]:
st.endswith('W')

False

In [None]:
"free your mind".startswith('f')

True

In [None]:
# expandtabs(tabsize=8) returns a copy of the string in which every tab character ('\t')
# is replaced by enough spaces to reach the next multiple of `tabsize` columns.
# NOTE(review): this rebinds the name `str`, shadowing the builtin; the find()/index()
# cells further down read this value, so a rename has to cover those cells as well.
str = "this is\tstring example....wow!!!";

display("Original string: " + str)
display("Defualt exapanded tab: " +  str.expandtabs())
display("Double exapanded tab: " +  str.expandtabs(16))

'Original string: this is\tstring example....wow!!!'

'Defualt exapanded tab: this is string example....wow!!!'

'Double exapanded tab: this is         string example....wow!!!'

In [None]:
# find(sub[, start[, end]]) returns the lowest index at which `sub` occurs in the string
# (optionally restricted to the slice [start:end]), or -1 if it is not found.
str.find('ex')

15

In [None]:
str.find('hghg')

-1

In [None]:
#  returns the last index where the substring str is found 
#  find() but backwards
"a this is really a string example....wow!!! ha ha".rfind('a')

48

In [None]:
# Same as find(), but raises an exception if str not found.
str.index('ex')

15

In [None]:
str.index('hghg')

ValueError: ignored

In [None]:
"hi this me".rindex('i')

5

In [None]:
# checks whether the string consists of alphanumeric characters.
"this2009".isalnum()

True

In [None]:
"  this with space ".isalnum()

False

In [None]:
# Returns true if string has at least 1 character and
# all characters are alphabetic and false otherwise.
"dfsdfd".isalpha()

True

In [None]:
"dsfdsf5".isalpha()

False

In [None]:
# Returns true if string contains only digits and false otherwise.
"254364535".isdigit()

True

In [None]:
"f1".isdigit()

False

In [None]:
# isdecimal() returns True when the string is non-empty and every character is a
# decimal digit, and False otherwise.
# In Python 3 every str is Unicode, so the u'' prefix that Python 2 needed in
# order to call this method is no longer required.
# The variable is named `text` so that the builtin `str` is not shadowed.
text = "this2009"
text.isdecimal()

False

In [None]:
# A string made only of decimal digits passes isdecimal().
# Named `digits` so that the builtin `str` is not shadowed.
digits = "23443434"
digits.isdecimal()

True

In [None]:
"dgdfgfd".islower()

True

In [None]:
"Gjhfgh".islower()

False

In [None]:
"fdgfdgH".isupper()

False

In [None]:
# isnumeric() returns True when the string is non-empty and every character is
# numeric, and False otherwise.
# Descriptive names are used so that the builtin `str` is not shadowed.
mixed = "this2009"
print(mixed.isnumeric())

digits = "23443434"
print(digits.isnumeric())

False
True


In [None]:
# Returns true if string contains only whitespace characters and false otherwise.
"                  ".isspace()

True

In [None]:
# istitle() returns True when the string is title-cased: every word starts with an
# uppercase letter and the rest of its cased characters are lowercase.
# NOTE(review): `str` shadows the builtin here, and the later len(str) cell reads the
# second value assigned below, so a rename has to include that cell.
str = "This Is String Example...Wow!!!";
display(str.istitle())

str = "This is string example....wow!!!";
display (str.istitle())

True

False

In [None]:
'convert me to a heading or a title'.title()

'Convert Me To A Heading Or A Title'

In [None]:
# separator.join(iterable) concatenates the strings of the iterable,
# inserting the separator between consecutive items.
seq = ("a", "b", "c")  # the strings to be joined
s = "-"  # the separator placed between them
s.join(seq)

'a-b-c'

In [None]:
# Returns the length of the string
len(str)

32

In [None]:
"Hi this ME!".lower()

'hi this me!'

In [None]:
'hi this a text'.upper()

'HI THIS A TEXT'

In [None]:
"HI this ME".swapcase()

'hi THIS me'

In [None]:
# Removes all leading whitespace in string.
"                   hi this me".lstrip()

'hi this me'

In [None]:
"                   hi this me              ".rstrip()

'                   hi this me'

In [None]:
# Performs both lstrip() and rstrip() on string.
"                   hi this me              ".strip()

'hi this me'

In [None]:
# str.maketrans(intab, outtab) builds a translation table that maps each character
# of `intab` to the character at the same position in `outtab`; the table is then
# handed to translate().
#
# In Python 2, maketrans was a function in the string module.
# In Python 3 it is a static method of the str type.

intab = "aeiou"
outtab = "12345"
trantab = str.maketrans(intab, outtab)

# translate(table) returns a copy of the string with every character mapped
# through `table`. In Python 3, characters to delete are passed as a third
# argument to maketrans() rather than to translate().
# The variable is named `text` so that the builtin `str` is not shadowed.
text = "this is string example....wow!!!"
text.translate(trantab)

'th3s 3s str3ng 2x1mpl2....w4w!!!'

In [None]:
max("this is a string example....wow!!!")

'x'

In [None]:
min("this is a string example....wow!!!")

' '

In [None]:
min('ahello')

'a'

In [None]:
"this is string example....wow!!! this is really string".replace('!',':)')

'this is string example....wow:):):) this is really string'

In [None]:
"hi this a cat".split()

['hi', 'this', 'a', 'cat']

In [None]:
"Line1-a b c d e f\nLine2- a b c\n\nLine4- a b c d".splitlines( )

['Line1-a b c d e f', 'Line2- a b c', '', 'Line4- a b c d']

## Exercises

### Manipulate a string in python as much as you can using the discussed string methods

# Regular expression

###  **Read more!** [Regular expression in python](https://docs.python.org/3/howto/regex.html)

## Compiling Regular Expressions

In [2]:
import re
p = re.compile('ab*')

In [3]:
p

re.compile(r'ab*', re.UNICODE)

In [4]:
p = re.compile('ab*', re.IGNORECASE)

In [7]:
p

re.compile(r'ab*', re.IGNORECASE|re.UNICODE)

## Match()

In [21]:
# Determine if the RE matches at the beginning of the string.
p = re.compile('[a-z]+')

In [9]:
# if not matched the string it will return None
print(p.match(""))

None


In [10]:
# if matched the string it will return match object
p.match('tempo')

<re.Match object; span=(0, 5), match='tempo'>

Match object has the following attributes and methods:

group()

Return the string matched by the RE

start()

Return the starting position of the match

end()

Return the ending position of the match

span()

Return a tuple containing the (start, end) positions of the match

In [24]:
m = re.match(r"([abc])+", "abc")
m.groups()

('c',)

In [25]:
print(re.match(r'From\s+', 'Fromage amk'))

None


In [35]:
re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998')

<re.Match object; span=(0, 5), match='From '>

In [41]:
p = re.compile('(a(b)c)d')
m = p.match('abcd')
m.group(0)

'abcd'

In [42]:
m.group()

'abcd'

In [39]:
m.group(1)

'abc'

In [40]:
m.group(2)

'b'

In [43]:
m.group(2,1,2)

('b', 'abc', 'b')

In [44]:
m.groups()

('abc', 'b')

## Findall()

In [22]:
# Find all substrings where the RE matches, and returns them as a list.
p = re.compile(r'\d+')
p.findall('12 drummers drumming, 11 pipers piping, 10 lords a-leaping')

['12', '11', '10']

## Finditer()

In [23]:
# Find all substrings where the RE matches, and returns them as an iterator.
# returns a sequence of match object instances as an iterator
iterator = p.finditer('12 drummers drumming, 11 ... 10 ...')
for match in iterator:    
    print(match.group())
    print(match.group(0)) # the same as match.group()
    print(match.span())

12
12
(0, 2)
11
11
(22, 24)
10
10
(29, 31)


# Search()

In [27]:
# Scan through a string, looking for any location where this RE matches.
print(re.search('^From', 'From Here to Eternity'))  

<re.Match object; span=(0, 4), match='From'>


In [28]:
print(re.search('^From', 'Reciting From Memory'))

None


In [29]:
print(re.search('}$', '{block}'))

<re.Match object; span=(6, 7), match='}'>


In [45]:
print(re.search('}$', '{block} '))

None


In [47]:
p = re.compile(r'\b(\w+)\s+\1\b')
p.search('Paris in the the spring').group()

'the the'

In [52]:
re.search(r"\b(\w+).+\1\b",'Hi I am Muhammad and my full name is Muhammad Elgendi').group()

'Muhammad and my full name is Muhammad'

In [55]:
re.search(r"\b(\w+).+\1\b",'Hi I am Muhammad and my full name is Muhammad Elgendi').groups()

('Muhammad',)

## \b
Word boundary. This is a zero-width assertion that matches only at the beginning or end of a word. A word is defined as a sequence of alphanumeric characters, so the end of a word is indicated by whitespace or a non-alphanumeric character.

In [33]:
p = re.compile(r'\bclass\b')
print(p.search('no class at all'))

<re.Match object; span=(3, 8), match='class'>


In [34]:
print(p.search('the declassified algorithm'))

None


# Split()
## split(string[, maxsplit=0])

In [60]:
# Split the string into a list, splitting it wherever the RE matches
p = re.compile(r'\W+')
p.split('This is a test, short and sweet, of split().')

['This', 'is', 'a', 'test', 'short', 'and', 'sweet', 'of', 'split', '']

In [59]:
p.split('This is a test, short and sweet, of split().', 3)

['This', 'is', 'a', 'test, short and sweet, of split().']

# Sub()
## sub(replacement, string[, count=0])

In [61]:
# Find all substrings where the RE matches, and replace them with a different string
p = re.compile('(blue|white|red)')
p.sub('colour', 'blue socks and red shoes')

'colour socks and colour shoes'

In [62]:
p.sub('colour', 'blue socks and red shoes', count=1)

'colour socks and red shoes'

# Subn()

In [65]:
# Does the same thing as sub(), but returns the new string and the number of replacements
# returns a 2-tuple containing the new string value and the number of replacements that were performed
p = re.compile('(blue|white|red)')
p.subn('colour', 'blue socks and red shoes')

('colour socks and colour shoes', 2)

In [66]:
p.subn('colour', 'no colours at all')

('no colours at all', 0)