## String data manipulation using list methods

A Jupyter Notebook presentation to introduce string data manipulation using list methods.

Ian Stewart - Hamilton Python User Group - 8 October 2018.


### Introduction

The list() function converts a string into a list of single characters, and the string join() method is its complement: it turns a list of characters back into a string. The list methods append(), extend() and pop(0) allow a list to be used as a FIFO (first in, first out) buffer.

### Python's list() function

print(list("abc"))

['a', 'b', 'c']

### Python's string.join() method

print("".join(['a', 'b', 'c']))

abc

### Main List methods

append, clear, copy, count, extend, index, insert, pop, remove, reverse, sort.

In [None]:
# 0
# Round trip: list() splits a string into single characters and
# "".join() glues the characters back together into a string.

alphabet_string = "abc"
alphabet_list = list(alphabet_string)         # string -> list of characters
alphabet_string_new = "".join(alphabet_list)  # list of characters -> string

print("Alphabet string:", alphabet_string)
print("Alphabet list:", alphabet_list)
print("New alphabet string:", alphabet_string_new)


In [None]:
# 1
# Split a sentence (including its trailing newline) into a list of
# characters, then join the list again to show the text is recovered.
paragraph = "This is a paragraph.\n"
paragraph_list = list(paragraph)

paragraph_rejoined = "".join(paragraph_list)

print("paragraph_list:\n", paragraph_list)
print(paragraph_rejoined)

In [None]:
# 2
# Build a second list by appending the characters of paragraph_list to it
# one at a time. Joining the copy shows the text came through unchanged.
paragraph_new_list = []

for character in paragraph_list:
    paragraph_new_list.append(character)

copied_text = "".join(paragraph_new_list)

print("paragraph_new_list:\n", paragraph_new_list)
print(copied_text)


In [None]:
# 3
# Send every character through a 2 character first in, first out (fifo)
# list on its way to the new list:
#   .append() adds at the right hand end of the fifo,
#   .pop(0)   removes from the left hand end of the fifo.
# Nothing empties the fifo once the input is exhausted, so the last two
# characters never reach paragraph_new_list.

fifo_list = []
paragraph_new_list = []

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) <= 2:
        # The fifo is not over-full yet, so nothing is released.
        continue
    oldest_character = fifo_list.pop(0)
    paragraph_new_list.append(oldest_character)

print("paragraph_new_list via the fifo:\n", paragraph_new_list)
print("".join(paragraph_new_list))
print("Note: It is missing the last two characters")

In [None]:
# 4
# Same 2 character fifo as before, but the characters still held in the
# fifo when the input ends are copied across afterwards (the "flush").
fifo_list = []
paragraph_new_list = []

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) <= 2:
        # The fifo is not over-full yet, so nothing is released.
        continue
    oldest_character = fifo_list.pop(0)
    paragraph_new_list.append(oldest_character)

# Flush with a loop. The fifo is only read here, not emptied.
for leftover_character in fifo_list:
    paragraph_new_list.append(leftover_character)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("".join(paragraph_new_list))

In [None]:
# 5
# Examine and modify the contents of the fifo.
# If the fifo holds "is" then change it, in place, to "IS". The modified
# characters then leave the fifo in the normal way (pop or flush).
# Every "is" pair is changed, including the one inside "This".
fifo_list = []
paragraph_new_list = []

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 2:
        paragraph_new_list.append(fifo_list.pop(0))
    # Only examine a full fifo. While the fifo holds a single character
    # fifo_list[1] does not exist, so text starting with "i" would
    # otherwise raise an IndexError.
    if (len(fifo_list) == 2 and
        fifo_list[0] == "i" and
        fifo_list[1] == "s"):
        fifo_list[0] = "I"
        fifo_list[1] = "S"

# Flush, using .extend()
paragraph_new_list.extend(fifo_list)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("".join(paragraph_new_list))

In [None]:
# 6
# Examine the contents of the fifo. Modify the paragraph_new.
# If the fifo holds "is" then write "__is__" to paragraph_new_list instead
# and empty the fifo so the matched characters are not output a second time.
fifo_list = []
paragraph_new_list = []
string = "__is__"
string_list = list(string)

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 2:
        paragraph_new_list.append(fifo_list.pop(0))
    # Only examine a full fifo. After each replacement the fifo restarts
    # empty, so without the length check an "i" arriving first would make
    # fifo_list[1] raise an IndexError.
    if (len(fifo_list) == 2 and
        fifo_list[0] == "i" and
        fifo_list[1] == "s"):
        paragraph_new_list.extend(string_list)
        fifo_list = []

# Flush
paragraph_new_list.extend(fifo_list)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("".join(paragraph_new_list))

In [None]:
# 7
# 4 character fifo
# Examine the contents of the fifo. Modify the paragraph_new.
# If the fifo holds " is " then write " IS " to paragraph_new_list instead.
# Requiring the surrounding spaces means the "is" inside "This" is left alone.
fifo_list = []
paragraph_new_list = []
string = " IS "
string_list = list(string)

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 4:
        paragraph_new_list.append(fifo_list.pop(0))
    # Only examine a full fifo. While it is still filling, indexes 1 to 3
    # may not exist, so a fifo that starts with a space would otherwise
    # raise an IndexError.
    if (len(fifo_list) == 4 and
        fifo_list[0] == " " and
        fifo_list[1] == "i" and
        fifo_list[2] == "s" and
        fifo_list[3] == " "):
        paragraph_new_list.extend(string_list)
        fifo_list = []

# Flush
paragraph_new_list.extend(fifo_list)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("".join(paragraph_new_list))

In [None]:
# 8
# 4 character fifo
# Examine the contents of the fifo. Modify the paragraph_new.
# If the fifo holds " is " then write " IS " to paragraph_new_list instead.
# This paragraph contains a second " is " after the newline, so two
# replacements are made.
paragraph = "This is a paragraph.\n is ."
paragraph_list = list(paragraph)

fifo_list = []
paragraph_new_list = []
string = " IS "
string_list = list(string)

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 4:
        paragraph_new_list.append(fifo_list.pop(0))
    # Only examine a full fifo. While it is still filling, indexes 1 to 3
    # may not exist, so a fifo that starts with a space would otherwise
    # raise an IndexError.
    if (len(fifo_list) == 4 and
        fifo_list[0] == " " and
        fifo_list[1] == "i" and
        fifo_list[2] == "s" and
        fifo_list[3] == " "):
        paragraph_new_list.extend(string_list)
        fifo_list = []

# Flush
paragraph_new_list.extend(fifo_list)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("".join(paragraph_new_list))

In [None]:
# 9
# So what?
# A practical use: turn specific text into an html link.
# UNSC resolutions are written as the resolution running number followed
# by the year in parenthesis, e.g. 2436 (2018), and the matching document
# lives at https://undocs.org/S/RES/2436(2018)

paragraph = "UNSC resolution 2436 (2018) was passed on 21 September 2018.\n"
paragraph_list = list(paragraph)

fifo_list = []
paragraph_new_list = []

# The fifo is an 11 character window over the text:
# 01234567890 <-- ruler
# 2436 (2018)
for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 11:
        paragraph_new_list.append(fifo_list.pop(0))

    # Nothing to examine until the window is full.
    if len(fifo_list) != 11:
        continue
    # The window must have " (" after four characters and end with ")".
    if (fifo_list[4], fifo_list[5], fifo_list[10]) != (" ", "(", ")"):
        continue

    resolution = "".join(fifo_list[:4])
    year = "".join(fifo_list[6:10])

    # Build the http link as a string
    string = ("<a href=https://undocs.org/S/RES/{}({})>{} ({})</a>"
              .format(resolution, year, resolution, year))
    print(string)

    # The link replaces the matched text, so the fifo is emptied rather
    # than passed on to the output.
    string_list = list(string)
    paragraph_new_list.extend(string_list)
    fifo_list = []

# Flush
paragraph_new_list.extend(fifo_list)

http_text = "".join(paragraph_new_list)

print("paragraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("\nHttp text:\n", http_text)

from IPython.display import display, HTML 

# Render both versions in the notebook: the untouched text first, then the
# text with the resolution reference turned into a link.
original_html = "<br>Original text as HTML:<br>" + paragraph
linked_html = "<br>HTML with embedded links:<br>" + "".join(paragraph_new_list)

display(HTML(original_html))
display(HTML(linked_html))
         
# check by creating a html file and view it with a browser.
#import os
#print("Current Working Directory:", os.getcwd(), "File:", "test.html")
#with open("test.html", "w") as f:
#    f.write(paragraph + "</br></br>" + "".join(paragraph_new_list) + "\n")


In [None]:
# 10
# UNSC resolutions may be either 1 through 4 digits.
# The year is always 4 x digits in parenthesis, from 1946.
# Examples: 1 (1946), 9 (1946), 99 (1953), 999 (1995), 2436 (2018)
#
# The 11 character fifo is again used as a window over the text. When the
# window is shaped like "???? (????)" the digits directly in front of " ("
# are taken as the resolution number, and any characters in front of those
# digits are passed on to the output unchanged. A window with no digit in
# front of " (", or with a year that is not all digits, is reported as a
# format error and left in the text as it is (no link is built).

paragraph = "The first UNSC resolution was in 1946. This is resolution 1 " \
    "(1946). It was also in 1946 that resolution 9 (1946) was passed. " \
    "The last double digit resolution was 99 (1953), and the last " \
    "tripple digit resolution was 999 (1995). The most recent UNSC " \
    "resolution is 2436 (2018) which was passed on 21 September 2018.\n"

# The same text with a malformed reference on the end. Assign this to
# paragraph to see the format error being reported.
paragraph_with_format_error = (
    paragraph + "This should raise an error xxxx (2018).")

print("\nOriginal text:\n", paragraph)
paragraph_list = list(paragraph)

fifo_list = []
paragraph_new_list = []

# 01234567890 <-- ruler
# 2436 (2018)

# 01234567890 <-- ruler
# xx 1 (2018)

for character in paragraph_list:
    fifo_list.append(character)
    if len(fifo_list) > 11:
        paragraph_new_list.append(fifo_list.pop(0))

    # Only a full window with " (" after four characters and a closing ")"
    # is a candidate. Anything else just keeps flowing through the fifo.
    if not (len(fifo_list) == 11 and
            fifo_list[4] == " " and
            fifo_list[5] == "(" and
            fifo_list[10] == ")"):
        continue

    year = "".join(fifo_list[6:10])

    # Filtering: True / False list for the four resolution positions.
    is_digit_list = [candidate.isdigit() for candidate in fifo_list[0:4]]
    print("\nFifo_list[0:4]:", fifo_list[0:4], is_digit_list)

    # Count the digits immediately in front of " (", working from right to
    # left and stopping at the first non-digit:
    #   "2436" -> 4,  " 999" -> 3,  "s 99" -> 2,  "on 1" -> 1,  "xxxx" -> 0
    digit_count = 0
    for is_digit in reversed(is_digit_list):
        if not is_digit:
            break
        digit_count += 1

    if digit_count == 0 or not year.isdigit():
        # Not a resolution reference. Leave the fifo untouched so the text
        # reaches the output as written, and do not build a link.
        print("Format Error:", fifo_list)
        continue

    print("Is_digit_list:",
          "".join("1" if flag else "0" for flag in is_digit_list))

    # Characters in front of the resolution number are ordinary text.
    leading_count = 4 - digit_count
    paragraph_new_list.extend(fifo_list[:leading_count])

    resolution_list = fifo_list[leading_count:4]
    resolution = "".join(resolution_list)
    print("Resolution number:", resolution)

    string = ("<a href=https://undocs.org/S/RES/{}({})>{} ({})</a>"
              .format(resolution, year, resolution, year))
    print("HTML link:", string)

    # The link replaces the matched text, so the fifo is emptied rather
    # than passed on to the output.
    string_list = list(string)
    paragraph_new_list.extend(string_list)
    fifo_list = []

# Flush
paragraph_new_list.extend(fifo_list)

print("\nparagraph_new_list via the fifo with flush:\n", paragraph_new_list)
print("\nHTML text:\n", "".join(paragraph_new_list))

from IPython.display import display, HTML 

# Render both versions in the notebook: the untouched text first, then the
# text with every resolution reference turned into a link.
original_html = "<br>Original text as HTML:<br>" + paragraph
linked_html = "<br>HTML with embedded links:<br>" + "".join(paragraph_new_list)

display(HTML(original_html))
display(HTML(linked_html))
             
             
# check by creating a html file and view it with a browser.
#import os
#print("Current Working Directory:", os.getcwd(), "File:", "test_1.html")
#with open("test_1.html", "w") as f:
#    f.write(paragraph + "</br></br>" + "".join(paragraph_new_list) + "\n")        