In [63]:
# v002

def person_born_dead(person_name):
    """
    Provided with a Wikipedia Page Name (str) (e.g., 'Jim Morrison')
    Return a set(Date born, Date died)
    """
    import requests
    import re
    
    try:
        wiki_url = create_wiki_url(person_name)
    except:
        print(person_name, "Error. Check create_wiki_url()")
        
    try:
        text = scrape_wiki_page(wiki_url)
    except:
        print(person_name, "Error. Check scrape_wiki_page()")
    
    try:
        page_slice = return_page_slice(text)
    except:
        print(person_name, "Check out: return_page_slice()")
    
    try:
        # create an empty set to have dates added to; set protects against dups
        dates = []
        dates_dict = {}
        
        pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
        pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year
        
        
        matches01 = pattern01.finditer(page_slice)
        matches02 = pattern02.finditer(page_slice)
        
        # pattern 01 was matched, append to dates
        for match in matches01:
            
            # protect against duplicates
            if match.group(0) not in dates:
                dates.append(match.group(0))
        
        # if pattern 01 was not matched, try pattern 02
        if len(dates) == 0:
            
            for match in matches02:
                # protect against duplicates
                if match.group(0) not in dates:
                    dates.append(match.group(0))
                
        # create a tuple, avoids sorting of dates
        dates_tuple = tuple(dates)
        
        # store in a dictionary
        titles = ('Born', 'Dead')
        
        for title, match in zip(titles, dates_tuple):
            dates_dict[title] = match

        return dates_dict
    
    except:
        print(person_name, "is not a human! function: person_born_dead")

## Dependencies

In [64]:
def create_wiki_url(page_name):
    """
    Given a wikipedia page name, return a url address
    """
    
    url = "http://en.wikipedia.org/wiki/"
        
    format_page_name = page_name.strip().replace(" ","_")
    wiki_url = url + format_page_name
        
    return wiki_url

In [65]:
def scrape_wiki_page(wiki_url):
    """
    Given wikipedia url, return scraped text
    """
    
    import requests

    wiki_page = requests.get(wiki_url)
    wiki_page_text = wiki_page.text
    
    return wiki_page_text

In [66]:
def return_page_slice(text, start_point = "Born", end_point = 1000):
    """
    Given scraped html page and a start point
    Return a page slice
    """

    start = text.find(start_point) + len(start_point)
    end = start + end_point
    page_slice = text[start:end] #slice after born
        
    return page_slice

# Execute

In [67]:
def time_per(operation, yur_list, digits = 2):
    """
    Given an method and the target of the method
    Return the time it takes to process an item in your list
    
    required: time
    """
    # import and create an instance
    import time    
    start = time.time()
    
    # perform your operation
    operation(yur_list)
    
    # end the instance
    end = time.time()
    
    # operation time
    time_taken = end-start
    
    # how many items on the list that were processed
    yurlist_len = len(yur_list)
    
    # unit time
    time_per = (end-start) / yurlist_len
    
    return f'It takes {round(time_per, digits)} s to process an item in your list.'

In [68]:
person_born_dead('Ludvig van Beethoven')

{'Born': '17 December 1770', 'Dead': '26 March 1827'}

In [69]:
person_name = 'Ludvig van Beethoven'

time_per(person_born_dead, person_name)

'It takes 0.03 s to process an item in your list.'

## Under Construction

In [52]:
person_name = 'Jim Morrison'

In [57]:
import requests
import re

wiki_url = create_wiki_url(person_name)
    
text = scrape_wiki_page(wiki_url)
   
page_slice = return_page_slice(text)

# create an empty set to have dates added to; set protects against dups
dates = []
dates_dict = {}
        
pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year
        
titles = ('Born', 'Dead')

In [54]:
matches01 = pattern01.finditer(page_slice)
        
for match in matches01:
    dates.append(match.group(0))
            
    dates_tuple = tuple(dates)
        
    for title, match in zip(titles, dates_tuple):
        dates_dict[title] = match
        
dates_dict

{'Born': 'December 8, 1943', 'Dead': 'July 3, 1971'}

In [58]:
matches02 = pattern02.finditer(page_slice)
            
for match in matches02:
    dates.append(match.group(0))
                
    dates_tuple = tuple(dates)
                
    for title, match in zip(titles, dates_tuple):
        dates_dict[title] = match
            
dates_dict

{}