# Workflow: Return Age at Death

In [53]:
def death_age(page_list):
    """
    Given a list of wikipedia page names, return a dictionary {'Person Name': age at death}

    Pages for which a birth date and a death date could not both be found
    (or could not be parsed as dates) are left out of the result.
    """

    # Requirements
    import pandas as pd

    # Create an empty dictionary to be filled
    age_at_death = {}

    for page in page_list:

        try:
            person_dates = person_born_dead(page)

            # person_born_dead gives back None when it fails, and a dictionary
            # with fewer than two entries when a date is missing
            if person_dates is None or len(person_dates) != 2:
                continue

            birth = pd.to_datetime(person_dates['Born'])
            death = pd.to_datetime(person_dates['Dead'])

            # factoring for partial years calculate age at death
            age_at_death[page] = calculate_age(birth, death)

        except Exception:
            # best effort: skip a page whose dates cannot be fetched or parsed.
            # Exception (not a bare except) so Ctrl-C still stops a long run.
            continue

    return age_at_death

## Dependencies

In [54]:
# v002

def person_born_dead(person_name):
    """
    Provided with a Wikipedia Page Name (str) (e.g., 'Jim Morrison')
    Return a dictionary {'Born': date text, 'Dead': date text}

    The dates are the first two distinct date strings found in the page
    slice returned by return_page_slice(). The dictionary holds only 'Born'
    when a single date is found and is empty when none is found.
    Returns None (after printing which step to check) when a step fails.
    """
    import re

    # Each step needs the result of the previous one, so stop at the first
    # failure instead of carrying on with a value that was never assigned.
    try:
        wiki_url = create_wiki_url(person_name)
    except Exception:
        print(person_name, "Error. Check create_wiki_url()")
        return None

    try:
        text = scrape_wiki_page(wiki_url)
    except Exception:
        print(person_name, "Error. Check scrape_wiki_page()")
        return None

    try:
        page_slice = return_page_slice(text)
    except Exception:
        print(person_name, "Check out: return_page_slice()")
        return None

    try:
        pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
        pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year

        # dict.fromkeys drops duplicates while keeping the order in which
        # the dates appear on the page (a set would lose that order)
        dates = list(dict.fromkeys(pattern01.findall(page_slice)))

        # if pattern 01 was not matched, try pattern 02
        if not dates:
            dates = list(dict.fromkeys(pattern02.findall(page_slice)))

        # zip stops at the shorter input: first date -> 'Born', second -> 'Dead'
        titles = ('Born', 'Dead')
        return dict(zip(titles, dates))

    except Exception:
        print(person_name, "is not a human! function: person_born_dead")
        return None

In [55]:
def create_wiki_url(page_name):
    """
    Given a Wikipedia page name (e.g., 'Jim Morrison')
    Return the url of that page on the English Wikipedia
    """
    # surrounding whitespace is dropped, every remaining space becomes "_"
    words = page_name.strip().split(" ")

    return "http://en.wikipedia.org/wiki/" + "_".join(words)

In [56]:
def scrape_wiki_page(wiki_url, timeout = 10):
    """
    Given wikipedia url, return scraped text

    timeout: seconds handed to requests.get. Without a timeout requests
    waits indefinitely, so one stalled connection would block the whole run.

    The HTTP status is not checked: the body of the response is returned
    as text whatever the status code is.
    """

    import requests

    wiki_page = requests.get(wiki_url, timeout=timeout)

    return wiki_page.text

In [57]:
def return_page_slice(text, start_point = "Born", end_point = 1000):
    """
    Given scraped html page and a start point
    Return a page slice

    The slice starts right after the first occurrence of start_point and is
    at most end_point characters long (end_point is a length, not an index).
    Returns an empty string when start_point does not occur in text.
    """

    marker_at = text.find(start_point)

    # str.find answers -1 when the marker is missing; adding len(start_point)
    # to that would slice from an unrelated position near the top of the page
    if marker_at == -1:
        return ""

    start = marker_at + len(start_point)

    return text[start:start + end_point]  # slice after born

In [58]:
# code source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python

def calculate_age(born, died):
    """
    Given the date of birth and the date of death
    Return the age in completed years
    """

    years = died.year - born.year

    # the final year only counts once the birthday has been reached in it
    if (died.month, died.day) < (born.month, born.day):
        years -= 1

    return years

## Execute: Get people's dates of birth and death

In [17]:
def time_per(operation, yur_list, digits = 2):
    """
    Given a method and the target of the method
    Return the time it takes to process an item in your list

    operation is called once with the whole list; the elapsed time is divided
    by the number of items and rounded to `digits` decimals.
    For an empty list there is no per-item time, so the message reports the
    duration of the call instead.

    required: time
    """
    import time

    # perf_counter is the clock meant for measuring short intervals
    start = time.perf_counter()

    # perform your operation
    operation(yur_list)

    # operation time
    elapsed = time.perf_counter() - start

    # how many items on the list that were processed
    item_count = len(yur_list)

    if item_count == 0:
        # avoid dividing by zero
        return f'Your list is empty; the call took {round(elapsed, digits)} s.'

    # unit time
    unit_time = elapsed / item_count

    return f'It takes {round(unit_time, digits)} s to process an item in your list.'

In [59]:
# Wikipedia page names used as the sample input for death_age()
page_list = [
    'Jim Morrison',
    'Janis Joplin',
    'Arabinda Muduli',
    'Rod Stewart',
    'Nevermind',
    'Ludvig van Beethoven',
    'Paul Banks (American musician)',
    'Peter Zak',
    'Fontaine (Singer)',
]

# number of sample pages
page_list_len = len(page_list)

In [60]:
# Run the workflow on the sample pages; the result maps page name -> age at death
death_age(page_list)

{'Jim Morrison': 27,
 'Janis Joplin': 27,
 'Arabinda Muduli': 56,
 'Ludvig van Beethoven': 56}

In [61]:
# Time the whole workflow and report the average time per page name
time_per(death_age,page_list)

'It takes 0.57 s to process an item in your list.'

## Test **The Case of Peter Zak**

In [36]:
# Look at the dates extracted for a single page
person_born_dead('Peter Zak')

{'Born': 'May 13, 1965', 'Dead': 'May 13, 1965'}

In [43]:
# Page name used by the step-by-step cells below
person_name = 'Peter Zak'

In [44]:
import requests
import re

# Step-by-step version of the fetching part of person_born_dead(), run at
# top level so wiki_url, text and page_slice can be inspected afterwards.
# `except Exception` keeps the best-effort messages but no longer swallows
# KeyboardInterrupt / SystemExit the way a bare `except:` does.
try:
    wiki_url = create_wiki_url(person_name)
except Exception:
    print(person_name, "Error. Check create_wiki_url()")

try:
    text = scrape_wiki_page(wiki_url)
except Exception:
    print(person_name, "Error. Check scrape_wiki_page()")

try:
    page_slice = return_page_slice(text)
except Exception:
    print(person_name, "Check out: return_page_slice()")

In [50]:
# Containers filled by the next cell: the distinct date strings in page
# order, and the final {'Born': ..., 'Dead': ...} dictionary
dates = []
dates_dict = {}

# Same two date patterns as in person_born_dead()
pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year

# finditer returns iterators that can be consumed only once: re-run this
# cell before re-running the loop that reads matches01 / matches02
matches01 = pattern01.finditer(page_slice)
matches02 = pattern02.finditer(page_slice)

In [51]:
# Step-by-step version of the date collection in person_born_dead(), with
# prints after each stage. Reads dates, dates_dict, matches01 and matches02
# from the previous cell.

# pattern 01 was matched, append to dates
for match in matches01:

    # protect against duplicates
    if match.group(0) not in dates:
        dates.append(match.group(0))
        print("pattern01", dates)

# if pattern 01 was not matched, try pattern 02
# (this check has to come after the loop: inside the loop body it is never
# reached when pattern 01 finds nothing, so the fallback would never run)
if not dates:

    for match in matches02:

        # protect against duplicates
        if match.group(0) not in dates:
            dates.append(match.group(0))
            print("pattern02", dates)

# create a tuple, avoids sorting of dates
dates_tuple = tuple(dates)
print("tuple", dates_tuple)

# store in a dictionary
titles = ('Born', 'Dead')

# zip stops at the shorter input, so a single date only fills 'Born'
for title, match in zip(titles, dates_tuple):
    dates_dict[title] = match

print(dates_dict)

match01 <re.Match object; span=(83, 95), match='May 13, 1965'>
pattern01 ['May 13, 1965']
match01 <re.Match object; span=(810, 822), match='May 13, 1965'>
tuple ('May 13, 1965',)
{'Born': 'May 13, 1965'}
