# Workflow: Return Age at Death

In [8]:
page_list = ['Janis Joplin', 'Arabinda Muduli', 'Rod Stewart', 'Nevermind', 'Ludvig van Beethoven', 'Paul Banks (American musician)']

In [2]:
def death_age(page_list):
    """
    Given a list of wikipedia page names, return a dictionary = ['Person Name']:Age at death
    """
    
    # Requirements
    import pandas as pd
    
    # Create an empty dictionary to be filled
    age_at_death = {}
    
    for page in page_list:
        
        try:
            person_dates = person_born_dead(page)  # returns list(birth date, death date)
            
            if len(person_dates) == 2:  # only if both birth and death date are present
                birth = pd.to_datetime(person_dates[0])
                death = pd.to_datetime(person_dates[1])
                
                death_age = calculate_age(birth, death)  # factoring for partial years calculate age at death
                
                age_at_death[page] = death_age  # add an element to the dictionary
                    
        except:
            continue  # skip page if doesn't have birth and death date
        
    return age_at_death

Dependencies for **death_age**

In [3]:
def person_born_dead(person_name):
    """
    Provided with a Wikipedia Page Name (str) (e.g., 'Jim Morrison')
    Return a set(Date born, Date died)
    """
    import requests
    import re
    
    wiki_url = create_wiki_url(person_name)
    text = scrape_wiki_page(wiki_url)
    
    # create an empty set to have dates added to; set protects against dups
    dates = set()
               
    try:
        start = text.find("Born")
        page_slice = text[start+4:start+1000] #slice after born
                
        pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
        pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year
             
        matches01 = pattern01.finditer(page_slice)
        
        for match in matches01:
            dates.add(match.group(0))
                      
        if len(dates) == 0:
            
            matches02 = pattern02.finditer(page_slice)
            
            for match in matches02:
                dates.add(match.group(0))
                
        dates = list(dates)

        return dates
    
    except:
        print(person_name, "is not a human! function: person_born_dead")

In [4]:
def create_wiki_url(page_name):
    url = "http://en.wikipedia.org/wiki/"
    format_page_name = page_name.strip().replace(" ","_")
    
    wiki_url = url + format_page_name
        
    return wiki_url

In [5]:
def scrape_wiki_page(wiki_url):
    """
    Given wikipedia url, return scraped text
    """
    
    import requests
    
    try:
        wiki_page = requests.get(wiki_url)
        wiki_page_text = wiki_page.text
        return wiki_page_text
    
    except:
        return "They don't have a wikipedia page!" 

In [6]:
# code source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python

def calculate_age(born, died):
    """
    Give date at birth and death, return age
    """
    
    from datetime import date
    
    age_at_death = died.year - born.year - ((died.month, died.day) < (born.month, born.day))
    
    return age_at_death

## Execute: Get people date of birth and death

In [9]:
death_age(page_list)

{'Janis Joplin': 27, 'Arabinda Muduli': 56, 'Ludvig van Beethoven': -57}