# Workflow: Return Age at Death

In [59]:
page_list = ['Jim Morrison', 'Janis Joplin', 'Arabinda Muduli', 'Rod Stewart', 'Nevermind', 'Ludvig van Beethoven', 'Paul Banks (American musician)']

page_list_len = len(page_list)

In [2]:
def death_age(page_list):
    """
    Given a list of wikipedia page names, return a dictionary = ['Person Name']:Age at death
    """
    
    # Requirements
    import pandas as pd
    
    # Create an empty dictionary to be filled
    age_at_death = {}
    
    for page in page_list:
        
        try:
            person_dates = person_born_dead(page)  # returns list(birth date, death date)
            
            if len(person_dates) == 2:  # only if both birth and death date are present
                birth = pd.to_datetime(person_dates[0])
                death = pd.to_datetime(person_dates[1])
                
                death_age = calculate_age(birth, death)  # factoring for partial years calculate age at death
                
                age_at_death[page] = death_age  # add an element to the dictionary
                    
        except:
            continue  # skip page if doesn't have birth and death date
        
    return age_at_death

Dependencies for **death_age**

In [67]:
def person_born_dead(person_name):
    """
    Provided with a Wikipedia Page Name (str) (e.g., 'Jim Morrison')
    Return a set(Date born, Date died)
    """
    import requests
    import re
    
    wiki_url = create_wiki_url(person_name)
    text = scrape_wiki_page(wiki_url)
    
    # create an empty set to have dates added to; set protects against dups
    dates = set()
               
    try:
        start = text.find("Born")
        page_slice = text[start+4:start+1000] #slice after born
                
        pattern01 = re.compile(r'\b\w{3,9}\s\d{1,2}\W\s\d{4}')  # Month Date, Year
        pattern02 = re.compile(r'\b\d{1,2}\s\w{3,9}\s\d{4}')  # Date Month Year
             
        matches01 = pattern01.finditer(page_slice)
        
        for match in matches01:
            dates.add(match.group(0))
                      
        if len(dates) == 0:
            
            matches02 = pattern02.finditer(page_slice)
            
            for match in matches02:
                dates.add(match.group(0))
                
        return dates
    
    except:
        print(person_name, "is not a human! function: person_born_dead")

In [4]:
def create_wiki_url(page_name):
    url = "http://en.wikipedia.org/wiki/"
    format_page_name = page_name.strip().replace(" ","_")
    
    wiki_url = url + format_page_name
        
    return wiki_url

In [5]:
def scrape_wiki_page(wiki_url):
    """
    Given wikipedia url, return scraped text
    """
    
    import requests
    
    try:
        wiki_page = requests.get(wiki_url)
        wiki_page_text = wiki_page.text
        return wiki_page_text
    
    except:
        return "They don't have a wikipedia page!" 

In [64]:
# code source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python

def calculate_age(born, died):
    """
    Give date at birth and death, return age
    """
    print (born, died)
    from datetime import date
    
    age_at_death = died.year - born.year - ((died.month, died.day) < (born.month, born.day))
   
    
    return age_at_death

## Execute: Get people date of birth and death

In [42]:
def time_per(operation, yur_list, digits = 2):
    """
    Given an method and the target of the method
    Return the time it takes to process an item in your list
    
    required: time
    """
    # import and create an instance
    import time    
    start = time.time()
    
    # perform your operation
    operation(yur_list)
    
    # end the instance
    end = time.time()
    
    # operation time
    time_taken = end-start
    
    # how many items on the list that were processed
    yurlist_len = len(yur_list)
    
    # unit time
    time_per = (end-start) / yurlist_len
    
    return f'It takes {round(time_per, digits)} s to process an item in your list.'

In [68]:
death_age(page_list)

{}

In [43]:
time_per(death_age,page_list)

'It takes 0.56 s to process an item in your list.'

## Under Construction

In [48]:
death_list = death_age(page_list)

1970-10-04 00:00:00 1943-01-19 00:00:00
1961-09-01 00:00:00 2018-03-01 00:00:00
1827-03-26 00:00:00 1770-12-17 00:00:00


{'Janis Joplin': -28, 'Arabinda Muduli': 56, 'Ludvig van Beethoven': -57}

In [53]:
import pandas as pd

death01 = pd.to_datetime("1970-10-04 00:00:00")
death02 = pd.to_datetime("1943-01-19 00:00:00")

In [58]:
calculate_age(death01,death02)

1970-10-04 00:00:00 1943-01-19 00:00:00


-27

In [62]:
# code source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python

def calculate_age(born, died):
    """
    Give date at birth and death, return age
    """
    
    from datetime import date
    
    age_at_death = died.year - born.year
    #- ((died.month, died.day) < (born.month, born.day))
    print (born, died)
    
    return age_at_death

In [66]:
person_born_dead('Jim Morrison')

['July 3, 1971', 'December 8, 1943']