Let's get a list of all of our MPs first

In [None]:
import requests
import json

#Dewan Rakyat MP Posts in Sinar Malaysia Popit Database

#A timeout keeps the cell from hanging forever if the server stops answering.
dewan_rakyat_request = requests.get(
    'https://sinar-malaysia.popit.mysociety.org/api/v0.1/organizations/53633b5a19ee29270d8a9ecf',
    timeout=30)
#Stop here on an HTTP error (4xx/5xx) instead of failing later with a
#confusing KeyError or JSON decoding error.
dewan_rakyat_request.raise_for_status()

#The memberships of the organization are the posts held in the Dewan Rakyat.
posts = dewan_rakyat_request.json()['result']['memberships']

Now we will load up information on the MPs holding these posts

In [None]:
#Current MPs should not have any end dates.
#The db is not always clean: an open-ended membership may have no end_date
#key at all, an empty string, or a null value, so any falsy end_date is
#treated as "still holding the post".
MP_ids = [member['person_id'] for member in posts if not member.get('end_date')]
        
#There are some bad data during automated import from GE13 results.
#This will be cleaned up in a few weeks, but we check for duplicates just in case.
#There should only ever be 1 person holding 1 post at a moment in time.

def uniq(seq):
    """Return the items of seq as a list with duplicates removed.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable because a set tracks what was seen.
    """
    #http://www.peterbe.com/plog/uniqifiers-benchmark
    seen = set()
    unique_items = []
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique_items.append(item)
    return unique_items

def person(person_id, timeout=30):
    """Load up information of a person from the Popit database.

    person_id: Popit id of the person, appended to the persons API URL.
    timeout: seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns the 'result' entry of the decoded JSON response.
    Raises requests.exceptions.HTTPError on a 4xx/5xx response and
    requests.exceptions.Timeout when the server does not answer in time.
    """
    req = requests.get(
        'https://sinar-malaysia.popit.mysociety.org/api/v0.1/persons/' + person_id,
        timeout=timeout)
    #Fail with a clear HTTP error rather than a KeyError on a missing 'result'
    req.raise_for_status()
    return req.json()['result']

import datetime
from dateutil import parser

def age(str):
    """Calculate the current age, in whole years, from a date string.

    str: birth date string as stored in Popit; it is handed to
        dateutil's parser.parse.

    Returns an int: the difference in calendar years between today and the
    birth date, minus one if this year's birthday has not happened yet.
    """
    birth = parser.parse(str)
    now = datetime.date.today()
    years = now.year - birth.year
    #Comparing (month, day) tuples tells whether the birthday is still ahead
    if (now.month, now.day) < (birth.month, birth.day):
        years -= 1
    return int(years)
    
MP_ids = uniq(MP_ids)

#Pull down the data of current MPs from Popit Database
MPs = []
#The loop variable is not called `id`, so the builtin id() stays usable in
#later cells.
for mp_id in MP_ids:
    person_data = person(mp_id)
    #Only persons with a non-empty birth_date are kept; the others are
    #skipped because no age can be computed for them.
    if person_data.get('birth_date'):
        #add current age in addition to the values in Popit
        person_data['age'] = age(person_data['birth_date'])
        MPs.append(person_data)
    

Fun with data!

In [None]:
import numpy

#list of ages
ages = [int(mp['age']) for mp in MPs]

#print() with a single argument gives the same output on Python 2 and 3,
#whereas the bare print statement is a syntax error on Python 3.
print(numpy.median(ages))
print(numpy.max(ages))
print(numpy.min(ages))


Pandas

pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

If you're learning Python to work with data, it's worth getting used to this library: it provides pretty much everything you will need, from importing and cleaning messy data to exporting it, and it can handle very large data sets.

A lot of the earlier work, such as cleaning the data and getting unique values, could have been done easily with the built-in functions of a pandas DataFrame.

In [None]:
import pandas

#Build a DataFrame from the list of MP dicts. As the last expression of the
#cell, the frame is displayed as the cell's output; it is not stored here.
pandas.DataFrame(MPs)

In [None]:
df = pandas.DataFrame(MPs)

#Median, oldest and youngest age, computed directly on the 'age' column.
#print() with a single argument gives the same output on Python 2 and 3.
print(df['age'].median())
print(df['age'].max())
print(df['age'].min())

We could have dropped duplicates from bad data: 

df.drop_duplicates('id')

Parse and set birth_date column as datetime to calculate age without parsing it manually:

df['birth_date']= pandas.to_datetime(df['birth_date'])

Best of all, after cleaning up the data we can easily export it to CSV format, which is much easier for ordinary users to open in a spreadsheet or use for plotting charts.

In [23]:
#Pick the name, birth_date and age columns of df, keyed by column name.
MP_source = {column: df[column] for column in ('name', 'birth_date', 'age')}

In [24]:
MP_Names = pandas.DataFrame(MP_source)

#DataFrame.sort() was removed from pandas; sort_values() is its replacement.
#It returns a new frame ordered by age (youngest first) that is displayed as
#the cell output; MP_Names itself is left in its original order.
MP_Names.sort_values('age')

Unnamed: 0,age,birth_date,name
33,32,1982-10-17,Zairil Khir Johari
34,33,1982-05-13,Sim Chee Keong
75,34,1981-01-27,Teo Nie Ching
47,34,1980-11-19,Nurul Izzah Anwar
30,35,1979-08-09,Kasthuriraani Patto
6,37,1977-11-27,Liew Chin Tong
59,37,1977-10-19,Chua Tee Yong
49,38,1977-04-28,Loke Siew Fook
54,38,1976-11-13,Mas Ermieyati Samsudin
2,39,1975-09-12,Ong Kian Ming


In [None]:
#Group the MPs by age, count the members of each group and draw the counts
#as a bar chart with one bar per age.
grouped = MP_Names.groupby('age')
(
    grouped['age']
    .count()
    .plot(kind='bar', figsize=(15, 15))
)