In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import json

In [2]:
# Load the tax-bracket records from the JSON file next to the notebook.
with open('Data/incomeTax.json') as data_file:
    data = json.load(data_file)

# Sanity check: show the rate and upper bound of New Brunswick's first
# bracket in the first record. The single-argument, parenthesised print
# form produces the same output under Python 2 and Python 3.
print(data[0]["data"]["New Brunswick"][0]["rate"])
print(data[0]["data"]["New Brunswick"][0]["upper"])

9.68
39305


In [3]:
def getAvgRate(location, income, data):
    """Compute the average (effective) tax rates paid on a given income.

    Parameters
    ----------
    location : key into ``data`` selecting the provincial bracket list.
    income : the income to evaluate.
    data : dict mapping a government name ("Federal" and ``location``) to a
        list of bracket dicts. Each bracket carries a "rate" (in percent) and
        an "upper" bound; a bracket whose "upper" is missing or not
        convertible to an integer is treated as open-ended. Brackets are
        walked in list order, so they are expected to be sorted from the
        lowest bracket to the highest.

    Returns
    -------
    dict
        ``{str(income): {"avg_fed": ..., "avg_prov": ..., "avg_total": ...}}``
        where each value is a percentage rounded to two decimals.

    Raises
    ------
    KeyError
        If "Federal" or ``location`` is not in ``data``, or a bracket that is
        reached has no "rate".
    IndexError
        If the income exceeds the upper bound of the last listed bracket.
    """

    def govtAvg(govt):
        # Fetch the tax bracket data (done first so an unknown government
        # still raises KeyError, even for a zero income).
        brackets = data[govt]
        # No income means no tax; returning early also avoids dividing by
        # zero when the average is computed below.
        if income <= 0:
            return 0.0

        untaxed_income = income  # Part of the income that has not been taxed yet
        tax_paid = 0.0  # Running total of the amount of tax paid
        lower = 0  # Lower bound of the current bracket
        n = 0  # Index of the current bracket
        while untaxed_income > 0:
            bracket = brackets[n]
            rate = float(bracket["rate"]) / 100.0  # Percent -> fraction
            # Only the failures that mean "no usable upper bound" are caught;
            # anything else is a genuine error and should surface.
            try:
                upper = int(bracket["upper"])
            except (KeyError, TypeError, ValueError, OverflowError):
                upper = None  # Open-ended (final) bracket

            if upper is None or income < upper:
                # The income tops out inside this bracket: tax the remainder here
                tax_paid += untaxed_income * rate
                untaxed_income = 0
            else:
                # The income covers this whole bracket: tax its full width
                width = upper - lower
                tax_paid += width * rate
                untaxed_income -= width
                lower = upper
                n += 1
        # This is the average rate the person ends up paying
        return round(tax_paid / income * 100.0, 2)

    avg_fed_tax = govtAvg("Federal")
    avg_prov_tax = govtAvg(location)

    avg_tax_total = round(avg_prov_tax + avg_fed_tax, 2)

    return {str(income): {"avg_fed": avg_fed_tax, "avg_prov": avg_prov_tax, "avg_total": avg_tax_total}}

In [4]:
# Gets the average tax paid for a number of incomes for a given province in a given year
def provAvgs(province, year_data, incomes=None):
    """Return one ``getAvgRate`` result per income level for ``province``.

    Parameters
    ----------
    province : key of the province's bracket list inside ``year_data``.
    year_data : bracket data for a single year, passed through to ``getAvgRate``.
    incomes : optional iterable of income levels. When omitted, the grid
        10000, 20000, ..., 240000 is used.

    Returns
    -------
    list
        The ``getAvgRate`` results, in the same order as ``incomes``.
    """
    if incomes is None:
        # range() exists on both Python 2 and 3; the grid is only 24 values,
        # so materialising it as a list on Python 2 costs nothing.
        incomes = range(10000, 250000, 10000)
    return [getAvgRate(province, income, year_data) for income in incomes]

In [5]:
def processYear(year_data):
    """Compute the average-rate table for every entry of one year's data.

    Parameters
    ----------
    year_data : dict mapping a government name to its bracket list.

    Returns
    -------
    dict
        Maps each key of ``year_data`` to the list returned by ``provAvgs``
        for that key. Every key is processed, so any non-provincial entry
        (such as "Federal") gets its own list as well.
    """
    # Only the keys are needed here; provAvgs looks the brackets up itself.
    # Iterating the dict directly works on both Python 2 and Python 3.
    return {province: provAvgs(province, year_data) for province in year_data}

In [6]:
# Build {year: per-province average rates} from every record in the input
# file, then render the whole structure as a JSON string for the cell output.
allData = dict(
    (record["year"], processYear(record["data"]))
    for record in data
)
json.dumps(allData)

'{"2014": {"Ontario": [{"10000": {"avg_total": 20.05, "avg_prov": 5.05, "avg_fed": 15.0}}, {"20000": {"avg_total": 20.05, "avg_prov": 5.05, "avg_fed": 15.0}}, {"30000": {"avg_total": 20.05, "avg_prov": 5.05, "avg_fed": 15.0}}, {"40000": {"avg_total": 20.05, "avg_prov": 5.05, "avg_fed": 15.0}}, {"50000": {"avg_total": 21.71, "avg_prov": 5.86, "avg_fed": 15.85}}, {"60000": {"avg_total": 23.28, "avg_prov": 6.41, "avg_fed": 16.87}}, {"70000": {"avg_total": 24.4, "avg_prov": 6.8, "avg_fed": 17.6}}, {"80000": {"avg_total": 25.24, "avg_prov": 7.09, "avg_fed": 18.15}}, {"90000": {"avg_total": 26.21, "avg_prov": 7.54, "avg_fed": 18.67}}, {"100000": {"avg_total": 27.31, "avg_prov": 7.9, "avg_fed": 19.41}}, {"110000": {"avg_total": 28.21, "avg_prov": 8.2, "avg_fed": 20.01}}, {"120000": {"avg_total": 28.96, "avg_prov": 8.45, "avg_fed": 20.51}}, {"130000": {"avg_total": 29.58, "avg_prov": 8.65, "avg_fed": 20.93}}, {"140000": {"avg_total": 30.2, "avg_prov": 8.83, "avg_fed": 21.37}}, {"150000": {"avg

In [31]:
# Convert the results into an object that can be visualized,
# with each province representing a line, the income as the x-value
# and the average tax rate as the y-value.
year_prov_avgs = allData["2014"]
year_dict = {}
# .items() (rather than .iteritems()) works on both Python 2 and Python 3.
for prov, income_data in year_prov_avgs.items():
    prov_dict = {}
    for income_record in income_data:
        # Each record is a single-entry dict of the form
        # {"<income>": {"avg_fed": ..., "avg_prov": ..., "avg_total": ...}};
        # unpacking the items avoids indexing keys()/values(), which are
        # not subscriptable views on Python 3.
        for income, averages in income_record.items():
            prov_dict[int(income)] = averages['avg_total']
    year_dict[prov] = Series(prov_dict)
year_df = pd.concat(year_dict, axis=1)
# The per-year results include a "Federal" entry alongside the provinces;
# it is not a province, so it is removed from the comparison.
year_df = year_df.drop('Federal', axis=1)
year_df

Unnamed: 0,New Brunswick,Newfoundland and Labrador,Nova Scotia,Ontario,Prince Edward Island
10000,24.68,22.7,23.79,20.05,24.8
20000,24.68,22.7,23.79,20.05,24.8
30000,24.68,22.7,23.87,20.05,24.8
40000,24.77,23.39,25.39,20.05,25.6
50000,26.63,25.06,27.15,21.71,27.09
60000,28.32,26.63,28.81,23.28,28.54
70000,29.53,27.77,30.21,24.4,29.82
80000,30.47,28.71,31.27,25.24,30.93
90000,31.46,29.53,32.18,26.21,31.89
100000,32.57,30.52,33.3,27.31,32.98


In [16]:
# NOTE(review): `something` is not defined in any visible cell, so this fails
# on a fresh kernel; it looks like leftover scratch work — presumably one
# income record, given the '240000' output. Confirm, then delete the cell.
# Indexing keys() only works on Python 2, where it returns a list.
something.keys()[0]

'240000'

In [20]:
# NOTE(review): `something` is not defined in any visible cell, so this fails
# on a fresh kernel; it looks like leftover scratch work probing the
# 'avg_total' field of a record. Confirm, then delete the cell.
# Indexing values() only works on Python 2, where it returns a list.
something.values()[0]['avg_total']

40.29