In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import json
# import matplotlib.pyplot as plt
# %matplotlib inline



In [32]:
# Load the list of per-year tax records from disk
with open('web/data/incomeTax.json') as tax_file:
    data = json.load(tax_file)

# Sanity check: rate and upper bound of the first New Brunswick bracket
# in the first year record
first_nb_bracket = data[0]["data"]["New Brunswick"]["brackets"][0]
print(first_nb_bracket["rate"])
print(first_nb_bracket["upper"])

9.68
39305


In [33]:
def getAvgRate(location, income, data):
    """Compute the average federal, provincial and total income-tax rates.

    Parameters
    ----------
    location : str
        Key into ``data`` for the province. The value "Quebec" additionally
        triggers the 0.835 abatement factor on the federal tax and credit.
    income : int or float
        The income to evaluate.
    data : dict
        One year's tax tables, keyed by "Federal" and by province name.
        Each entry is read for:
          * "brackets": list of dicts with "rate" (percent) and "upper";
            a bracket whose "upper" is missing or not an integer is treated
            as the final, open-ended bracket.
          * "credits": list of dicts; only the first is used, via its
            "amount" and "percent" fields.
          * "surtaxes" (optional): list of dicts with "lower" and "rate".
          * "healthfee" / "healthfeeqc" (optional): lists of dicts with
            "lower", "upper", "value", "rate" (and "max" for "healthfeeqc").

    Returns
    -------
    dict
        ``{str(income): {"avg_fed": ..., "avg_prov": ..., "avg_total": ...}}``
        where each value is a percentage rounded to two decimals and floored
        at zero. A non-positive income yields zero for all three rates.
    """
    # No income means no meaningful average rate (and would divide by zero below)
    if income <= 0:
        return {str(income): {"avg_fed": 0.0, "avg_prov": 0.0, "avg_total": 0.0}}

    def govtTaxAmount(govt):
        """Tax owed to ``govt`` on ``income`` from the bracket table, before credits."""
        # Fetch the tax bracket data
        brackets = data[govt]["brackets"]
        untaxed_income = income  # The part of the income that has not been taxed yet
        tax_paid = 0.0  # A running total of the amount of tax paid
        lower = 0  # The lower value of the current tax bracket
        n = 0  # Keeps track of which tax bracket we're in
        while untaxed_income > 0:
            bracket = brackets[n]
            rate = float(bracket["rate"]) / 100.0  # Tax rate of bracket
            # The final bracket has no usable upper value
            try:
                upper = int(bracket["upper"])
            except (KeyError, TypeError, ValueError):
                upper = None

            if upper is None or income < upper:
                # The last bracket the person falls into: tax whatever is left
                tax_paid = tax_paid + untaxed_income * rate
                untaxed_income = 0
            else:
                # A bracket the person is entirely above: tax its full width
                tax_paid = tax_paid + (upper - lower) * rate
                untaxed_income = untaxed_income - (upper - lower)
                lower = upper
                n = n + 1
        # Quebec abatement adjustment: http://www.fin.gc.ca/fedprov/altpay-eng.asp
        if location == "Quebec" and govt == "Federal":
            tax_paid = tax_paid * 0.835
        return tax_paid

    def govtCredits(govt):
        """Value of the first listed credit (the personal exemption) for ``govt``."""
        credit = data[govt]["credits"][0]
        credit_amount = int(credit["amount"]) * float(credit["percent"]) / 100.0
        # Quebec abatement adjustment: http://www.fin.gc.ca/fedprov/altpay-eng.asp
        if location == "Quebec" and govt == "Federal":
            credit_amount = credit_amount * 0.835
        return credit_amount

    # Base tax minus the tax credits
    fed_tax = govtTaxAmount("Federal") - govtCredits("Federal")
    prov_tax = govtTaxAmount(location) - govtCredits(location)

    # If you have any surtaxes to pay, this takes that into account.
    # A missing or malformed "surtaxes" section leaves prov_tax unchanged.
    try:
        surtaxes = data[location]["surtaxes"]
        # Each surtax is charged on the provincial tax after credits
        prov_tax_after_credit = prov_tax
        prov_tax_with_surtax = prov_tax
        for surtax in surtaxes:
            lower_limit = int(surtax["lower"])
            rate = float(surtax["rate"]) / 100.0
            # If the amount of tax you pay is above the lower limit for this
            # surtax, add the amount of surtax owed to the running total
            if prov_tax_after_credit > lower_limit:
                prov_tax_with_surtax += (prov_tax_after_credit - lower_limit) * rate
        prov_tax = prov_tax_with_surtax
    except (KeyError, TypeError, ValueError):
        pass

    # Adds the amount for the health fee that some provinces have.
    # Best effort: stops quietly at a missing section or an unparsable bracket.
    try:
        health_fee = 0
        health_brackets = data[location]["healthfee"]
        for bracket in health_brackets:
            lower = int(bracket["lower"])
            upper = int(bracket["upper"])
            value = int(bracket["value"])
            rate = float(bracket["rate"])
            if lower <= income <= upper:
                health_fee = health_fee + value
                health_fee = health_fee + (income - lower - 1) * rate / 100.0
                prov_tax = prov_tax + health_fee
    except (KeyError, TypeError, ValueError):
        pass

    # This is for the special health fee that Quebec has (same shape as above,
    # but each bracket also caps the fee at its "max" value)
    try:
        health_fee = 0
        health_brackets = data[location]["healthfeeqc"]
        for bracket in health_brackets:
            lower = int(bracket["lower"])
            upper = int(bracket["upper"])
            value = float(bracket["value"])
            rate = float(bracket["rate"])
            max_amount = float(bracket["max"])
            if lower <= income <= upper:
                health_fee = health_fee + value
                health_fee = health_fee + (income - lower - 1) * rate / 100.0
                # Limit the fee to the upper bound
                if health_fee > max_amount:
                    health_fee = max_amount
                prov_tax = prov_tax + health_fee
    except (KeyError, TypeError, ValueError):
        pass

    # Calculate the percentage tax, and return 0 if it is negative
    avg_fed_tax = max(round(fed_tax / income * 100, 2), 0)
    avg_prov_tax = max(round(prov_tax / income * 100, 2), 0)
    avg_tax_total = max(round(avg_prov_tax + avg_fed_tax, 2), 0)

    return {str(income): {"avg_fed": avg_fed_tax, "avg_prov": avg_prov_tax, "avg_total": avg_tax_total}}

In [34]:
# Spot check: average rates on a 100,000 income in British Columbia,
# using the tax tables of the second year record
getAvgRate(location="British Columbia", income=100000, data=data[1]["data"])

{'100000': {'avg_fed': 17.6, 'avg_prov': 7.11, 'avg_total': 24.71}}

In [35]:
# Gets the average tax paid for a number of incomes for a given province in a given year
def provAvgs(province, year_data, year, incomes=None):
    """Return getAvgRate results for ``province`` over a grid of incomes.

    Parameters
    ----------
    province : str
        Key into ``year_data`` passed through to getAvgRate as the location.
    year_data : dict
        One year's tax tables, passed through to getAvgRate.
    year
        Not used by the computation; kept so existing callers keep working.
    incomes : iterable of numbers, optional
        Incomes to evaluate. Defaults to 5000, 10000, ..., 495000.

    Returns
    -------
    list
        One getAvgRate result per income, in the order given.
    """
    if incomes is None:
        # range (not xrange) so the cell runs on both Python 2 and Python 3
        incomes = range(5000, 500000, 5000)
    return [getAvgRate(province, income, year_data) for income in incomes]

In [38]:
def processYear(year_data, year):
    """Compute the income-grid averages for every entry of one year's tables.

    Parameters
    ----------
    year_data : dict
        One year's tax tables; every key is processed as a location.
    year
        Passed through to provAvgs.

    Returns
    -------
    dict
        Maps each key of ``year_data`` to the list returned by provAvgs.
    """
    # Iterate over keys directly: dict.iteritems() does not exist on Python 3
    # and the values were never used here.
    return {province: provAvgs(province, year_data, year) for province in year_data}

In [39]:
# Average rates for every year record: {year: {province: [rate records]}}
allData = {}
for year_record in data:
    year, year_data = year_record["year"], year_record["data"]
    allData[year] = processYear(year_data, year)
print(allData)

{u'2015': {u'British Columbia': [{'5000': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'10000': {'avg_total': 0.03, 'avg_prov': 0.03, 'avg_fed': 0}}, {'15000': {'avg_total': 5.38, 'avg_prov': 1.71, 'avg_fed': 3.67}}, {'20000': {'avg_total': 9.05, 'avg_prov': 2.55, 'avg_fed': 6.5}}, {'25000': {'avg_total': 11.25, 'avg_prov': 3.05, 'avg_fed': 8.2}}, {'30000': {'avg_total': 12.72, 'avg_prov': 3.38, 'avg_fed': 9.34}}, {'35000': {'avg_total': 13.77, 'avg_prov': 3.62, 'avg_fed': 10.15}}, {'40000': {'avg_total': 14.69, 'avg_prov': 3.94, 'avg_fed': 10.75}}, {'45000': {'avg_total': 15.63, 'avg_prov': 4.36, 'avg_fed': 11.27}}, {'50000': {'avg_total': 17.03, 'avg_prov': 4.69, 'avg_fed': 12.34}}, {'55000': {'avg_total': 18.19, 'avg_prov': 4.97, 'avg_fed': 13.22}}, {'60000': {'avg_total': 19.15, 'avg_prov': 5.2, 'avg_fed': 13.95}}, {'65000': {'avg_total': 19.96, 'avg_prov': 5.39, 'avg_fed': 14.57}}, {'70000': {'avg_total': 20.65, 'avg_prov': 5.55, 'avg_fed': 15.1}}, {'75000': {'avg_total': 21

In [40]:
# Convert the results into an object that can be visualized
# with each province representing a line, the income as the x-value
# and the avg tax as the y-value
def createYearDict(year, all_data=None):
    """Build one pandas Series of average total tax rate per province.

    Parameters
    ----------
    year
        Key into ``all_data`` selecting the year to convert.
    all_data : dict, optional
        ``{year: {province: [{income_str: {"avg_total": ...}}, ...]}}``.
        Defaults to the notebook-level ``allData``.

    Returns
    -------
    dict
        Maps each province to a Series indexed by integer income whose
        values are the "avg_total" rates.
    """
    if all_data is None:
        all_data = allData
    year_prov_avgs = all_data[year]
    year_dict = {}
    for prov in year_prov_avgs:
        prov_dict = {}
        for income_record in year_prov_avgs[prov]:
            # Each record holds a single {income: rates} pair; next(iter(...))
            # works where Python 2's .keys()[0] / .values()[0] do not
            income, rates = next(iter(income_record.items()))
            prov_dict[int(income)] = rates['avg_total']
        year_dict[prov] = Series(prov_dict)
    return year_dict
# Preview the per-province series for 2014
createYearDict(year="2014")

{u'Alberta': 5000       0.00
 10000      0.00
 15000      3.86
 20000      7.76
 25000     11.21
 30000     13.50
 35000     15.15
 40000     16.37
 45000     17.50
 50000     18.95
 55000     20.14
 60000     21.13
 65000     21.96
 70000     22.68
 75000     23.30
 80000     23.85
 85000     24.32
 90000     24.84
 95000     25.43
 100000    25.96
 105000    26.44
 110000    26.87
 115000    27.26
 120000    27.63
 125000    27.97
 130000    28.27
 135000    28.56
 140000    28.91
 145000    29.25
 150000    29.58
           ...  
 350000    34.96
 355000    35.02
 360000    35.08
 365000    35.13
 370000    35.18
 375000    35.24
 380000    35.28
 385000    35.33
 390000    35.37
 395000    35.42
 400000    35.47
 405000    35.51
 410000    35.56
 415000    35.59
 420000    35.64
 425000    35.67
 430000    35.72
 435000    35.75
 440000    35.79
 445000    35.82
 450000    35.86
 455000    35.90
 460000    35.92
 465000    35.96
 470000    35.99
 475000    36.03
 480000    36.06
 4

# Plot average total tax rate against income, one line per province (2014).
# year_df is not defined at notebook scope, so build it here from the
# per-province Series and leave out the 'Federal' entry.
year_df = pd.concat(createYearDict("2014"), axis=1).drop('Federal', axis=1)
year_df.plot(figsize=(20, 12))

In [41]:
def convertYearD3(year_data):
    """Convert per-province Series into a list of line records.

    Parameters
    ----------
    year_data : dict
        Maps province name to a pandas Series indexed by income with
        average tax rates in percent as values.

    Returns
    -------
    list of dict
        One ``{"key": province, "values": [{"x": income, "y": rate}, ...]}``
        per province, skipping 'Federal'. ``y`` is the rate divided by 100
        and rounded to three decimals.
    """
    d3_year_data = []
    for province in year_data:
        if province == 'Federal':
            continue
        avgs = year_data[province]
        # tolist() yields native Python numbers, which json can serialize,
        # and avoids Series.iteritems(), which newer pandas no longer has
        points = [
            {"x": income, "y": round(float(avg) / 100.0, 3)}
            for income, avg in zip(avgs.index.tolist(), avgs.tolist())
        ]
        d3_year_data.append({"key": province, "values": points})
    return d3_year_data
# Preview the line records for 2014
convertYearD3(year_data=createYearDict(year="2014"))

[{'key': u'British Columbia',
  'values': [{'x': 5000, 'y': 0.0},
   {'x': 10000, 'y': 0.001},
   {'x': 15000, 'y': 0.056},
   {'x': 20000, 'y': 0.092},
   {'x': 25000, 'y': 0.114},
   {'x': 30000, 'y': 0.128},
   {'x': 35000, 'y': 0.139},
   {'x': 40000, 'y': 0.148},
   {'x': 45000, 'y': 0.158},
   {'x': 50000, 'y': 0.172},
   {'x': 55000, 'y': 0.184},
   {'x': 60000, 'y': 0.193},
   {'x': 65000, 'y': 0.201},
   {'x': 70000, 'y': 0.208},
   {'x': 75000, 'y': 0.214},
   {'x': 80000, 'y': 0.221},
   {'x': 85000, 'y': 0.227},
   {'x': 90000, 'y': 0.234},
   {'x': 95000, 'y': 0.242},
   {'x': 100000, 'y': 0.249},
   {'x': 105000, 'y': 0.255},
   {'x': 110000, 'y': 0.262},
   {'x': 115000, 'y': 0.268},
   {'x': 120000, 'y': 0.274},
   {'x': 125000, 'y': 0.279},
   {'x': 130000, 'y': 0.284},
   {'x': 135000, 'y': 0.289},
   {'x': 140000, 'y': 0.294},
   {'x': 145000, 'y': 0.299},
   {'x': 150000, 'y': 0.304},
   {'x': 155000, 'y': 0.308},
   {'x': 160000, 'y': 0.313},
   {'x': 165000, 'y': 

In [42]:
# Line records for each exported year, keyed by year
years = ["2014", "2015", "2016"]
all_d3_data = {}
for year in years:
    all_d3_data[year] = convertYearD3(createYearDict(year))
all_d3_data

{'2014': [{'key': u'British Columbia',
   'values': [{'x': 5000, 'y': 0.0},
    {'x': 10000, 'y': 0.001},
    {'x': 15000, 'y': 0.056},
    {'x': 20000, 'y': 0.092},
    {'x': 25000, 'y': 0.114},
    {'x': 30000, 'y': 0.128},
    {'x': 35000, 'y': 0.139},
    {'x': 40000, 'y': 0.148},
    {'x': 45000, 'y': 0.158},
    {'x': 50000, 'y': 0.172},
    {'x': 55000, 'y': 0.184},
    {'x': 60000, 'y': 0.193},
    {'x': 65000, 'y': 0.201},
    {'x': 70000, 'y': 0.208},
    {'x': 75000, 'y': 0.214},
    {'x': 80000, 'y': 0.221},
    {'x': 85000, 'y': 0.227},
    {'x': 90000, 'y': 0.234},
    {'x': 95000, 'y': 0.242},
    {'x': 100000, 'y': 0.249},
    {'x': 105000, 'y': 0.255},
    {'x': 110000, 'y': 0.262},
    {'x': 115000, 'y': 0.268},
    {'x': 120000, 'y': 0.274},
    {'x': 125000, 'y': 0.279},
    {'x': 130000, 'y': 0.284},
    {'x': 135000, 'y': 0.289},
    {'x': 140000, 'y': 0.294},
    {'x': 145000, 'y': 0.299},
    {'x': 150000, 'y': 0.304},
    {'x': 155000, 'y': 0.308},
    {'x': 16

In [43]:
# Save the line records as JSON
with open('webcustom/avgTaxAllHealth.json', 'w') as out_file:
    out_file.write(json.dumps(all_d3_data))