In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import json
import locale
locale.setlocale(locale.LC_ALL, 'en_US')
# import matplotlib.pyplot as plt
# %matplotlib inline



'en_US'

In [5]:
with open('web/data/incomeTax.json') as data_file:    
    data = json.load(data_file)

# Testing
print data[0]["data"]["New Brunswick"]["brackets"][0]["rate"]
print data[0]["data"]["New Brunswick"]["brackets"][0]["upper"]

9.68
32730


In [12]:
def getAvgRate(location, income, data):
    
    def govtTaxAmount(govt):
        # Fetch the tax bracket data
        brackets = data[govt]["brackets"]
        untaxed_income = income # This will keep track of the part of the income that has not been taxed yet
        tax_paid = 0.0 # A running total of the amount of tax paid
        lower = 0 # The lower value of tax bracket
        n = 0 # Keeps track of which tax bracket we're in
        while untaxed_income > 0:
            # Fetch the current bracket
            bracket = brackets[n]
            # This checks to see if we're on the final bracket
            try:
                upper = int(bracket["upper"]) # Upper value of tax bracket
                rate = float(bracket["rate"])/100.0 # Tax rate of bracket
            except:
                upper = "max" # There is no upper value for the final bracket
                rate = float(bracket["rate"])/100.0

            if upper == "max" or income < upper:
                # Does the calculation for the final bracket the person falls into
                tax_paid = tax_paid + untaxed_income*rate
                untaxed_income = 0
            else:
                # Gets the tax paid on the brackets a person is totally above
                tax_paid = tax_paid + (upper - lower)*rate
                untaxed_income = untaxed_income - (upper - lower)
                lower = upper
                n = n + 1
        # This is the average rate the person ends up paying
        # Quebec abatement adjustment: http://www.fin.gc.ca/fedprov/altpay-eng.asp
        if (location == "Quebec" and govt == "Federal"):
            tax_paid = tax_paid*0.835
        return tax_paid
    
    # Get the base amount of tax that you pay
    fed_tax = govtTaxAmount("Federal")
    if (location != "Federal"):
        prov_tax = govtTaxAmount(location)
    else:
        prov_tax = 0
    
    # A function for deducting the personal exemption amount federally and provincially
    def govtCredits(govt):
        credit = data[govt]["credits"][0]
        credit_amount = int(credit["amount"])*float(credit["percent"])/100.0
        # Quebec abatement adjustment: http://www.fin.gc.ca/fedprov/altpay-eng.asp
        if (location == "Quebec" and govt == "Federal"):
            credit_amount = credit_amount*0.835
        return credit_amount
    
    # Subtract the tax credits
    fed_tax = fed_tax - govtCredits("Federal")
    if (location != "Federal"):
        prov_tax = prov_tax - govtCredits(location)
    else:
        prov_tax = 0
    
    # If you have any surtaxes to pay, this takes that into account
    try:
        # Get the surtaxes. Throws an error if there are none.
        surtaxes = data[location]["surtaxes"]
        prov_tax_with_surtax = prov_tax
        for surtax in surtaxes:
            lower_limit = int(surtax["lower"])
            rate = float(surtax["rate"])/100.0
            # If the amount of tax you pay is above the lower limit for each surtax,
            # add the amont of surtax you have to pay to the running total
            if prov_tax_after_credit > lower_limit:
                prov_tax_with_surtax += (prov_tax_after_credit - lower_limit) * rate
        prov_tax = prov_tax_with_surtax
    except:
        pass
    
    # Adds the amount for the health fee that some provinces have
    try:
        health_fee = 0
        # Get the health fee, or throw an error if there are none
        health_brackets = data[location]["healthfee"]
        for bracket in health_brackets:
            lower = int(bracket["lower"])
            upper = int(bracket["upper"])
            value = int(bracket["value"])
            rate = float(bracket["rate"])
            if (income >= lower) and (income <= upper):
                health_fee = health_fee + value
                health_fee = health_fee + (income - lower - 1) * rate / 100.0
                prov_tax = prov_tax + health_fee
    except:
        pass
    
    # This is for the special health fee that quebec has
    try:
        health_fee = 0
        # Get the health fee, or throw an error if there are none
        health_brackets = data[location]["healthfeeqc"]
        for bracket in health_brackets:
            lower = int(bracket["lower"])
            upper = int(bracket["upper"])
            value = float(bracket["value"])
            rate = float(bracket["rate"])
            max_amount = float(bracket["max"])
            if (income >= lower) and (income <= upper):
                health_fee = health_fee + value
                health_fee = health_fee + (income - lower - 1) * rate / 100.0
                # Limit the tax to the upper bound
                if (health_fee > max_amount):
                    health_fee = max_amount
                prov_tax = prov_tax + health_fee
    except:
        pass
    
    # Quebec has different rates for cpp and EI
    if (location != "Quebec"):
        cpp = data["Federal"]["cpp"]
        ei = data["Federal"]["ei"]
    else:
        cpp = data["Quebec"]["cpp"]
        ei = data["Quebec"]["ei"]
        
    # Adds the CPP tax
    max_income = int(cpp["maxIncome"])
    exemption = int(cpp["exemption"])
    rate = float(cpp["rate"])
    if (income > max_income):
        pen_income = max_income
    else:
        pen_income = income
    pen_income = pen_income - exemption
    cpp_amount = pen_income * rate / 100.0
    fed_tax = fed_tax + cpp_amount
    
    # Adds the EI tax
    max_income = int(ei["maxIncome"])
    rate = float(ei["rate"])
    if (income > max_income):
        ei_income = max_income
    else:
        ei_income = income
    ei_amount = ei_income * rate / 100.0
    fed_tax = fed_tax + ei_amount
    
    # Adds the Quebec Parental Insurance Plan
    if (location == "Quebec"):
        try:
            qpip = data["Quebec"]["qpip"]
            max_income = int(qpip["maxIncome"])
            rate = float(qpip["rate"])

            if (income > max_income):
                qpip_income = max_income
            else:
                qpip_income = income

            qpip_amount = qpip_income * rate / 100.0
            prov_tax = prov_tax + qpip_amount
        except:
            pass
    
    # Calculate the percentage tax, and return 0 if it is negative
    avg_fed_tax = max(round(fed_tax / income * 100, 2), 0)
    avg_prov_tax = max(round(prov_tax / income * 100, 2), 0)
    avg_tax_total = max(round(avg_prov_tax + avg_fed_tax, 2), 0)

    return {str(income) : {"avg_fed": avg_fed_tax, "avg_prov": avg_prov_tax, "avg_total": avg_tax_total}}

In [7]:
### Testing
getAvgRate("New Brunswick", 100000, data[4]["data"])

Before health: 12117.3918
After health: 12117.3918


{'100000': {'avg_fed': 20.31, 'avg_prov': 12.12, 'avg_total': 32.43}}

In [8]:
data[4]["year"]

u'20162'

income = 80000
location = "Quebec"
test_data = data[0]["data"]

qpip = test_data["Quebec"]["qpip"]
max_income = int(qpip["maxIncome"])
rate = float(qpip["rate"])

if (income > max_income):
    qpip_income = max_income
else:
    qpip_income = income

qpip_amount = qpip_income * rate / 100.0
print qpip_amount

In [9]:
from itertools import chain
# Gets the average tax paid for a number of incomes for a given province in a given year
def provAvgs(province, year_data, year):
    incomeList = chain(range(1000, 10000, 100), range(10000, 60000, 1000), range(60000, 505000, 5000))
    return [getAvgRate(province, income, year_data) for income in incomeList]

In [10]:
def processYear(year_data, year):
    year_result = {}
    for province, brackets in year_data.iteritems():
        year_result[province] = provAvgs(province, year_data, year)
    return year_result

## Create a Dictionary for the Data

In [13]:
allData = {}
for year_record in data:
    year = year_record["year"]
    year_data = year_record["data"]
    allData[year] = processYear(year_data, year)
print allData

{u'2015': {u'British Columbia': [{'1000': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1100': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1200': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1300': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1400': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1500': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1600': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1700': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1800': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'1900': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2000': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2100': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2200': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2300': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2400': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2500': {'avg_total': 0.0, 'avg_prov': 0, 'avg_fed': 0}}, {'2600': {'avg_total': 

## Create a Dimensional Dataframe
### Combined Federal and Provincial Rate

In [14]:
# Convert the results into an object that can be visualized
# with each province representing a line, the income as the x-value
# and the avg tax as the y-value
def createYearDfCombined(year):
    year_prov_avgs = allData[year]
    year_dict = {}
    for prov, income_data in year_prov_avgs.iteritems():
        prov_dict = {}
        for income_record in income_data:
            income = income_record.keys()[0]
            avg_tax = income_record.values()[0]['avg_total']
            prov_dict[int(income)] = avg_tax
        prov_series = Series(prov_dict)
        year_dict[prov] = prov_series
    # Convert the dictionary to a dataframe
    year_df = pd.concat(year_dict, axis=1)
    # Drop the Federal column since we're only interested in combined rates
    year_df = year_df.drop('Federal', axis=1)
    # Assign the year to a column to use in a dimensional model later
    year_df["Year"] = year
    # Get the income out of the index so that we can use it in a dimensional model later
    year_df = year_df.reset_index(level=0)
    year_df = year_df.rename(columns={"index": "Income"})
    return year_df

In [16]:
years = ["2005", "2014", "2015", "2016", "20162"]
array_of_dfs = []
for year in years:
    avg_rates_combined = createYearDfCombined(year)
    df = pd.melt(avg_rates_combined, id_vars=["Year", "Income"], var_name="Province", value_name="Average Income Tax")
    array_of_dfs.append(df)
merged_data = pd.concat(array_of_dfs, ignore_index=True, axis=0)
merged_data.to_csv(path_or_buf="./MiscData/AvgRatesCombined.csv", index=False)
merged_data[(merged_data["Province"] == "New Brunswick") & (merged_data["Income"] == 350000)]

Unnamed: 0,Year,Income,Province,Average Income Tax
885,2005,350000,New Brunswick,43.28
3862,2014,350000,New Brunswick,42.56
6839,2015,350000,New Brunswick,45.66
9816,2016,350000,New Brunswick,45.37
12793,20162,350000,New Brunswick,47.13


### Adjust incomes based on inflation

Tax parameters are indexed using the Consumer Price Index (CPI), as reported by Statistics Canada. An indexed parameter for a new taxation year is equal to **A divided by B**, where:

**A** is the average monthly value of the CPI for the 12 months ending in September of the preceding year (e.g. October 2004 to September 2005 is used to calculate the indexed parameters for 2006); and
**B** is the average CPI calculated for the preceding 12-month period (e.g. October 2003 to September 2004).

_Source: http://www.fin.gc.ca/n05/05-087-eng.asp (modified slightly)_

In [17]:
# Load the file that contains the CPI indexes as calculated above
tax_index = pd.read_csv(filepath_or_buffer="./MiscData/TaxCPIIndex.csv")
def incAdjust(row):
    year = int(row["Year"])
    # Get the index values between the year and 2016
    index_values = tax_index.loc[tax_index["Year"] > year,"CPI Change"].values / 100.0 + 1 
    multiplier = np.prod(index_values) # Combine them
    return np.round(row["Income"] * multiplier)

In [18]:
# This is just for one-off conversions
tax_index = pd.read_csv(filepath_or_buffer="./MiscData/TaxCPIIndex.csv")
def incAdjustSingle(year, income):
    # Get the index values between the year and 2016
    index_values = tax_index.loc[tax_index["Year"] > year,"CPI Change"].values / 100.0 + 1 
    multiplier = np.prod(index_values) # Combine them
    return np.round(income * multiplier)

In [19]:
incAdjustSingle(2005, 14523)

17616.0

In [21]:
merged_data_adjusted = merged_data.copy()
merged_data_adjusted["Income Adjusted"] = 0
merged_data_adjusted["Income Adjusted"] = merged_data_adjusted.apply(incAdjust, axis=1)
merged_data_adjusted.to_csv(path_or_buf="./MiscData/AvgRatesAdjusted.csv", index=False)
merged_data_adjusted[(merged_data_adjusted["Province"] == "New Brunswick") & (merged_data_adjusted["Income"] == 350000)]

Unnamed: 0,Year,Income,Province,Average Income Tax,Income Adjusted
885,2005,350000,New Brunswick,43.28,424540.0
3862,2014,350000,New Brunswick,42.56,360577.0
6839,2015,350000,New Brunswick,45.66,354550.0
9816,2016,350000,New Brunswick,45.37,350000.0
12793,20162,350000,New Brunswick,47.13,350000.0


### Generate a list of provinces ordered by average tax rate for each income

In [40]:
# Get just the 2016 data and incomes at least 10000, since the 
# rank before that is quite meaningless since they are almost all 0
merged_data_2016 = merged_data[(merged_data["Year"] == "2016") & (merged_data["Income"] >= 10000)].copy()
merged_data_2016["Rank At Income"] = 0
# Grab the list of incomes for 2016 so that we can loop over them
incomes = merged_data_2016["Income"].unique()
# Generate a list of numbers that'll be used to rank each province
one_to_13 = Series(range(1,14))

for income in incomes:
    # Get the rows from the dataframe that equal the income of interest
    single_inc_df = merged_data_2016[merged_data_2016["Income"] == income]
    # Sort those rows in ascending order
    single_inc_df = single_inc_df.sort_values(by="Average Income Tax", ascending=False)
    # Give them a new column and assign it the value of their rank
    single_inc_df["Rank At Income"] = one_to_13.values
    merged_data_2016.loc[merged_data_2016["Income"] == income,:] = single_inc_df

# merged_data_2016.to_csv("./MiscData/RankedProvinces.csv", index=False)
merged_data_2016[(merged_data_2016["Province"] == "Quebec") & (merged_data_2016["Income"] == 100000)]

Unnamed: 0,Year,Income,Province,Average Income Tax,Rank At Income
11369,2016,100000,Quebec,34.71,1


### Provincial Only

In [22]:
def createYearDfProvOnly(year):
    year_prov_avgs = allData[year]
    year_dict = {}
    for prov, income_data in year_prov_avgs.iteritems():
        prov_dict = {}
        for income_record in income_data:
            income = income_record.keys()[0]
            avg_tax = income_record.values()[0]['avg_prov']
            prov_dict[int(income)] = avg_tax
        prov_series = Series(prov_dict)
        year_dict[prov] = prov_series
    # Convert the dictionary to a dataframe
    year_df = pd.concat(year_dict, axis=1)
    # Drop the Federal column since we're only interested in combined rates
    # Assign the year to a column to use in a dimensional model later
    year_df["Year"] = year
    # Get the income out of the index so that we can use it in a dimensional model later
    year_df = year_df.reset_index(level=0)
    year_df = year_df.rename(columns={"index": "Income"})
    return year_df
prov_only = createYearDfProvOnly("2016")
del prov_only["Federal"]

In [23]:
years = ["2005", "2014", "2015", "2016", "20162"]
array_of_dfs = []
for year in years:
    avg_rates = createYearDfProvOnly(year)
    df = pd.melt(avg_rates, id_vars=["Year", "Income"], var_name="Province", value_name="Average Income Tax")
    array_of_dfs.append(df)
merged_data = pd.concat(array_of_dfs, ignore_index=True, axis=0)
merged_data.to_csv(path_or_buf="./MiscData/AvgRatesProvOnly.csv", index=False)
merged_data

Unnamed: 0,Year,Income,Province,Average Income Tax
0,2005,1000,Alberta,0.00
1,2005,1100,Alberta,0.00
2,2005,1200,Alberta,0.00
3,2005,1300,Alberta,0.00
4,2005,1400,Alberta,0.00
5,2005,1500,Alberta,0.00
6,2005,1600,Alberta,0.00
7,2005,1700,Alberta,0.00
8,2005,1800,Alberta,0.00
9,2005,1900,Alberta,0.00


In [24]:
merged_data_adjusted = merged_data.copy()
merged_data_adjusted["Income Adjusted"] = 0
merged_data_adjusted["Income Adjusted"] = merged_data_adjusted.apply(incAdjust, axis=1)
merged_data_adjusted.to_csv(path_or_buf="./MiscData/AvgRatesProvOnlyAdjusted.csv", index=False)
merged_data_adjusted

Unnamed: 0,Year,Income,Province,Average Income Tax,Income Adjusted
0,2005,1000,Alberta,0.00,1213.0
1,2005,1100,Alberta,0.00,1334.0
2,2005,1200,Alberta,0.00,1456.0
3,2005,1300,Alberta,0.00,1577.0
4,2005,1400,Alberta,0.00,1698.0
5,2005,1500,Alberta,0.00,1819.0
6,2005,1600,Alberta,0.00,1941.0
7,2005,1700,Alberta,0.00,2062.0
8,2005,1800,Alberta,0.00,2183.0
9,2005,1900,Alberta,0.00,2305.0


## Prepare data for D3

In [537]:
# Load the file that contains the CPI indexes as calculated above
tax_index = pd.read_csv(filepath_or_buffer="./MiscData/TaxCPIIndex.csv")

# Convert the results into an object that can be visualized
# with each province representing a line, the income as the x-value
# and the avg tax as the y-value
def createYearDict(year):
    year_prov_avgs = allData[year]
    year_dict = {}
    for prov, income_data in year_prov_avgs.iteritems():
        prov_dict = {}
        for income_record in income_data:
            income = income_record.keys()[0]
            # Adjust for inflation
            index_values = tax_index.loc[tax_index["Year"] > int(year),"CPI Change"].values / 100.0 + 1
            multiplier = np.prod(index_values)
            income = np.round(float(income) * multiplier)
            
            # Store the values in the dict
            avg_tax = income_record.values()[0]['avg_total']
            # Filter out values above 300000
            if (income <= 300000):
                prov_dict[int(income)] = avg_tax
        prov_series = Series(prov_dict)
        year_dict[prov] = prov_series
    return year_dict
createYearDict("2014")

{u'Alberta': 1030       0.00
 1133       0.00
 1236       0.00
 1339       0.00
 1442       0.00
 1545       0.00
 1648       0.00
 1751       0.00
 1854       0.00
 1957       0.00
 2060       0.00
 2163       0.00
 2266       0.00
 2370       0.00
 2473       0.00
 2576       0.00
 2679       0.00
 2782       0.00
 2885       0.00
 2988       0.00
 3091       0.00
 3194       0.00
 3297       0.00
 3400       0.00
 3503       0.00
 3606       0.00
 3709       0.00
 3812       0.00
 3915       0.00
 4018       0.00
           ...  
 149382    31.55
 154533    31.80
 159684    32.04
 164835    32.26
 169986    32.46
 175138    32.65
 180289    32.83
 185440    33.00
 190591    33.17
 195742    33.32
 200893    33.47
 206044    33.60
 211195    33.73
 216346    33.86
 221498    33.98
 226649    34.09
 231800    34.20
 236951    34.31
 242102    34.40
 247253    34.50
 252404    34.59
 257555    34.68
 262706    34.77
 267857    34.85
 273009    34.93
 278160    35.00
 283311    35.07
 2

In [538]:
def convertYearD3(year_data):
    d3_year_data = []
    for province, avgs in year_data.iteritems():
        if province != 'Federal':
            line_data = {"values":[], "key": ""}
            line_data["key"] = province
            for income, avg in avgs.iteritems():
                line_data["values"].append({"x": income, "y": round(avg/100.0,3)})
            d3_year_data.append(line_data)
    return d3_year_data
# convertYearD3(createYearDict("2014"))

In [539]:
years = ["2005", "2014", "2015", "2016"]
all_d3_data = {"tax":{},"population":{}}
for year in years:
    single_year = createYearDict(year)
    single_year_d3 = convertYearD3(single_year)
    all_d3_data["tax"][year] = single_year_d3
all_d3_data

{'population': {},
 'tax': {'2005': [{'key': u'British Columbia',
    'values': [{'x': 1213, 'y': 0.0},
     {'x': 1334, 'y': 0.0},
     {'x': 1456, 'y': 0.0},
     {'x': 1577, 'y': 0.0},
     {'x': 1698, 'y': 0.0},
     {'x': 1819, 'y': 0.0},
     {'x': 1941, 'y': 0.0},
     {'x': 2062, 'y': 0.0},
     {'x': 2183, 'y': 0.0},
     {'x': 2305, 'y': 0.0},
     {'x': 2426, 'y': 0.0},
     {'x': 2547, 'y': 0.0},
     {'x': 2669, 'y': 0.0},
     {'x': 2790, 'y': 0.0},
     {'x': 2911, 'y': 0.0},
     {'x': 3032, 'y': 0.0},
     {'x': 3154, 'y': 0.0},
     {'x': 3275, 'y': 0.0},
     {'x': 3396, 'y': 0.0},
     {'x': 3518, 'y': 0.0},
     {'x': 3639, 'y': 0.0},
     {'x': 3760, 'y': 0.0},
     {'x': 3882, 'y': 0.0},
     {'x': 4003, 'y': 0.0},
     {'x': 4124, 'y': 0.0},
     {'x': 4245, 'y': 0.0},
     {'x': 4367, 'y': 0.0},
     {'x': 4488, 'y': 0.0},
     {'x': 4609, 'y': 0.0},
     {'x': 4731, 'y': 0.0},
     {'x': 4852, 'y': 0.0},
     {'x': 4973, 'y': 0.0},
     {'x': 5094, 'y': 0.0},


In [540]:
with open('webcustom/avgTaxAllAdjusted.json', 'w') as data_file:    
    json.dump(all_d3_data, data_file)

# Population Distribution

In [197]:
def commas(number):
    return locale.format("%d", number, grouping=True)

In [198]:
income_dist = pd.read_csv(filepath_or_buffer="./MiscData/IncDistCleaned.csv")
income_dist_dict = {}
income_dist["ActualTotal"] = 0
income_dist["PercentagePop"] = 0
income_dist["Label"] = ""
for (index, row) in income_dist.iterrows():
    
    # Get the total population for the percentage calculation
    prov = row["Province"]
    prov_data = income_dist.loc[income_dist["Province"] == prov,:]
    total_pop = prov_data.iloc[0]["Total"] + prov_data.iloc[1]["Total"]

    if row["Income"] == 0:
        income_dist.loc[index, "ActualTotal"] = row["Total"]
        income_dist.loc[index, "PercentagePop"] = 100.0 * row["Total"] / total_pop
        
        next_index = index + 1
        next_income = income_dist.loc[next_index, "Income"]
        income_dist.loc[index, "Label"] = "$" + str(row["Income"]) + " - " + str(commas(next_income - 1))
    elif row["Income"] == 250000:
        income_dist.loc[index, "ActualTotal"] = row["Total"]
        income_dist.loc[index, "PercentagePop"] = 100.0 * row["Total"] / total_pop
        
        income_dist.loc[index, "Label"] = "$250,000+"
    else:
        next_index = index + 1
        
        next_total = income_dist.loc[next_index, "Total"]
        income_dist.loc[index, "ActualTotal"] = row["Total"] - next_total
        income_dist.loc[index, "PercentagePop"] = 100.0 * (row["Total"] - next_total) / total_pop
        
        next_income = income_dist.loc[next_index, "Income"]
        income_dist.loc[index, "Label"] = "$" + str(commas(row["Income"])) + " - " + str(commas(next_income - 1))
income_dist.to_csv(path_or_buf="./MiscData/IncDistProcessed.csv")

In [199]:
row = income_dist.iloc[18]
prov = row["Province"]
prov_data = income_dist.loc[income_dist["Province"] == prov,:]
total_pop = prov_data.iloc[0]["Total"] + prov_data.iloc[1]["Total"]
100.0 * row["Total"] / total_pop

58.35221607903562