In [1]:
# Data processing from raw tax commission files to baseline clean data


In [2]:
# Imports and Pre-processing

import pandas as pd

# Load the raw workbook. The columns used below are the '233b ...' taxable-value
# fields and the '750 ...' tax-rate fields, plus the year / county / entity labels.
df_raw = pd.read_excel("../../public/data/document (79).xls")

# Useful Variables
# Property categories that later cells loop over to build share and revenue
# columns; the list is also exported as-is for the property-type selector.
property_types = ['Primary Residential', 'Non-primary Residential', 'Commercial', 'Agricultural', 'Unimproved', 'Total Real Property', 
                  'Personal Property', 
                  'Centrally Assessed']

scale_factor = 1000000000  # Adjustment to show taxable property in billions

# Aggregate into usable categories
# Each real-property category is land value plus building value, rescaled by scale_factor.
df_raw['Primary Residential'] = (df_raw['233b Res_Primary_Land'] + df_raw['233b Res_Primary_Building'])/scale_factor
df_raw['Non-primary Residential'] = (df_raw['233b Res_Non_Primary_Land'] + df_raw['233b Res_Non_Primary_Building'])/scale_factor
df_raw['Commercial'] = (df_raw['233b Comm_Improv_Land'] + df_raw['233b Commercial_Building'])/scale_factor
df_raw['Agricultural'] = (df_raw['233b FAA_Land'] + df_raw['233b Agriculture_Building'])/scale_factor
df_raw['Unimproved'] = df_raw['233b Unimproved_Non_FAA']/scale_factor

# Real property is the sum of the five categories above.
df_raw['Total Real Property'] = df_raw['Primary Residential'] + df_raw['Non-primary Residential'] + df_raw['Commercial'] + df_raw['Agricultural'] + df_raw['Unimproved']

df_raw['Centrally Assessed'] = df_raw['233b Total_CA']/scale_factor
df_raw['Personal Property'] = df_raw['233b Total_Personal']/scale_factor
df_raw['Total Taxable Property'] = df_raw['Centrally Assessed'] + df_raw['Personal Property'] +df_raw['Total Real Property']

# Rates are carried over unchanged under shorter names.
df_raw['Tax Rate'] = df_raw['750 Tax_Rate_Real']
df_raw['Tax Rate PP'] = df_raw['750 Tax_Rate_PP']


# Drop Extra Columns
# .copy() detaches the trimmed frame from the full raw frame, so the rows and
# columns added by later cells are written to an independent object instead of
# a selection of the original (avoids chained-assignment warnings).
df_raw = df_raw[['Tax Year', 'County Name', 'Entity Name', 'Tax Rate' ,'Tax Rate PP', 'Primary Residential', 'Non-primary Residential', 'Commercial', 'Agricultural', 'Unimproved', 'Total Real Property', 
                 'Centrally Assessed', 'Personal Property', 'Total Taxable Property']].copy()


In [3]:
# Sum to Statewide
# Adds one synthetic row per tax year, labelled "99-STATEWIDE" / "9999-STATEWIDE",
# whose value columns are the sums over that year's rows with entity code '1010'.
# NOTE(review): presumably only the '1010' rows are summed so that overlapping
# taxing entities are not double counted - confirm.
years = df_raw["Tax Year"].unique().tolist()

# Value columns are selected by name rather than by position, so the cell does
# not silently break if the column order changes upstream.
non_value_columns = ['Tax Year', 'County Name', 'Entity Name', 'Tax Rate', 'Tax Rate PP']
variables = [column for column in df_raw.columns if column not in non_value_columns]

is_county_row = df_raw['Entity Name'].str[0:4] == '1010'

# Build every statewide record first and append them in a single concat,
# instead of enlarging the frame one row at a time and shifting the index.
statewide_records = []
for year in years:
    county_rows = df_raw.loc[is_county_row & (df_raw["Tax Year"] == year)]

    # Both rates start at 0; a later cell fills in the statewide rates.
    record = {"Tax Year": year, "County Name": "99-STATEWIDE", "Entity Name": "9999-STATEWIDE",
              "Tax Rate": 0.0, "Tax Rate PP": 0.0}
    for variable in variables:
        record[variable] = county_rows[variable].sum()

    statewide_records.append(record)

if statewide_records:
    # Statewide rows go after the existing rows, in the order of `years`;
    # the index is rebuilt as 0..n-1.
    df_statewide = pd.DataFrame(statewide_records, columns=df_raw.columns)
    df_raw = pd.concat([df_raw, df_statewide], ignore_index=True)


In [4]:
# Split the numeric code prefix off the raw county / entity labels.
# The first four characters of 'Entity Name' and the first two characters of
# 'County Name' are parsed as integers.
entity_code = df_raw['Entity Name'].str[0:4].astype(int)
df_raw['Entity Code'] = entity_code

df_raw['County Code'] = df_raw['County Name'].str[0:2].astype(int)


# Classify each entity by its code.
# Codes not covered by any rule below (for example 7000-7999) keep a missing 'Entity Type'.
# NOTE(review): 'Infrastrucutre' is misspelled, but the label is written to the
# exported data, so it is left untouched here.
single_code_types = [
    (1010, 'County'),
    (1015, 'Multicounty Assessing'),
    (1020, 'County Assessing'),
]
code_range_types = [
    (2000, 3000, 'School District'),
    (3000, 4000, 'Municipality'),
    (4000, 5000, 'Special Service District'),
    (5000, 6000, 'Public Infrastrucutre District'),
    (6000, 7000, 'Special District'),
    (8000, 9999, 'RDA or CDA'),
]

for code, label in single_code_types:
    df_raw.loc[entity_code == code, 'Entity Type'] = label

# Ranges are half-open: low <= code < high.
for low, high, label in code_range_types:
    df_raw.loc[(entity_code >= low) & (entity_code < high), 'Entity Type'] = label

df_raw.loc[entity_code == 9999, 'Entity Type'] = 'Statewide'

# Strip the code prefix from the county label and normalise its capitalisation.
county_name = df_raw['County Name'].str[3:].str.capitalize()
df_raw['County Name'] = county_name

# Rebuild display names. County-level entities are named after their county;
# codes 2000-6999 become "<entity>, <county>". Rows with other codes
# (for example 8000 and above, other than 9999) keep their raw 'Entity Name'.
df_raw.loc[entity_code == 1010, 'Entity Name'] = county_name
df_raw.loc[entity_code == 1015, 'Entity Name'] = 'Multicounty Assessing' + ", " + county_name
df_raw.loc[entity_code == 1020, 'Entity Name'] = 'County Assessing' + ", " + county_name

has_prefixed_name = (entity_code >= 2000) & (entity_code < 7000)
stripped_entity_name = df_raw['Entity Name'].str[5:].str.capitalize()
df_raw.loc[has_prefixed_name, 'Entity Name'] = stripped_entity_name + ", " + county_name

df_raw.loc[entity_code == 9999, 'Entity Name'] = 'Statewide'


In [5]:
# Compute Variables of Interest

# Taxable property type shares
# Each "<type>, Share" column is that type's taxable value divided by the row's
# 'Total Taxable Property'. The loop variable is deliberately not called `type`,
# which would shadow the builtin for the rest of the notebook.
for property_type in property_types:
    df_raw[property_type + ', Share'] = df_raw[property_type] / df_raw['Total Taxable Property']


# Entity Revenue
# Revenue per property type = taxable value * applicable tax rate * 1000.
# NOTE(review): values were scaled to billions earlier in the notebook, so the
# factor of 1000 presumably expresses revenue in millions - confirm.
revenue_variables = []

for property_type in property_types:
    # Personal property uses its own rate column; every other type uses the
    # real-property rate. (The previous comparison against
    # 'Personal Personal Property' never matched, so 'Tax Rate PP' was never applied.)
    if property_type == 'Personal Property':
        rate_column = 'Tax Rate PP'
    else:
        rate_column = 'Tax Rate'

    revenue_column = 'Revenue, ' + property_type
    df_raw[revenue_column] = df_raw[property_type] * df_raw[rate_column] * 1000

    revenue_variables.append(revenue_column)

# Calculate statewide revenue
# The statewide row (entity code 9999) of each year is overwritten with the sum
# of that year's revenue over all other entities.
is_statewide = df_raw["Entity Code"] == 9999

for revenue_type in revenue_variables:

    for year in years:

        in_year = df_raw["Tax Year"] == year

        entity_revenue = df_raw.loc[in_year & ~is_statewide, revenue_type].sum()

        df_raw.loc[in_year & is_statewide, revenue_type] = entity_revenue

# 'Total Real Property' already aggregates the individual real-property types,
# so total revenue adds only the three top-level components.
df_raw['Revenue, Total'] = df_raw['Revenue, Total Real Property'] + df_raw['Revenue, Centrally Assessed'] + df_raw['Revenue, Personal Property']




# Statewide tax rate faced by taxpayers:
# statewide total revenue / (statewide total taxable property * 1000).
statewide_mask = df_raw["Entity Code"] == 9999

for year in years:

    statewide_this_year = statewide_mask & (df_raw["Tax Year"] == year)
    statewide_next_year = statewide_mask & (df_raw["Tax Year"] == year + 1)

    statewide_rows = df_raw.loc[statewide_this_year]
    taxpayer_rate = statewide_rows["Revenue, Total"].sum() / (statewide_rows["Total Taxable Property"].sum() * 1000)

    df_raw.loc[statewide_this_year, ["Tax Rate"]] = taxpayer_rate

    # The personal-property rate of the FOLLOWING year is set to this year's rate,
    # so the earliest year's statewide 'Tax Rate PP' is not assigned here.
    # NOTE(review): presumably personal property is taxed at the prior year's rate - confirm.
    df_raw.loc[statewide_next_year, ["Tax Rate PP"]] = taxpayer_rate

# Statewide tax rate charged by entities:
# same statewide revenue, but divided by the taxable property summed over every
# non-statewide entity row of the year.
# For non-statewide rows these two columns are plain copies of the rate columns.
df_raw["Real Tax Rate, entity"] = df_raw["Tax Rate"]
df_raw["Personal Tax Rate, entity"] = df_raw["Tax Rate PP"]

for year in years:

    in_year = df_raw["Tax Year"] == year

    statewide_revenue = df_raw.loc[in_year & statewide_mask]["Revenue, Total"].sum()
    entity_taxable_value = df_raw.loc[in_year & ~statewide_mask]["Total Taxable Property"].sum()
    entity_rate = statewide_revenue / (entity_taxable_value * 1000)

    df_raw.loc[in_year & statewide_mask, ["Real Tax Rate, entity"]] = entity_rate

    # As above, the personal-property variant is written to the following year's statewide row.
    df_raw.loc[(df_raw["Tax Year"] == year + 1) & statewide_mask, ["Personal Tax Rate, entity"]] = entity_rate



In [6]:
# Convert Tax Rates to Percent
# Rescales the two rate columns in place; re-running this cell multiplies them by 100 again.
# NOTE(review): the "..., entity" rate columns are not rescaled here - confirm that is intended.
for rate_column in ('Tax Rate', 'Tax Rate PP'):
    df_raw[rate_column] = df_raw[rate_column] * 100


In [7]:
# Export to JSON
# Writes the full frame as a JSON array with one object per row (orient='records').
df_raw.to_json(path_or_buf='../../public/data/base_data.json', orient='records')

In [8]:
# Create exports for selectors
# Every selector file is a JSON array of {"value": x, "label": x} records.

def _write_selector_options(values, path):
    """Write `values` to `path` as value/label records and return the frame.

    values: list of option values; each one is used as both 'value' and 'label'.
    path:   destination JSON file.
    """
    options = pd.DataFrame(values, columns=['value'])
    options['label'] = options['value']
    options.to_json(path, orient = 'records')
    return options


# list of entities
entities = df_raw["Entity Name"].unique().tolist()
df_entitylist = _write_selector_options(entities, '../../public/data/entity_list.json')

# list of years
year_list = df_raw["Tax Year"].unique().tolist()
df_yearlist = _write_selector_options(year_list, '../../public/data/years_list.json')

# list of counties
counties = df_raw["County Name"].unique().tolist()
df_countylist = _write_selector_options(counties, '../../public/data/counties_list.json')

# list of entity types
types = df_raw["Entity Type"].unique().tolist()
df_typelist = _write_selector_options(types, '../../public/data/entity_types_list.json')

# Property Types
df_propertytypes = _write_selector_options(property_types, '../../public/data/property_types_list.json')
