In [13]:
from collections import defaultdict

import geopandas as gp
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon, pearsonr, spearmanr, kendalltau

from src.constants import BniaIndicators
# The star import stays ahead of the two explicit imports below (as in the original cell)
# so that they keep precedence over any same-named objects it may export.
from src.exp_helper import *

from sklearn.metrics import r2_score, explained_variance_score
from IPython.display import display

In [2]:
# 'Total Population' column of the '2010' sheet, indexed by 'CSA2010';
# the 'Baltimore City' row is dropped.
population = (
    pd.read_excel('data/open-baltimore/raw/VS16_Indicators_2010-2016.xlsx', sheet_name='2010')
    .set_index('CSA2010')
    .drop('Baltimore City')['Total Population']
)

In [3]:
# All indicator columns of the '2015' sheet, indexed by 'CSA2010';
# the 'Baltimore City' row is dropped.
indicators = (
    pd.read_excel('data/open-baltimore/raw/VS16_Indicators_2010-2016.xlsx', sheet_name='2015')
    .set_index('CSA2010')
    .drop('Baltimore City')
)

In [30]:
# Keep only the BniaIndicators names that actually exist as columns of `indicators`.
# Membership is tested directly on the column Index (a hashed lookup) instead of
# rebuilding a set from the column list for every candidate name.
income_cols = [col for col in BniaIndicators.household_income if col in indicators.columns]
housing_cols = [col for col in BniaIndicators.housing if col in indicators.columns]

# True event counts (y) for BNIA neighborhoods

In [5]:
# train_tw is passed as `tw_past` to the Rolling windows built below;
# verbose is read by get_true_y and forwarded to data_for_fit.
train_tw, verbose = 60, 0

# Compiled data source: x from the 'crime' group restricted to burglary, y = 'crime/burglary'.
d_nbh = CompileData(spu_name='bnia_nbh')
d_nbh.set_x(
    ['crime'],
    category_groups={'crime': [['burglary']]},
    by_category=False,
)
d_nbh.set_y('crime/burglary')



In [6]:
def get_true_y(compile_data, eval_roller, x_setting='time_indexed_points', y_setting='event_cnt'):
    """Collect the x/y data that `data_for_fit` returns for every rolling window.

    Parameters
    ----------
    compile_data : object forwarded as the first argument of `data_for_fit`.
    eval_roller : object whose `roll()` yields `(past_sd, past_ed, pred_sd, pred_ed)`.
    x_setting, y_setting : forwarded unchanged to `data_for_fit`.

    Returns
    -------
    pandas.DataFrame
        One row per window, indexed by the label 'X: <past> -> Y: <pred>', with
        columns 'true_y' (the returned y) and 'true_x' (the returned x, flattened).

    Reads the module-level `verbose` flag and forwards it to `data_for_fit`.
    """
    rows = {}
    for window in eval_roller.roll():
        past_sd, past_ed, pred_sd, pred_ed = window
        label = 'X: %s~%s -> Y: %s~%s' % (past_sd, past_ed, pred_sd, pred_ed)
        window_x, window_y = data_for_fit(
            compile_data,
            x_setting=x_setting,
            y_setting=y_setting,
            dates=window,
            verbose=verbose,
        )
        # 'true_y' is inserted first so it precedes 'true_x' in the resulting frame.
        rows[label] = {'true_y': window_y, 'true_x': window_x.ravel()}
    return pd.DataFrame.from_dict(rows, orient='index')


In [7]:
# Rolling windows with tw_pred=2 (presumably a 2-day prediction window -- confirm against Rolling),
# and the true values for each window; x uses the 'event_cnt' setting here.
er_bower_2d = Rolling(
    rsd='2016-07-01',
    red='2017-06-30',
    rstep=1,
    tw_past=train_tw,
    tw_pred=2,
)
nbh_true_y_2d = get_true_y(compile_data=d_nbh, eval_roller=er_bower_2d, x_setting='event_cnt')

In [8]:
# Same rolling setup as the tw_pred=2 case, but with tw_pred=7
# (presumably a 7-day prediction window -- confirm against Rolling).
er_bower_7d = Rolling(
    rsd='2016-07-01',
    red='2017-06-30',
    rstep=1,
    tw_past=train_tw,
    tw_pred=7,
)
nbh_true_y_7d = get_true_y(compile_data=d_nbh, eval_roller=er_bower_7d, x_setting='event_cnt')

# evaluate the model

In [9]:
eval_res_2d = pd.read_csv('exp_res/bower_2day.csv')
# Each cell of the 'kde200' / 'bower' columns is a Python literal stored as text; the first
# element of each is summarised side by side.
# NOTE(review): eval() executes whatever text the CSV contains -- only use on trusted files.
pd.concat(
    [
        eval_res_2d[model].apply(lambda cell: eval(cell)[0]).describe()
        for model in ('kde200', 'bower')
    ],
    axis=1,
)

Unnamed: 0,kde200,bower
count,302.0,302.0
mean,0.613548,0.619199
std,0.078652,0.086483
min,0.388889,0.37931
25%,0.5625,0.566667
50%,0.617334,0.619048
75%,0.666667,0.678046
max,0.829787,0.842105


In [10]:
eval_res_7d = pd.read_csv('exp_res/bower_7day.csv')
# Each cell of the 'kde200' / 'bower' columns is a Python literal stored as text; the first
# element of each is summarised side by side.
# NOTE(review): eval() executes whatever text the CSV contains -- only use on trusted files.
pd.concat(
    [
        eval_res_7d[model].apply(lambda cell: eval(cell)[0]).describe()
        for model in ('kde200', 'bower')
    ],
    axis=1,
)

Unnamed: 0,kde200,bower
count,297.0,297.0
mean,0.609093,0.613674
std,0.043844,0.05006
min,0.5,0.481928
25%,0.576923,0.581395
50%,0.610738,0.613924
75%,0.639535,0.643312
max,0.723164,0.766667


# load prediction results

In [11]:
def norm_by_population(df, pop=None, cols=('bower', 'kde200', 'true_y', 'true_x')):
    """Divide the array stored in every cell of `cols` by the population vector, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose `cols` columns hold one array-like per row, either as an actual
        sequence/ndarray or as its text form (as read back from a CSV).
    pop : array-like, optional
        Divisor applied element-wise to every array. Defaults to the module-level
        `population.values`, which is what the original implementation always used.
    cols : sequence of str, optional
        Columns to normalise; defaults to the four columns the original hard-coded.

    Returns
    -------
    None. `df` is modified in place, so calling this twice on the same frame
    divides twice.

    Notes
    -----
    * Uses numpy directly: the `pd.np` alias was deprecated in pandas 1.0 and removed in 2.0.
    * Each column is rebuilt as a 1-D object array and assigned as a whole, instead of
      writing array values cell by cell through `.loc`.
    """
    if pop is None:
        pop = population.values
    pop = np.asarray(pop)

    for c in cols:
        normalized = np.empty(len(df), dtype=object)
        for i, arr in enumerate(df[c]):
            if isinstance(arr, str):
                # NOTE(review): eval() runs arbitrary text from the results file; kept because
                # the stored format is not visible here -- only use on trusted files.
                arr = np.array(eval(arr))
            normalized[i] = arr / pop
        df[c] = normalized


# get correlation statistics (Pearson, Spearman, or Kendall tau)

In [18]:
def stest_ana(res_cnt, cols, stest):
    """Correlate true and predicted per-neighbourhood values with socio-economic indicators.

    Parameters
    ----------
    res_cnt : pandas.DataFrame
        One row per period with columns 'bower', 'kde200', 'true_y' and 'true_x'.
        'bower' / 'kde200' may be stored as text and are then evaluated.
    cols : iterable of str
        Column names of the module-level `indicators` frame to test against.
    stest : callable
        `stest(a, b)` returning a `(statistic, p_value)` pair, e.g. scipy.stats
        `pearsonr`, `spearmanr` or `kendalltau`.

    Returns
    -------
    pandas.DataFrame
        One row per (period, indicator) with the statistic / p-value of each of
        true_y, true_x, bower and kde200 against the indicator, the formatted
        'rp_*' strings, and the period-level agreement between true_y and each
        prediction (`r2_score` and `stest`). An empty input yields an empty
        frame that still has all expected columns.

    Notes
    -----
    * Row values are read by column name (as `norm_by_population` does) rather than
      by position, so the result no longer depends on the column order of `res_cnt`.
    * The 'rp_*' strings keep the historical 'pear=' prefix whatever `stest` is.
    """
    out_columns = ['period', 'indicator', 'rtrue', 'ptrue', 'rp_true', 'rtruex', 'ptruex', 'rp_truex',
                   'rbower', 'pbower', 'rp_bower', 'rkde200', 'pkde200', 'rp_kde200',
                   'r2true_bower', 'r2true_kde200', 'pear_true_bower', 'pear_true_kde200']
    rp_format = 'pear=%0.4f, p=%0.4f'

    # Indicator vectors do not change between periods: look them up once.
    indicator_values = [(idctr_col, indicators[idctr_col].values) for idctr_col in cols]

    stest_res = []
    for period, row in res_cnt.iterrows():
        bower, kde200 = row['bower'], row['kde200']
        true_y, true_x = row['true_y'], row['true_x']
        # NOTE(review): eval() runs arbitrary text from the results file -- trusted files only.
        if isinstance(bower, str):
            bower = eval(bower)
        if isinstance(kde200, str):
            kde200 = eval(kde200)

        # Agreement between truth and predictions does not depend on the indicator,
        # so it is computed once per period instead of once per indicator.
        r2true_bower = r2_score(true_y, bower)
        r2true_kde200 = r2_score(true_y, kde200)
        pear_true_bower, _ = stest(true_y, bower)
        pear_true_kde200, _ = stest(true_y, kde200)

        for idctr_col, idctr in indicator_values:
            rtrue, ptrue = stest(true_y, idctr)
            rtruex, ptruex = stest(true_x, idctr)
            rbower, pbower = stest(bower, idctr)
            rkde200, pkde200 = stest(kde200, idctr)
            stest_res.append({
                'period': period, 'indicator': idctr_col,
                'rtrue': rtrue, 'ptrue': ptrue, 'rp_true': rp_format % (rtrue, ptrue),
                'rtruex': rtruex, 'ptruex': ptruex, 'rp_truex': rp_format % (rtruex, ptruex),
                'rbower': rbower, 'pbower': pbower, 'rp_bower': rp_format % (rbower, pbower),
                'rkde200': rkde200, 'pkde200': pkde200, 'rp_kde200': rp_format % (rkde200, pkde200),
                'r2true_bower': r2true_bower, 'r2true_kde200': r2true_kde200,
                'pear_true_bower': pear_true_bower, 'pear_true_kde200': pear_true_kde200,
            })

    # `columns=` fixes the column order and also works when no rows were produced.
    return pd.DataFrame(stest_res, columns=out_columns)

In [None]:
# NOTE(review): `pear_ana` is not defined in the visible notebook cells (unless the star import
# from src.exp_helper provides it); `stest_ana` does the same job with the statistic passed
# explicitly, so Pearson's r is supplied here.
# The `pred_res_*_2d` frames are created in the 2-day output cell further down, so that cell
# has to run before this one.
pear_top20_hotspots_count_2d = stest_ana(pred_res_top20_hotspots_count_2d, housing_cols, pearsonr)
pear_sum_risk_hotspots_2d = stest_ana(pred_res_sum_risk_hotspots_2d, housing_cols, pearsonr)
pear_above_mean_hotspots_count_2d = stest_ana(pred_res_above_mean_hotspots_count_2d, housing_cols, pearsonr)
pear_above_mean_std_hotspots_count_2d = stest_ana(pred_res_above_mean_std_hotspots_count_2d, housing_cols, pearsonr)

# analyze bias

In [15]:
# p-value cut-offs read by get_res_table_for_4kinds: p_thres_true is compared with the p-values
# of the true data (ptrue, ptruex), p_thres_pred with those of the predictions (pbower, pkde200).
p_thres_true, p_thres_pred = 0.05, 0.05

In [19]:
def get_res_table(stest_res, cond, kind, periods):
    """Pivot the rows selected by `cond` into a period x indicator table of 'rp_<kind>' values.

    The table is reindexed to `periods`, so periods with no selected row appear as
    all-null rows; (period, indicator) pairs that were not selected are null as well.
    """
    selected = stest_res[cond]
    value_col = 'rp_' + kind
    wide = selected.pivot(index='period', columns='indicator', values=value_col)
    return wide.reindex(periods)
def mean_not_null(stest_res, cond, kind, periods):
    """Per indicator, the share of `periods` that have a non-null entry in `get_res_table`.

    Returns a Series indexed by indicator; indicators with no selected row at all are
    absent from it.
    """
    table = get_res_table(stest_res, cond, kind, periods)
    has_entry = table.notnull()
    return has_entry.mean()

In [20]:
def get_res_table_for_4kinds(stest_res, res_cnt):
    """Summarise, per indicator, how often each significance pattern occurs across periods.

    For every indicator the returned frame holds the share of periods (taken from
    `res_cnt.index`) in which:

    * 'bower sig.' / 'kde200 sig.'         -- the prediction is significant but the truth is not;
    * 'bower not sig.' / 'kde200 not sig.' -- the truth is significant but the prediction is not;
    * 'true sig.' / 'true_x sig.'          -- true_y / true_x itself is significant.

    "Significant" means p < threshold, using the module-level `p_thres_true` for ptrue / ptruex
    and `p_thres_pred` for pbower / pkde200. Indicators for which a pattern never occurs are NaN
    in that column.
    """
    periods = res_cnt.index
    true_sig = stest_res.ptrue < p_thres_true
    # Written as an explicit >= comparison (not a negation) so NaN p-values match neither mask.
    true_not_sig = stest_res.ptrue >= p_thres_true

    # (output column label, 'rp_<kind>' suffix, row mask)
    patterns = [
        ('bower sig.', 'bower', true_not_sig & (stest_res.pbower < p_thres_pred)),
        ('bower not sig.', 'bower', true_sig & (stest_res.pbower >= p_thres_pred)),
        ('kde200 sig.', 'kde200', true_not_sig & (stest_res.pkde200 < p_thres_pred)),
        ('kde200 not sig.', 'kde200', true_sig & (stest_res.pkde200 >= p_thres_pred)),
        ('true sig.', 'true', true_sig),
        ('true_x sig.', 'truex', stest_res.ptruex < p_thres_true),
    ]

    shares = [mean_not_null(stest_res, mask, kind, periods) for _, kind, mask in patterns]
    summary = pd.concat(shares, axis=1, sort=True)
    summary.columns = [label for label, _, _ in patterns]
    summary.index.name = 'pred sig. different than true sig. level'
    return summary

# output results

In [39]:

# ---- configuration (2-day results) ----
norm_by_pop_2d = True
cols_type = 'income'
# cols_type = 'house'
st_type = 'kendalltau'
cols = {'house': housing_cols, 'income': income_cols}[cols_type]
# cols = income_cols + housing_cols
stest = {'pearsonr': pearsonr, 'spearmanr': spearmanr, 'kendalltau': kendalltau}[st_type]
print('2d norm by pop:', norm_by_pop_2d)
print('indicators:', cols_type)
print('statistical test:', st_type)

# ---- load the predictions per hotspot definition and attach the true values ----
pred_res_top20_hotspots_count_2d = pd.read_csv('exp_res/bower_2day_bnia_top20_hotspots.csv', index_col=0).join(nbh_true_y_2d)
pred_res_sum_risk_hotspots_2d = pd.read_csv('exp_res/bower_2day_bnia_sum_risk_hotspots.csv', index_col=0).join(nbh_true_y_2d)
pred_res_above_mean_hotspots_count_2d = pd.read_csv('exp_res/bower_2day_bnia_above_mean_hotspots.csv', index_col=0).join(nbh_true_y_2d)
pred_res_above_mean_std_hotspots_count_2d = pd.read_csv('exp_res/bower_2day_bnia_above_mean_std_hotspots.csv', index_col=0).join(nbh_true_y_2d)
if norm_by_pop_2d:
    # norm_by_population modifies each frame in place.
    for _pred_res in (pred_res_top20_hotspots_count_2d,
                      pred_res_sum_risk_hotspots_2d,
                      pred_res_above_mean_hotspots_count_2d,
                      pred_res_above_mean_std_hotspots_count_2d):
        norm_by_population(_pred_res)

# ---- correlation tests against the selected indicators ----
stest_top20_hotspots_count_2d = stest_ana(pred_res_top20_hotspots_count_2d, cols, stest)
stest_sum_risk_hotspots_2d = stest_ana(pred_res_sum_risk_hotspots_2d, cols, stest)
stest_above_mean_hotspots_count_2d = stest_ana(pred_res_above_mean_hotspots_count_2d, cols, stest)
stest_above_mean_std_hotspots_count_2d = stest_ana(pred_res_above_mean_std_hotspots_count_2d, cols, stest)

# ---- share of periods per significance pattern ----
sig_top20_hotspots_count_2d = get_res_table_for_4kinds(stest_top20_hotspots_count_2d, pred_res_top20_hotspots_count_2d)
sig_sum_risk_hotspots_2d = get_res_table_for_4kinds(stest_sum_risk_hotspots_2d, pred_res_sum_risk_hotspots_2d)
sig_above_mean_hotspots_count_2d = get_res_table_for_4kinds(stest_above_mean_hotspots_count_2d, pred_res_above_mean_hotspots_count_2d)
sig_above_mean_std_hotspots_count_2d = get_res_table_for_4kinds(stest_above_mean_std_hotspots_count_2d, pred_res_above_mean_std_hotspots_count_2d)

# The 'true' columns are the same in all four tables; they are read from the top20 one.
true_sig_pcnt = sig_top20_hotspots_count_2d['true sig.']
truex_sig_pcnt = sig_top20_hotspots_count_2d['true_x sig.']

# Display label -> significance table, in the column order of the summary.
sig_tables_2d = [
    ('top20', sig_top20_hotspots_count_2d),
    ('>mean', sig_above_mean_hotspots_count_2d),
    ('>mean+std', sig_above_mean_std_hotspots_count_2d),
    ('sum_risk', sig_sum_risk_hotspots_2d),
]


def _sig_pcnt_table(sig_col, true_sig, truex_sig, sig_tables):
    """Build the per-indicator summary for one model's significance column.

    Columns: 'true sig.', 'true_x sig.', one column per entry of `sig_tables`
    (taken from that table's `sig_col`), then the same per-table columns divided by
    `true_sig` with a '/true' suffix.
    """
    labels = [label for label, _ in sig_tables]
    table = pd.concat([true_sig, truex_sig] + [tbl[sig_col] for _, tbl in sig_tables], axis=1)
    table.columns = ['true sig.', 'true_x sig.'] + labels
    table = table.join(table[labels].divide(true_sig, axis=0), rsuffix='/true')
    # '\\$' is a backslash followed by '$' -- the same text the former '\$' produced, but
    # without the invalid-escape warning. Presumably the escape keeps the notebook from
    # rendering '$...$' in indicator names as math.
    table.index = [col.replace('$', '\\$') for col in table.index]
    return table


c = 'bower sig.'
bower_sig_pcnt_2d = _sig_pcnt_table(c, true_sig_pcnt, truex_sig_pcnt, sig_tables_2d)
print('--------------------', c)
# bower_sig_pcnt_2d = bower_sig_pcnt_2d.reindex(income_cols)
display(bower_sig_pcnt_2d.fillna(0))

c = 'kde200 sig.'
kde200_sig_pcnt_2d = _sig_pcnt_table(c, true_sig_pcnt, truex_sig_pcnt, sig_tables_2d)
print('-------------------', c)
# kde200_sig_pcnt_2d = kde200_sig_pcnt_2d.reindex(income_cols)
display(kde200_sig_pcnt_2d.fillna(0))

2d norm by pop: True
indicators: income
statistical test: kendalltau
-------------------- bower sig.


Unnamed: 0,true sig.,true_x sig.,top20,>mean,>mean+std,sum_risk,top20/true,>mean/true,>mean+std/true,sum_risk/true
Median Household Income,0.036424,0.0,0.062914,0.13245,0.023179,0.0,1.727273,3.636364,0.636364,0.0
Percent of Children Living Below the Poverty Line,0.07947,0.298013,0.516556,0.596026,0.321192,0.149007,6.5,7.5,4.041667,1.875
Percent of Family Households Living Below the Poverty Line,0.092715,0.34106,0.450331,0.503311,0.274834,0.172185,4.857143,5.428571,2.964286,1.857143
"Percent of Households Earning \$25,000 to \$40,000",0.039735,0.0,0.009934,0.003311,0.029801,0.039735,0.25,0.083333,0.75,1.0
"Percent of Households Earning \$40,000 to \$60,000",0.02649,0.0,0.0,0.0,0.006623,0.0,0.0,0.0,0.25,0.0
"Percent of Households Earning \$60,000 to \$75,000",0.009934,0.0,0.10596,0.072848,0.036424,0.0,10.666667,7.333333,3.666667,0.0
"Percent of Households Earning Less than \$25,000",0.043046,0.05298,0.178808,0.248344,0.069536,0.046358,4.153846,5.769231,1.615385,1.076923
"Percent of Households Earning More than \$75,000",0.059603,0.0,0.139073,0.304636,0.023179,0.019868,2.333333,5.111111,0.388889,0.333333


------------------- kde200 sig.


Unnamed: 0,true sig.,true_x sig.,top20,>mean,>mean+std,sum_risk,top20/true,>mean/true,>mean+std/true,sum_risk/true
Median Household Income,0.036424,0.0,0.033113,0.139073,0.019868,0.0,0.909091,3.818182,0.545455,0.0
Percent of Children Living Below the Poverty Line,0.07947,0.298013,0.622517,0.68543,0.364238,0.178808,7.833333,8.625,4.583333,2.25
Percent of Family Households Living Below the Poverty Line,0.092715,0.34106,0.519868,0.549669,0.311258,0.225166,5.607143,5.928571,3.357143,2.428571
"Percent of Households Earning \$25,000 to \$40,000",0.039735,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Percent of Households Earning \$40,000 to \$60,000",0.02649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Percent of Households Earning \$60,000 to \$75,000",0.009934,0.0,0.248344,0.056291,0.099338,0.0,25.0,5.666667,10.0,0.0
"Percent of Households Earning Less than \$25,000",0.043046,0.05298,0.135762,0.231788,0.139073,0.066225,3.153846,5.384615,3.230769,1.538462
"Percent of Households Earning More than \$75,000",0.059603,0.0,0.069536,0.284768,0.049669,0.013245,1.166667,4.777778,0.833333,0.222222


In [75]:

# ---- configuration (7-day results) ----
norm_by_pop_7d = False
cols_type = 'income'
cols = {'house': housing_cols, 'income': income_cols}[cols_type]
print('norm by pop:', norm_by_pop_7d)
print('indicators:', cols_type)

# ---- load the predictions per hotspot definition and attach the true values ----
pred_res_top20_hotspots_count_7d = pd.read_csv('exp_res/bower_7day_bnia_top20_hotspots.csv', index_col=0).join(nbh_true_y_7d)
pred_res_sum_risk_hotspots_7d = pd.read_csv('exp_res/bower_7day_bnia_sum_risk_hotspots.csv', index_col=0).join(nbh_true_y_7d)
pred_res_above_mean_hotspots_count_7d = pd.read_csv('exp_res/bower_7day_bnia_above_mean_hotspots.csv', index_col=0).join(nbh_true_y_7d)
pred_res_above_mean_std_hotspots_count_7d = pd.read_csv('exp_res/bower_7day_bnia_above_mean_std_hotspots.csv', index_col=0).join(nbh_true_y_7d)
if norm_by_pop_7d:
    # norm_by_population modifies each frame in place.
    norm_by_population(pred_res_top20_hotspots_count_7d)
    norm_by_population(pred_res_sum_risk_hotspots_7d)
    norm_by_population(pred_res_above_mean_hotspots_count_7d)
    norm_by_population(pred_res_above_mean_std_hotspots_count_7d)

# ---- correlation tests ----
# NOTE(review): this cell used to call `pear_ana`, which is not defined in the visible notebook
# cells (unless the star import from src.exp_helper provides it). `stest_ana` does the same job
# with the statistic passed explicitly; Pearson's r is supplied to match the `pear_*` names.
pear_top20_hotspots_count_7d = stest_ana(pred_res_top20_hotspots_count_7d, cols, pearsonr)
pear_sum_risk_hotspots_7d = stest_ana(pred_res_sum_risk_hotspots_7d, cols, pearsonr)
pear_above_mean_hotspots_count_7d = stest_ana(pred_res_above_mean_hotspots_count_7d, cols, pearsonr)
pear_above_mean_std_hotspots_count_7d = stest_ana(pred_res_above_mean_std_hotspots_count_7d, cols, pearsonr)

# ---- share of periods per significance pattern ----
sig_top20_hotspots_count_7d = get_res_table_for_4kinds(pear_top20_hotspots_count_7d, pred_res_top20_hotspots_count_7d)
sig_sum_risk_hotspots_7d = get_res_table_for_4kinds(pear_sum_risk_hotspots_7d, pred_res_sum_risk_hotspots_7d)
sig_above_mean_hotspots_count_7d = get_res_table_for_4kinds(pear_above_mean_hotspots_count_7d, pred_res_above_mean_hotspots_count_7d)
sig_above_mean_std_hotspots_count_7d = get_res_table_for_4kinds(pear_above_mean_std_hotspots_count_7d, pred_res_above_mean_std_hotspots_count_7d)

# The 'true' columns are the same in all four tables; they are read from the top20 one.
true_sig_pcnt = sig_top20_hotspots_count_7d['true sig.']
truex_sig_pcnt = sig_top20_hotspots_count_7d['true_x sig.']

# Unlike the 2-day cell, the '/true' ratios here are computed for every column, including
# 'true sig.' and 'true_x sig.' themselves.
c = 'bower sig.'
bower_sig_pcnt_7d = pd.concat([true_sig_pcnt, truex_sig_pcnt,
                               sig_top20_hotspots_count_7d[c],
                               sig_above_mean_hotspots_count_7d[c],
                               sig_above_mean_std_hotspots_count_7d[c],
                               sig_sum_risk_hotspots_7d[c],
                               ],
                              axis=1)
bower_sig_pcnt_7d.columns = ['true sig.', 'true_x sig.', 'top20', '>mean', '>mean+std', 'sum_risk']
bower_sig_pcnt_7d = bower_sig_pcnt_7d.join(bower_sig_pcnt_7d.divide(true_sig_pcnt, axis=0), rsuffix='/true')
print('=====================', c)
display(bower_sig_pcnt_7d.fillna(0))

c = 'kde200 sig.'
kde200_sig_pcnt_7d = pd.concat([true_sig_pcnt, truex_sig_pcnt,
                                sig_top20_hotspots_count_7d[c],
                                sig_above_mean_hotspots_count_7d[c],
                                sig_above_mean_std_hotspots_count_7d[c],
                                sig_sum_risk_hotspots_7d[c],
                                ],
                               axis=1)
kde200_sig_pcnt_7d.columns = ['true sig.', 'true_x sig.', 'top20', '>mean', '>mean+std', 'sum_risk']
kde200_sig_pcnt_7d = kde200_sig_pcnt_7d.join(kde200_sig_pcnt_7d.divide(true_sig_pcnt, axis=0), rsuffix='/true')
print('=====================', c)
display(kde200_sig_pcnt_7d.fillna(0))

norm by pop: False
indicators: income


Unnamed: 0_level_0,true sig.,true_x sig.,top20,>mean,>mean+std,sum_risk,true sig./true,true_x sig./true,top20/true,>mean/true,>mean+std/true,sum_risk/true
pred sig. different than true sig. level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Median Household Income,0.003367,0.0,0.097643,0.171717,0.020202,0.0,1.0,0.0,29.0,51.0,6.0,0.0
Percent of Children Living Below the Poverty Line,0.060606,0.111111,0.400673,0.37037,0.20202,0.084175,1.0,1.833333,6.611111,6.111111,3.333333,1.388889
Percent of Family Households Living Below the Poverty Line,0.03367,0.080808,0.316498,0.222222,0.181818,0.090909,1.0,2.4,9.4,6.6,5.4,2.7
"Percent of Households Earning $25,000 to $40,000",0.037037,0.0,0.074074,0.171717,0.030303,0.016835,1.0,0.0,2.0,4.636364,0.818182,0.454545
"Percent of Households Earning $40,000 to $60,000",0.047138,0.0,0.040404,0.13468,0.010101,0.0,1.0,0.0,0.857143,2.857143,0.214286,0.0
"Percent of Households Earning $60,000 to $75,000",0.003367,0.0,0.003367,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
"Percent of Households Earning Less than $25,000",0.003367,0.0,0.087542,0.016835,0.037037,0.0,1.0,0.0,26.0,5.0,11.0,0.0
"Percent of Households Earning More than $75,000",0.013468,0.0,0.141414,0.313131,0.020202,0.0,1.0,0.0,10.5,23.25,1.5,0.0




Unnamed: 0_level_0,true sig.,true_x sig.,top20,>mean,>mean+std,sum_risk,true sig./true,true_x sig./true,top20/true,>mean/true,>mean+std/true,sum_risk/true
pred sig. different than true sig. level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Median Household Income,0.003367,0.0,0.094276,0.249158,0.020202,0.0,1.0,0.0,28.0,74.0,6.0,0.0
Percent of Children Living Below the Poverty Line,0.060606,0.111111,0.531987,0.43771,0.276094,0.127946,1.0,1.833333,8.777778,7.222222,4.555556,2.111111
Percent of Family Households Living Below the Poverty Line,0.03367,0.080808,0.329966,0.313131,0.282828,0.097643,1.0,2.4,9.8,9.3,8.4,2.9
"Percent of Households Earning $25,000 to $40,000",0.037037,0.0,0.121212,0.30303,0.023569,0.0,1.0,0.0,3.272727,8.181818,0.636364,0.0
"Percent of Households Earning $40,000 to $60,000",0.047138,0.0,0.010101,0.158249,0.0,0.0,1.0,0.0,0.214286,3.357143,0.0,0.0
"Percent of Households Earning $60,000 to $75,000",0.003367,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
"Percent of Households Earning Less than $25,000",0.003367,0.0,0.151515,0.03367,0.138047,0.0,1.0,0.0,45.0,10.0,41.0,0.0
"Percent of Households Earning More than $75,000",0.013468,0.0,0.138047,0.333333,0.013468,0.0,1.0,0.0,10.25,24.75,1.0,0.0
