In [42]:
"""
Set Options
"""

# import libraries
import pandas as pd
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
import pickle as pkl
import geopy.distance
import operator

# configuration options
%matplotlib inline
matplotlib.style.use("seaborn-muted")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [43]:
"""
import data files
"""

# station entry/exit totals together with station coordinates
# NOTE: unpickling can execute arbitrary code; only load files you trust
with open("./data/fluxsum_locations.pkl", 'rb') as flux_file:
    flux = pkl.load(flux_file)

# income records (read later via their MEDIAN, LAT and LON columns)
with open("./data/income.pkl", 'rb') as income_file:
    income = pkl.load(income_file)

In [44]:
"""
assign nearest income data to each station
"""

# geopy 2.0 removed `vincenty` (deprecated since 1.13) in favour of `geodesic`;
# fall back to `vincenty` only on installs too old to provide `geodesic`.
distance_fn = getattr(geopy.distance, "geodesic", None) or geopy.distance.vincenty

# The usable income records are the same for every station, so select them
# once instead of re-scanning the whole income table inside the station loop.
# Rows whose MEDIAN is not greater than 10 (including NaN) are skipped.
income_valid = income[income['MEDIAN'] > 10]
income_points = list(zip(income_valid['LAT'], income_valid['LON']))
bling = income_valid['MEDIAN'].tolist()
if len(flux) > 0 and not bling:
    raise ValueError("no income rows with MEDIAN > 10 to match stations against")

# for each station, take the MEDIAN of the closest usable income record
bling_station = []
for f_lat, f_lon in zip(flux['LAT'], flux['LON']):
    station = (f_lat, f_lon)
    # distance in metres from this station to every usable income record
    dist = [distance_fn(station, point).m for point in income_points]
    # min() keeps the first minimum, so ties resolve to the earliest record
    nearest = min(range(len(dist)), key=dist.__getitem__)
    bling_station.append(bling[nearest])

In [45]:
"""
modify dataframe and save
"""
# attach the nearest income value to each station row
flux["INCOME"] = bling_station

# persist the enriched table twice: CSV for inspection, pickle for reloading
flux.to_csv('./data/flux_income_nearest.csv')

with open('./data/flux_income_nearest.pkl', 'wb') as out_file:
    pkl.dump(flux, out_file)

In [47]:
# preview the first rows of the station table, which now carries the INCOME column
flux.head()

Unnamed: 0,STATION,ENTRY_SUM,EXIT_SUM,LAT,LON,INCOME
0,34 ST-PENN STA,10701055.0,7978101.0,40.752287,-73.993391,30117.0
1,GRD CNTRL-42 ST,8930354.0,7929180.0,40.751431,-73.976041,300000.0
2,TIMES SQ-42 ST,7827994.0,7341618.0,40.755983,-73.986229,132578.0
3,14 ST-UNION SQ,8497203.0,6969379.0,40.734673,-73.989951,100393.0
4,34 ST-HERALD SQ,9115976.0,6261677.0,40.750373,-73.991057,151229.0


In [100]:
def weighting(income_in, flux_in, alpha=0.8):
    """
    Blend income and flux into a single score per position.

    Each input is scaled by its own maximum, then the two scaled values are
    combined as ``alpha * income_share + (1 - alpha) * flux_share``.

    Parameters
    ----------
    income_in : iterable of numbers
        Income value per station.
    flux_in : iterable of numbers
        Flux value per station, paired positionally with ``income_in``.
    alpha : float, optional
        Weight given to the income share; the flux share gets ``1 - alpha``.
        Defaults to 0.8.

    Returns
    -------
    list of float
        One score per (income, flux) pair. If the inputs differ in length the
        extra trailing entries of the longer one are ignored. An empty list is
        returned when either input is empty.

    Raises
    ------
    ZeroDivisionError
        With plain Python numbers, when the maximum of either input is 0.
    """
    # materialise once so iterators/generators can be scanned twice
    income_vals = list(income_in)
    flux_vals = list(flux_in)
    if not income_vals or not flux_vals:
        return []

    i_max = max(income_vals)
    f_max = max(flux_vals)
    return [
        alpha * (i_val / i_max) + (1 - alpha) * (f_val / f_max)
        for i_val, f_val in zip(income_vals, flux_vals)
    ]

In [101]:
# pull the two ranking inputs out of the table as plain Python lists
income_w = flux['INCOME'].tolist()
flux_w = flux['EXIT_SUM'].tolist()

# score every station and store the result next to its inputs
ranks = weighting(income_w, flux_w)
flux['RANK'] = ranks

In [108]:
# show the ten stations with the highest RANK, best first
flux.sort_values('RANK',ascending=False).head(10)

Unnamed: 0,STATION,ENTRY_SUM,EXIT_SUM,LAT,LON,INCOME,RANK
1,GRD CNTRL-42 ST,8930354.0,7929180.0,40.751431,-73.976041,300000.0,0.998774
19,96 ST,4535177.0,2902793.0,40.791619,-73.964602,300000.0,0.872769
26,72 ST,3174065.0,2444464.0,40.778453,-73.98197,300000.0,0.861279
30,1 AV,1862031.0,2038169.0,40.730975,-73.981681,300000.0,0.851094
31,72 ST-2 AVE,1922659.0,1907709.0,40.778453,-73.98197,300000.0,0.847824
33,B'WAY-LAFAYETTE,1547935.0,1802631.0,40.725297,-73.996204,300000.0,0.845189
23,CHAMBERS ST,4010574.0,2667439.0,40.715478,-74.009266,220441.0,0.654712
4,34 ST-HERALD SQ,9115976.0,6261677.0,40.750373,-73.991057,151229.0,0.560249
10,59 ST COLUMBUS,4365349.0,4066701.0,40.768247,-73.981929,171735.0,0.559907
5,23 ST,8823955.0,6068022.0,40.742954,-73.992765,151229.0,0.555394


In [106]:
# export the station table in its current state to CSV
flux.to_csv('./data/ranks.csv')