In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import nearest_points
import numpy as np

In [2]:
neighborhoods = gpd.read_file('data/chicago/v3 08192017/ChicagoNeighborhoods.shp')
transit = gpd.read_file('data/chicago/v3 08192017/transit_upd081917.shp')
coffee = gpd.read_file('data/chicago/v3 08192017/coffee_upd081917.shp')
apts = gpd.read_file('data/chicago/v3 08192017/clApts_upd081917.shp')

In [3]:
# setting up variables and prelim processing
home = Point(-87.659349,41.9880054)
work = Point(-87.6438878,41.884193)
apts = apts[apts['bedroom'].between(1.0,2.0)]

In [4]:
# set up functions
def getNearestPoint(pt,searchPts):
    # get nearest point relative to a given point
    pt = pt['geometry']
    nearest = nearest_points(pt,searchPts.geometry.unary_union)[1]
    nearest = gpd.GeoDataFrame(searchPts[searchPts.geometry == nearest])
    y = nearest.geometry.distance(pt)
    if y.iloc[0] == 0.0:
        searchPts = searchPts.loc[[i for i in searchPts.index if i != nearest.index[0] ],:]
        nearest = nearest_points(pt,searchPts.geometry.unary_union)[1]
        nearest = gpd.GeoDataFrame(searchPts[searchPts.geometry == nearest])
        y = nearest.geometry.distance(pt)
    try:
        y = float(y)
    except:
        y = 0.0
    return y

In [5]:
# clean and the business data
coffee.dropna(subset=['LONGITUDE'],inplace=True)
coffee = coffee.drop_duplicates(subset=['LONGITUDE','LATITUDE'])
coffee = coffee[coffee['DOING BUSI'].str.contains('STARBUCK') == False]

# get distance from home
coffee['homeDist'] = coffee['geometry'].apply(lambda x: home.distance(x))
coffee['workDist'] = coffee['geometry'].apply(lambda x: work.distance(x))

# get nearest other coffee place and train CTA train stop
coffee['nearestCoffeeDist'] = coffee.apply(lambda x: getNearestPoint(x,coffee),axis=1)
coffee['nearestTransitDist'] = coffee.apply(lambda x: getNearestPoint(x,transit),axis=1)

In [6]:
# get designated neighborhood and reduce columns
neighborhoods['area'] = neighborhoods['geometry'].area
neighborhoods.rename(columns={'pri_neigh':'neighborhood'},inplace=True)

joinedData = gpd.tools.sjoin(coffee,neighborhoods,op='within',how='inner')
joinedData = joinedData[['neighborhood','DOING BUSI', 
            'ADDRESS','CITY', 'STATE', 'ZIP CODE',
            'LONGITUDE','LATITUDE',
            'nearestTransitDist','workDist','homeDist','nearestCoffeeDist','geometry'
            ]].reset_index(drop=True)
joinedData.reset_index(inplace=True)

In [8]:
# get median price of 30 closest rentals
dists = joinedData['geometry'].apply(lambda nsb:
    apts['geometry'].apply(lambda apt: apt.distance(nsb)).sort_values().head(30).index
                        )
dists = pd.DataFrame(dists)
joinedData['medianPrice'] = dists['geometry'].apply(lambda x: apts.loc[x]['price'].median())
joinedData.drop('index',inplace=True,axis=1)

In [9]:
joinedData.head()

Unnamed: 0,neighborhood,DOING BUSI,ADDRESS,CITY,STATE,ZIP CODE,LONGITUDE,LATITUDE,nearestTransitDist,workDist,homeDist,nearestCoffeeDist,geometry,medianPrice
0,Old Town,"EVA'S COFFEE, INC.",1447 N SEDGWICK ST 1ST,CHICAGO,IL,60610,-87.638375,41.908857,0.0,0.025272,0.08188,0.014932,POINT (-87.63837516299999 41.908856726),2338.5
1,West Loop,ARTURO EXPRESS,130 S CANAL ST,CHICAGO,IL,60606,-87.639765,41.879616,0.0,0.00616,0.110144,0.001574,POINT (-87.639764523 41.879616268),2117.5
2,West Loop,MEDDLE COFFEE BAR,601 W JACKSON BLVD 1 A,CHICAGO,IL,60661,-87.642609,41.877889,0.0,0.006433,0.111382,0.003328,POINT (-87.642609175 41.877888529),2653.0
3,West Loop,PEET'S COFFEE & TEA,222 S RIVERSIDE PLZ 1ST,CHICAGO,IL,60606,-87.638579,41.878582,0.0,0.007725,0.111378,0.001574,POINT (-87.63857866799999 41.878581561),2117.5
4,West Loop,GROUNDSWELL COFFEE ROASTERS,1168 W MADISON ST 1ST 2,CHICAGO,IL,60607,-87.656987,41.881729,0.0,0.013329,0.106303,0.003664,POINT (-87.65698670899999 41.881728772),2700.0
