# NJMLS

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from IPython.core.display import display, HTML
from concurrent.futures import ThreadPoolExecutor

# Lift pandas' row limit so displayed DataFrames are never truncated
# (the preview table below is meant to show every remaining listing).
pd.set_option('display.max_rows', None)

def preview_njmls_df(mydf, save=False):
    """Display a fixed subset of listing columns as an HTML table.

    Each index value is rendered as a link to the NJMLS listing page with
    that value as its ``mlsnum`` query parameter, opening in a new tab.

    Args:
        mydf: DataFrame of listings. It must contain every preview column
            named below; its index values are interpolated into the URLs.
        save: When true, the same column subset is also written to
            'njmls.csv' in the current working directory.
    """
    preview_columns = [
        'address', 'city', 'county',
        'bedrooms', 'baths_full', 'baths_part',
        'price', 'tax', 'style', 'rooms', 'list_date',
        'park_and_ride_name', 'park_and_ride_duration_text',
        'park_and_ride_type', 'park_and_ride_distance',
        'park_and_ride_bus_name', 'park_and_ride_bus_distance',
        'barnabas_duration_text',
    ]
    preview = mydf[preview_columns]

    if save:
        preview.to_csv('njmls.csv')

    def as_listing_link(mls_number):
        # Wrap an index value in an anchor pointing at its NJMLS detail page.
        return f'<a target="_blank" href="https://www.njmls.com/listings/index.cfm?action=dsp.info&mlsnum={mls_number}">{mls_number}</a>'

    # Keep every cell on a single line so long addresses do not wrap.
    nowrap_css = """<style>.dataframe td {white-space: nowrap;}</style>"""
    # escape=False lets the anchor markup generated for the index through as HTML.
    table_html = preview.to_html(
        formatters={'__index__': as_listing_link},
        escape=False)
    display(HTML(nowrap_css + table_html))

**Define global parameters**

In [3]:
# Counties to query, one search per county.
counties = [
    'Passaic',
    'Hudson',
    'Essex',
    'Middlesex',
    'Morris',
    'Bergen',
    'Union',
    'Sussex',
]

# Duration thresholds. ONE_HOUR is 3600, i.e. the values read as seconds;
# the derived ones come out as floats because of the true division.
ONE_HOUR = 3600
HALF_HOUR = ONE_HOUR / 2
TEN_MINUTES = ONE_HOUR / 6
ONE_HOUR_TEN_MINUTES = ONE_HOUR + TEN_MINUTES
ONE_HOUR_THIRTY_MINUTES = ONE_HOUR + HALF_HOUR
ONE_HOUR_FORTY_MINUTES = ONE_HOUR_THIRTY_MINUTES + TEN_MINUTES

# Cities whose listings are dropped when the results are filtered.
blacklist_cities = [
    'Paterson',
    'East Orange',
    'City of Orange',
    'Belleville',
    'Elizabeth',
    'Jefferson',
    'Passaic',
    'Newark',
    'East Newark',
    'Linden',
    'Nutley',
    'Garfield',
    'Hillside',
    'Little Ferry',
]

**Download listings from [NJMLS](http://www.njmls.com/)**

In [4]:
from njmls import get_listings, get_listing_detail

def get_listing_detail_wrapper(listing):
    """Fetch the detail record for a search result and carry over its coordinates.

    Args:
        listing: Search-result mapping providing 'id', 'lat' and 'lng'.

    Returns:
        Whatever ``get_listing_detail`` returns for ``listing['id']``, with
        its 'lat' and 'lng' entries set from the search result.
    """
    detail = get_listing_detail(listing['id'])
    # Copy the coordinates from the search result onto the detail record.
    for coordinate in ('lat', 'lng'):
        detail[coordinate] = listing[coordinate]
    return detail
    
# Each county is searched once per price band. The bands are adjacent and
# share the 400000 boundary, so a listing priced exactly there may come back
# from both queries (depending on how get_listings treats its bounds).
PRICE_BANDS = [(350000, 400000), (400000, 515000)]

listings = []
for county in tqdm(counties):

    # Key the search results by id so a listing returned by more than one
    # band is fetched and stored only once; dict order keeps first-seen order.
    listings_dict = {}
    for min_price, max_price in PRICE_BANDS:
        for summary in get_listings(
                min_beds=3,
                min_baths=2,
                county_search=True,
                min_price=min_price,
                max_price=max_price,
                counties=[county.upper()],
                proptypes=['1']):
            listings_dict[summary['id']] = summary

    current_listings = list(listings_dict.values())
    listing_ids = list(listings_dict)

    # Fetch the detail records concurrently; map() yields results in input
    # order, and the iterator is drained before the executor shuts down.
    with ThreadPoolExecutor(max_workers=20) as executor:
        listings.extend(tqdm(
            executor.map(get_listing_detail_wrapper, current_listings),
            total=len(current_listings)))

print(f"Downloaded {len(listings)} listings from NJMLS.")

# Later steps need an address, so drop records without one.
listings = [x for x in listings if x['address']]
print(f"Filtered down to {len(listings)} listings due to null address.")



Downloaded 534 listings from NJMLS.
Filtered down to 516 listings due to null address.


**Geocode each address using Google Maps API**

In [5]:
from geolocate import add_geocode_to_listing
# Geocode every listing that has an address; the helper's return value is
# not used here, and listings without an address are passed over.
for listing in tqdm(listings):
    if listing.get('address'):
        add_geocode_to_listing(listing)

could not find county for 1742511 46 Veranda Ave, North Caldwell, NJ, USA [{'long_name': '46', 'short_name': '46', 'types': ['street_number']}, {'long_name': 'Veranda Avenue', 'short_name': 'Veranda Ave', 'types': ['route']}, {'long_name': 'North Caldwell', 'short_name': 'North Caldwell', 'types': ['locality', 'political']}, {'long_name': 'New Jersey', 'short_name': 'NJ', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}]
could not find county for 1745962 790 Grove St, Clifton, NJ 07013, USA [{'long_name': '790', 'short_name': '790', 'types': ['street_number']}, {'long_name': 'Grove Street', 'short_name': 'Grove St', 'types': ['route']}, {'long_name': 'Clifton', 'short_name': 'Clifton', 'types': ['locality', 'political']}, {'long_name': 'New Jersey', 'short_name': 'NJ', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country',

**Find closest NJ Transit Park and Ride to each address**

In [6]:
from commute import add_closest_park_and_ride_to_listing

# Annotate every listing. The helper's return value is unused, so it
# presumably updates the listing in place (TODO confirm against commute.py).
for listing in tqdm(listings):
    add_closest_park_and_ride_to_listing(listing)




**Get the commute time for each address, both via park and ride and by walking**

In [7]:
from commute import add_commute_to_listing

# Annotate every listing. The helper's return value is unused, so it
# presumably updates the listing in place (TODO confirm against commute.py).
for listing in tqdm(listings):
    add_commute_to_listing(listing)

# Coverage report: a listing counts only if the duration key is present and truthy.
with_park_and_ride = sum(1 for listing in listings if listing.get('park_and_ride_duration'))
print(f"{with_park_and_ride} out of {len(listings)} have park and ride commute times.")

with_walking = sum(1 for listing in listings if listing.get('nyc_duration'))
print(f"{with_walking} out of {len(listings)} have walking commute times.")


515 out of 516 have park and ride commute times.
472 out of 516 have walking commute times.


In [8]:
# One row per listing, indexed by listing id (the preview turns the index into links).
df = pd.DataFrame(listings).set_index('id')

# na=False: a listing with no basement value would otherwise put NaN into the
# mask, and negating that object-dtype mask with ~ raises TypeError. Missing
# values are treated as "no crawlspace mentioned", so those rows are kept.
has_crawlspace = df.basement.str.contains('Crawlspace', na=False)

keep = (
    ~df.city.isin(blacklist_cities)
    & ~has_crawlspace
    & (df.basement != 'None')
    & (df['style'] != 'Cape Cod')
    # Duration limits use the constants from the parameters cell:
    # under 40 minutes, under 10 minutes, and under 1 h 10 min respectively.
    & (df.barnabas_duration < HALF_HOUR + TEN_MINUTES)
    # NOTE(review): park_and_ride_duration1 looks like the first leg of the
    # park-and-ride trip (home to the lot) — confirm against commute.py.
    & (df.park_and_ride_duration1 < TEN_MINUTES)
    & (df.park_and_ride_duration < ONE_HOUR_TEN_MINUTES)
)

df = df[keep].sort_values(by='city')
print(len(df), "listings after filtering")
preview_njmls_df(df)

162 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,price,tax,style,rooms,list_date,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1727036,47 Hearthstone Road,Bloomfield,Essex,4,2,0,350000,12699.0,Split Level,10,06/30/2017,Watchung Avenue,"6 mins, 45 mins",rail,1.459975,Allwood Road,1.638773,25 mins
1748219,151 Sadler Road,Bloomfield,Essex,3,2,0,370000,11603.0,Colonial,7,12/17/2017,Clifton Commons,"8 mins, 47 mins",bus,1.67228,Clifton Commons,1.67228,23 mins
1748481,89 Mountain Avenue,Bloomfield,Essex,3,2,0,399000,13884.0,Colonial,6,12/21/2017,Watchung Avenue,"6 mins, 45 mins",rail,1.141127,Allwood Road,1.494435,25 mins
1735422,338 Essex Avenue,Bloomfield,Essex,5,2,1,439900,16232.0,Colonial,9,09/01/2017,Walnut Street,"5 mins, 42 mins",rail,0.751703,Allwood Road,2.776278,24 mins
1746287,175 Jerome Place,Bloomfield,Essex,4,3,0,459000,8715.0,Colonial,8,11/28/2017,Bloomfield,"5 mins, 36 mins",rail,0.632988,Clifton Commons,3.454818,21 mins
1741304,85 Bellevue Terrace,Bloomfield,Essex,3,2,1,424900,12668.0,Split Level,8,10/13/2017,Allwood Road,"4 mins, 33 mins",bus,1.063173,Allwood Road,1.063173,27 mins
1736975,15 George Street,Bloomfield,Essex,4,2,1,439000,14719.0,Colonial,8,09/12/2017,Watchung Avenue,"6 mins, 45 mins",rail,1.390126,Allwood Road,1.715707,22 mins
1742512,10 Claremont Avenue,Bloomfield,Essex,4,2,2,479800,16663.0,Colonial,11,10/18/2017,Watchung Avenue,"6 mins, 45 mins",rail,1.176241,Allwood Road,1.444312,25 mins
1743484,459 River Road,Bogota,Bergen,4,2,0,369999,8905.0,Colonial,7,11/01/2017,Hackensack Terminal,"4 mins, 45 mins",bus,0.537489,Hackensack Terminal,0.537489,38 mins
1732295,146 Cypress Avenue,Bogota,Bergen,4,2,1,464900,10032.0,Colonial,9,08/07/2017,Hackensack Terminal,"5 mins, 45 mins",bus,1.226966,Hackensack Terminal,1.226966,35 mins
