# NJMLS

In [1]:
# (Re)load IPython's autoreload extension so edits to the local helper modules
# imported below are picked up without restarting the kernel.
%reload_ext autoreload
# Mode 2: reload all imported modules before executing each cell.
%autoreload 2

In [2]:
import json
import math
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from IPython.core.display import display, HTML
from concurrent.futures import ThreadPoolExecutor

# Disable pandas' row truncation so displayed DataFrames show every row.
pd.set_option('display.max_rows', None)

def preview_njmls_df(mydf, save=False):
    """Display a fixed subset of listing columns as an HTML table.

    Each index value is rendered as a link to the NJMLS listing page for that
    value (the index is assumed to hold MLS numbers -- confirm with callers).

    Args:
        mydf: DataFrame containing at least the columns selected below.
        save: When true, the same column subset is also written to
            ``njmls.csv`` before the table is displayed.
    """
    shown = mydf[[
        'address', 'city', 'county', 'bedrooms', 'baths_full', 'baths_part',
        'price', 'tax', 'style', 'rooms', 'list_date', 'park_and_ride_name',
        'park_and_ride_duration_text', 'park_and_ride_type', 'park_and_ride_distance',
        'park_and_ride_bus_name', 'park_and_ride_bus_distance',
        'barnabas_duration_text', 'nyc_duration_text', 'lat', 'lng',
    ]]

    if save:
        shown.to_csv('njmls.csv')

    def mls_link(mls_id):
        # Turn an index value into an anchor that opens the listing in a new tab.
        return f'<a target="_blank" href="https://www.njmls.com/listings/index.cfm?action=dsp.info&mlsnum={mls_id}">{mls_id}</a>'

    # escape=False keeps the generated anchor markup intact in the output;
    # '__index__' is the formatter key pandas applies to the index column.
    table_html = shown.to_html(formatters={'__index__': mls_link}, escape=False)
    nowrap_css = """<style>.dataframe td {white-space: nowrap;}</style>"""
    display(HTML(nowrap_css + table_html))

**Import global settings**

In [3]:
from settings import (blacklist_cities, counties, ONE_HOUR, HALF_HOUR, TEN_MINUTES,
                      MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS,)

# Echo the imported search thresholds so the values used for this run are
# recorded in the notebook output.
print(MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS,)

300000 550000 16000 4 3


**Download listings from [NJMLS](http://www.njmls.com/)**

In [4]:
from njmls import get_listings, get_listing_detail

def get_listing_detail_wrapper(listing):
    """Fetch the detail record for ``listing['id']`` and carry its coordinates over.

    The ``lat`` and ``lng`` values of the search-result ``listing`` are copied
    onto the record returned by ``get_listing_detail``, which is then returned.
    """
    detail = get_listing_detail(listing['id'])
    for coord_key in ('lat', 'lng'):
        detail[coord_key] = listing[coord_key]
    return detail
    
def _fetch_price_band(county, min_price, max_price):
    """Return the search results for one county restricted to one price band.

    Best-effort: any exception raised by ``get_listings`` (the recorded runs
    show "No listings found." arriving this way) is printed together with the
    county and band, and an empty list is returned so the other band and the
    remaining counties are still processed.
    """
    try:
        return list(get_listings(
            min_beds=MIN_BEDS,
            min_baths=MIN_BATHS,
            county_search=True,
            min_price=min_price,
            max_price=max_price,
            counties=[county.upper()],
            proptypes=['1']))
    except Exception as err:
        print(county, f"[{min_price}-{max_price}]", err)
        return []


# Every county is searched in two price bands split at the midpoint.
# NOTE(review): presumably this keeps each query under a result cap -- confirm.
midpoint = (MAX_PRICE + MIN_PRICE) / 2
price_bands = [
    (MIN_PRICE, int(math.floor(midpoint))),
    (int(math.ceil(midpoint)), MAX_PRICE),
]

listings = []
for county in tqdm(counties):
    # Keyed by listing id: when the midpoint is a whole number both bands
    # contain it, so a listing priced exactly there may be returned twice.
    # Each band is fetched independently so a failure in one band no longer
    # discards the results of the other.
    county_listings = {}
    for low, high in price_bands:
        for found in _fetch_price_band(county, low, high):
            county_listings.setdefault(found['id'], found)

    if not county_listings:
        # Nothing to look up; skip creating an empty pool and progress bar.
        continue

    pending = list(county_listings.values())
    # Detail pages are fetched concurrently; pool.map preserves input order.
    with ThreadPoolExecutor(max_workers=20) as pool:
        listings += tqdm(pool.map(get_listing_detail_wrapper, pending), total=len(pending))

print(f"Downloaded {len(listings)} listings from NJMLS.")

# .get() matches the address check used by the geocoding cell and tolerates
# detail records that lack the key entirely.
listings = [x for x in listings if x.get('address')]
print(f"Filtered down to {len(listings)} listings due to null address.")


Middlesex No listings found.


Union No listings found.


Somerset No listings found.


Mercer No listings found.



Downloaded 132 listings from NJMLS.
Filtered down to 128 listings due to null address.


**Geocode each address using Google Maps API**

In [5]:
from geolocate import add_geocode_to_listing
# Run the geocoder over every listing that has a non-empty address; listings
# without one are passed over.
for entry in tqdm(listings):
    if entry.get('address'):
        add_geocode_to_listing(entry)




**Find closest NJ Transit Park and Ride to each address**

In [6]:
from commute import add_closest_park_and_ride_to_listing

# Look up the closest park and ride for every listing, with a progress bar.
for listing in tqdm(listings):
    add_closest_park_and_ride_to_listing(listing)




**Get commute time for each address through both park and ride and walking**

In [7]:
from commute import add_commute_to_listing

# Add commute information to every listing, with a progress bar.
for listing in tqdm(listings):
    add_commute_to_listing(listing)

# Report how many listings ended up with a truthy value for each duration key.
total = len(listings)
with_park_and_ride = sum(bool(item.get('park_and_ride_duration')) for item in listings)
with_walking = sum(bool(item.get('nyc_duration')) for item in listings)

print(with_park_and_ride, "out of", total, "have park and ride commute times.")

print(with_walking, "out of", total, "have walking commute times.")


128 out of 128 have park and ride commute times.
112 out of 128 have walking commute times.


In [8]:
# Build the listings table indexed by MLS id (no in-place mutation, so the
# cell is safe to re-run).
df = pd.DataFrame(listings).set_index('id')

# na=False: listings with a missing basement value would otherwise yield NaN
# here, and inverting a mask that contains NaN with `~` fails. Treating
# missing as "not a crawlspace" matches how `!= 'None'` below lets them pass.
has_crawlspace = df.basement.str.contains('Crawlspace', na=False)

keep = (
    (~df.city.isin(blacklist_cities)) &
    (~has_crawlspace) &
    (df.basement != 'None') &
    (df['style'] != 'Cape Cod') &
    (df.barnabas_duration < HALF_HOUR + TEN_MINUTES) &
    # park_and_ride_duration1 vs park_and_ride_duration: NOTE(review) the
    # former looks like the first leg (getting to the park and ride) and the
    # latter the whole trip -- confirm against the commute module.
    (df.park_and_ride_duration1 < TEN_MINUTES) &
    (df.park_and_ride_duration < ONE_HOUR + TEN_MINUTES)
)

df = df[keep].sort_values(by=['city'], ascending=[True])
print(len(df), "listings after filtering")
preview_njmls_df(df)

45 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,price,tax,style,rooms,list_date,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text,nyc_duration_text,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1746287,175 Jerome Place,Bloomfield,Essex,4,3,0,459000,8715,Colonial,8,11/28/2017,Bloomfield,"5 mins, 36 mins",rail,0.632988,Clifton Commons,3.454818,21 mins,1 hour 12 mins,40.791904,-74.188019
1748500,357 Palisade Avenue,Bogota,Bergen,4,3,0,455000,12000,Colonial,8,12/21/2017,Hackensack Terminal,"6 mins, 45 mins",bus,0.957129,Hackensack Terminal,0.957129,35 mins,40 mins,40.87744,-74.024662
1735749,521 Hackensack Street,Carlstadt,Bergen,4,3,0,499000,7955,Colonial,8,09/04/2017,Wood Ridge,"5 mins, 28 mins",rail,0.671821,Passaic Bus Terminal,2.553566,32 mins,34 mins,40.83929,-74.089955
1744349,9 Lynn Drive,Clifton,Passaic,4,3,1,519900,14975,Split Level,10,11/08/2017,Allwood Road,"3 mins, 33 mins",bus,0.608936,Allwood Road,0.608936,27 mins,51 mins,40.861,-74.16844
1747944,39 Woodlawn Avenue,Clifton,Passaic,5,3,1,535000,15508,Colonial,9,12/13/2017,Montclair Heights,"3 mins, 52 mins",rail,0.547339,Allwood Road,0.906385,27 mins,55 mins,40.856621,-74.192116
1741251,3 Trella Terrace,Clifton,Passaic,4,3,0,499000,13539,Ranch,10,10/13/2017,Clifton,"8 mins, 44 mins",rail,1.260493,Allwood Road,1.546734,28 mins,47 mins,40.876239,-74.174675
1801304,41 Ridgewood Road,Clifton,Passaic,4,3,0,350000,1800,Colonial,8,01/12/2018,Allwood Road,"4 mins, 33 mins",bus,0.702121,Allwood Road,0.702121,28 mins,39 mins,40.851911,-74.162052
1800649,103 Notch Road,Clifton,Passaic,5,3,0,495000,12140,Colonial,7,01/07/2018,Allwood Road,"3 mins, 33 mins",bus,0.696957,Allwood Road,0.696957,25 mins,36 mins,40.863733,-74.177815
1736582,41-10 Erli Road,Fair Lawn,Bergen,4,3,0,529900,14443,Bi-Level,8,09/09/2017,Fairlawn DPW,"2 mins, 45 mins",bus,0.915041,Fairlawn DPW,0.915041,36 mins,49 mins,40.937809,-74.097514
1632549,41-17 Christine Court,Fair Lawn,Bergen,4,3,0,499950,14836,Bi-Level,8,08/03/2016,Fairlawn DPW,"4 mins, 45 mins",bus,0.798312,Fairlawn DPW,0.798312,37 mins,57 mins,40.9399,-74.097151


In [9]:
import folium

# Centre the map on the reference home location and mark it in green.
home = (40.8961863, -74.1726829)

m = folium.Map(location=home, zoom_start=12)

home_marker = folium.Marker(
    home,
    tooltip='<i>115 Dumont Ave. Clifton, NJ</i>',
    icon=folium.Icon(color='green'),
)
home_marker.add_to(m)

# One marker per filtered listing: the tooltip shows the address, the popup
# summarises the listing and links to its NJMLS page.
# The loop variable is `mls_id` rather than `id` so the builtin is not shadowed
# for the rest of the kernel session.
for mls_id, row in df.iterrows():
    url = f"http://www.njmls.com/listings/index.cfm?action=dsp.info&mlsnum={mls_id}"
    address = row.address + ", " + row.city
    # "Baths Part" previously repeated row.baths_full; it now reports the
    # partial-bath count.
    popup = f"""
    Address: {address}<br/>
    MLS: <a target="_blank" href="{url}">{mls_id}</a><br/>
    Price: {row.price}<br/>
    Bedrooms: {row.bedrooms}<br/>
    Baths Full: {row.baths_full}<br/>
    Baths Part: {row.baths_part}<br/>
    Tax: {row.tax}<br/>
    Dad (Park and Ride): {row.park_and_ride_duration_text}<br/>
    Dad (Walk and Ride): {row.nyc_duration_text}<br/>
    Mom: {row.barnabas_duration_text}
    """
    folium.Marker((row.lat, row.lng,), popup=f'<i>{popup}</i>', tooltip=f'<i>{address}</i>').add_to(m)

# Overlay the passenger rail GeoJSON file on the map.
folium.GeoJson('Tran_railroad_passenger.json', name='geojson',).add_to(m)

# Read the station features up front so the file is closed before the markers
# are built. The encoding is explicit because open() otherwise falls back to a
# locale-dependent default, while JSON interchange files are UTF-8.
with open('Tran_railroad_station.json', encoding='utf-8') as f:
    stations = json.load(f)['features']

for station in stations:
    # Coordinates are stored as (lng, lat); folium expects (lat, lng).
    lng, lat = station['geometry']['coordinates']
    props = station['properties']
    # Only the properties shown in the tooltip are read, so stations lacking
    # other attributes no longer abort the loop.
    name = props['STATION']
    rail_line = props['RAIL_LINE']
    folium.CircleMarker((lat, lng,), radius=2, color='red', tooltip=f"{name} - {rail_line}").add_to(m)

# Write the finished map to a standalone HTML file.
m.save('njmls-map.html')