# CJMLS

In [1]:
%reload_ext autoreload
%autoreload 2

In [26]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from IPython.core.display import display, HTML
from concurrent.futures import ThreadPoolExecutor

pd.set_option('display.max_rows', None)

def preview_cjmls_df(mydf, save=False):
    columns = ['address', 'city', 'county', 'bedrooms', 'baths_full', 'baths_part',
               'price', 'sqft',
               'park_and_ride_name',
               'park_and_ride_duration_text', 'park_and_ride_type', 'park_and_ride_distance',
               'park_and_ride_bus_name', 'park_and_ride_bus_distance',
               'barnabas_duration_text',]
    if save:
        mydf[columns].to_csv('njmls.csv')
    display(HTML(
        """<style>.dataframe td {white-space: nowrap;}</style>""" +
        mydf[columns].to_html(
            formatters={
                '__index__':
                    lambda id: f'<a target="_blank" href="https://cjmls.herokuapp.com/properties/{id}">{id}</a>'},
            escape=False)))

**Define global parameters**

In [21]:
ONE_HOUR = 3600
TEN_MINUTES = ONE_HOUR/6
HALF_HOUR = ONE_HOUR / 2
ONE_HOUR_TEN_MINUTES = ONE_HOUR + TEN_MINUTES
ONE_HOUR_THIRTY_MINUTES = ONE_HOUR + HALF_HOUR
ONE_HOUR_FORTY_MINUTES = ONE_HOUR + HALF_HOUR + TEN_MINUTES

blacklist_cities = [
    'Paterson', 'East Orange', 'City of Orange', 'Belleville', 'Elizabeth',
    'Jefferson', 'Passaic', 'Newark', 'East Newark', 'Linden', 'Nutley',
    'Garfield', 'Hillside', 'Little Ferry']

**Download listings from [CJMLS](https://www.mcmls.net/)**

In [4]:
from cjmls import get_listings

listings = [listing for meta, listing in get_listings(min_beds=3, min_baths=2, max_price=500000)]
print(f"Downloaded {len(listings)} listings from CJMLS.")

Downloaded 986 listings from CJMLS.


**Geocode each address using Google Maps API**

In [6]:
from geolocate import add_geocode_to_listing
for listing in tqdm(listings):
    if not listing.get('address'): continue
    add_geocode_to_listing(listing)

could not geocode: 0 Samarpan Court Robbinsville, NJ or 0 Samarpan Court Robbinsville, NJ
could not geocode: 118 Swarthmore Terrace Menlo Park Terrace, NJ or 118 Swarthmore Terrace Menlo Park Terrace, NJ
could not geocode: 43 Traditions Place Monroe, NJ or 43 Traditions Place Monroe, NJ
could not geocode: 8 Traditions Place Monroe, NJ or 8 Traditions Place Monroe, NJ
could not geocode: 795 Chalet Drive Woodbridge Proper, NJ or 795 Chalet Drive Woodbridge Proper, NJ
could not geocode: 8 Ridgedale Place Woodbridge Proper, NJ or 8 Ridgedale Place Woodbridge Proper, NJ
could not geocode: 155 S Park Drive Woodbridge Proper, NJ or 155 S Park Drive Woodbridge Proper, NJ
could not geocode: 54 Mercer Street Menlo Park Terrace, NJ or 54 Mercer Street Menlo Park Terrace, NJ
could not geocode: 15 Colonial Court Woodbridge Proper, NJ or 15 Colonial Court Woodbridge Proper, NJ
could not geocode: 169 Hillside Avenue Woodbridge Proper, NJ or 169 Hillside Avenue Woodbridge Proper, NJ
could not geocode:

**Find closest NJ Transit Park and Ride to each address**

In [7]:
from commute import add_closest_park_and_ride_to_listing

for i in tqdm(range(len(listings))):
    listing = listings[i]
    add_closest_park_and_ride_to_listing(listing)




**Get commute time for each address through both park and ride and walking**

In [8]:
from commute import add_commute_to_listing

for i in tqdm(range(len(listings))):
    listing = listings[i]
    add_commute_to_listing(listing)

print(sum(1 for listing in listings if listing.get('park_and_ride_duration')),
      "out of", len(listings), "have park and ride commute times.")

print(sum(1 for listing in listings if listing.get('nyc_duration')),
  "out of", len(listings), "have walking commute times.")


954 out of 986 have park and ride commute times.
875 out of 986 have walking commute times.


In [27]:
df = pd.DataFrame(listings)
df.set_index('id', inplace=True)
df = df[
    (~df.city.isin(blacklist_cities)) &
    (df.barnabas_duration < HALF_HOUR + TEN_MINUTES) &
    (df.park_and_ride_duration1 < TEN_MINUTES) &
    (df.park_and_ride_duration < ONE_HOUR_TEN_MINUTES)
]
df = df.sort_values(by=['sqft',], ascending=[False])
print(len(df), "listings after filtering")
preview_cjmls_df(df)

208 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,price,sqft,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1804161,46 Wood Avenue,Edison,Middlesex,4,2,1.0,400000,875.0,Metropark,"2 mins, 38 mins",rail,0.278891,Carteret,5.798302,33 mins
1802582,47 Normandy Drive,Woodbridge Township,Middlesex,5,3,1.0,499900,3705.0,Metropark,"10 mins, 38 mins",rail,2.482587,Watchung Park & Ride,4.909467,36 mins
1804831,32 Dorothy Street,Carteret,Middlesex,6,5,0.0,464000,3600.0,Carteret,"7 mins, 42 mins",bus,0.920786,Carteret,0.920786,39 mins
1810516,22 KILFOYLE Avenue,Woodbridge Township,Middlesex,4,2,1.0,459000,3005.0,Woodbridge,"7 mins, 43 mins",rail,1.692658,Sayreville,4.571356,37 mins
1805433,380 Outlook Avenue,Woodbridge Township,Middlesex,4,2,0.0,495000,3000.0,Avenel,"5 mins, 36 mins",rail,1.395819,Carteret,3.9896,34 mins
1806033,333 S Pine Avenue,South Amboy,Middlesex,4,2,0.0,399900,2924.0,South Amboy,"3 mins, 48 mins",rail,0.623283,Sayreville,0.887595,39 mins
1808444,312 Walnut Street,South Amboy,Middlesex,4,2,0.0,285000,2800.0,Sayreville,"4 mins, 55 mins",bus,0.478589,Sayreville,0.478589,39 mins
1713320,506 Col Db Kelly Way,South Amboy,Middlesex,3,2,1.0,369000,2800.0,Sayreville,"5 mins, 55 mins",bus,0.580759,Sayreville,0.580759,39 mins
1805572,104 Randolph Street,Carteret,Middlesex,4,2,1.0,484900,2700.0,Carteret,"4 mins, 42 mins",bus,0.742283,Carteret,0.742283,38 mins
1808929,449 W 2nd Avenue,Roselle,Union,4,3,1.0,239000,2550.0,Roselle Park,"5 mins, 45 mins",rail,0.80901,Rutgers Lane Hospital - Union Twp,1.231717,27 mins
