# CJMLS

In [1]:
# Reload the autoreload extension and switch it to mode 2, so edits to the
# imported project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from IPython.core.display import display, HTML
from concurrent.futures import ThreadPoolExecutor

# Lift the row limit so rendered DataFrames are never truncated.
pd.options.display.max_rows = None

def preview_cjmls_df(mydf, save=False):
    """Render the key listing columns of ``mydf`` as an HTML table.

    Each index value is rendered as a link to
    ``https://cjmls.herokuapp.com/properties/<index value>`` that opens in a
    new tab, and table cells are kept on a single line with a
    ``white-space: nowrap`` style rule.

    Args:
        mydf: DataFrame that contains every column listed in ``columns``
            below; a missing column raises ``KeyError`` on selection.
        save: When true, the selected columns are also written to
            ``cjmls.csv`` (relative to the working directory) before the
            table is rendered.

    Returns:
        None. The table is emitted through ``display``.
    """
    import html  # local import so the function does not rely on another cell

    columns = ['address', 'city', 'county', 'bedrooms', 'baths_full', 'baths_part',
               'price', 'sqft',
               'park_and_ride_name',
               'park_and_ride_duration_text', 'park_and_ride_type', 'park_and_ride_distance',
               'park_and_ride_bus_name', 'park_and_ride_bus_distance',
               'barnabas_duration_text', 'lat', 'lng',]

    # Select once and reuse for both the CSV export and the HTML rendering.
    preview = mydf[columns]
    if save:
        # The CSV gets the raw (unescaped) values.
        preview.to_csv('cjmls.csv')

    def escape_text(value):
        # Only strings can carry markup; numbers and NaN pass through as-is.
        return html.escape(value) if isinstance(value, str) else value

    def link_to_listing(listing_id):
        return f'<a target="_blank" href="https://cjmls.herokuapp.com/properties/{listing_id}">{listing_id}</a>'

    # escape=False is needed so the index links are emitted as real anchors,
    # but it also switches off escaping for every data cell. Escape the string
    # cells by hand so text such as "Watchung Park & Ride" (or any markup in a
    # scraped address) is shown literally instead of being injected as HTML.
    safe_preview = preview.copy()
    for name in columns:
        safe_preview[name] = safe_preview[name].map(escape_text)

    table_html = safe_preview.to_html(
        formatters={'__index__': link_to_listing},
        escape=False)
    display(HTML(
        """<style>.dataframe td {white-space: nowrap;}</style>""" + table_html))

**Import global settings**

In [3]:
from settings import (blacklist_cities, MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS,
                      ONE_HOUR, HALF_HOUR, TEN_MINUTES,)

# Echo the imported thresholds so the run records which settings were in effect.
print(*(MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS))

300000 525000 16000 3 2


**Download listings from [CJMLS](https://www.mcmls.net/)**

In [4]:
from cjmls import get_listings

# get_listings is iterated as (meta, listing) pairs; only the listing half is kept.
listings = [
    record
    for _meta, record in get_listings(
        min_beds=MIN_BEDS,
        min_baths=MIN_BATHS,
        max_price=MAX_PRICE,
    )
]
print(f"Downloaded {len(listings)} listings from CJMLS.")

Downloaded 1023 listings from CJMLS.


**Geocode each address using the Google Maps API**

In [5]:
from geolocate import add_geocode_to_listing
# Geocode every listing that has a non-empty address; the rest are skipped.
# The helper's return value is ignored, so it presumably annotates the
# listing in place (confirm in geolocate.py).
for listing in tqdm(listings):
    if listing.get('address'):
        add_geocode_to_listing(listing)

could not geocode: 0 Samarpan Court Robbinsville, NJ or 0 Samarpan Court Robbinsville, NJ
could not geocode: 0 Samarpan Court Robbinsville, NJ or 0 Samarpan Court Robbinsville, NJ
could not geocode: 118 Swarthmore Terrace Menlo Park Terrace, NJ or 118 Swarthmore Terrace Menlo Park Terrace, NJ
could not geocode: 43 Traditions Place Monroe, NJ or 43 Traditions Place Monroe, NJ
could not geocode: 8 Traditions Place Monroe, NJ or 8 Traditions Place Monroe, NJ
could not geocode: 795 Chalet Drive Woodbridge Proper, NJ or 795 Chalet Drive Woodbridge Proper, NJ
could not geocode: 8 Ridgedale Place Woodbridge Proper, NJ or 8 Ridgedale Place Woodbridge Proper, NJ
could not geocode: 155 S Park Drive Woodbridge Proper, NJ or 155 S Park Drive Woodbridge Proper, NJ
could not geocode: 54 Mercer Street Menlo Park Terrace, NJ or 54 Mercer Street Menlo Park Terrace, NJ
could not geocode: 15 Colonial Court Woodbridge Proper, NJ or 15 Colonial Court Woodbridge Proper, NJ
could not geocode: 169 Hillside Av

**Find the closest NJ Transit Park and Ride for each address**

In [6]:
from commute import add_closest_park_and_ride_to_listing

# Visit each listing once, with a progress bar, and let the commute helper
# attach its closest park and ride.
for listing in tqdm(listings):
    add_closest_park_and_ride_to_listing(listing)




**Get commute times for each address, both via park and ride and via walking**

In [7]:
from commute import add_commute_to_listing

# Run the commute helper over every listing, with a progress bar.
for listing in tqdm(listings):
    add_commute_to_listing(listing)

# Coverage report: a listing counts when the corresponding key holds a truthy value.
print(len([listing for listing in listings if listing.get('park_and_ride_duration')]),
      "out of", len(listings), "have park and ride commute times.")

print(len([listing for listing in listings if listing.get('nyc_duration')]),
      "out of", len(listings), "have walking commute times.")


990 out of 1023 have park and ride commute times.
908 out of 1023 have walking commute times.


**Listings with square footage (largest first)**

In [8]:
# Rebuild the frame from `listings` (indexed by listing id), keep only rows
# that pass every commute/price/city filter AND have a square footage, then
# order them largest first.
df = (
    pd.DataFrame(listings)
    .set_index('id')
    .loc[lambda frame: (
        ~frame.city.isin(blacklist_cities)
        & (frame.barnabas_duration < HALF_HOUR + TEN_MINUTES)
        & (frame.park_and_ride_duration1 < TEN_MINUTES)
        & (frame.park_and_ride_duration < ONE_HOUR + TEN_MINUTES)
        & (frame.price > MIN_PRICE)
        & frame.sqft.notnull()
    )]
    .sort_values(by=['sqft'], ascending=[False])
)
print(len(df), "listings after filtering")
preview_cjmls_df(df)

28 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,price,sqft,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1802582,47 Normandy Drive,Woodbridge Township,Middlesex,5,3,1,499900,3705.0,Metropark,"10 mins, 38 mins",rail,2.482587,Watchung Park & Ride,4.909467,36 mins,40.6045,-74.3331
1805433,380 Outlook Avenue,Woodbridge Township,Middlesex,4,2,0,495000,3000.0,Avenel,"5 mins, 36 mins",rail,1.395819,Carteret,3.9896,34 mins,40.5821,-74.3034
1806033,333 S Pine Avenue,South Amboy,Middlesex,4,2,0,399900,2924.0,South Amboy,"3 mins, 48 mins",rail,0.623283,Sayreville,0.887595,39 mins,40.4754,-74.2817
1713320,506 Col Db Kelly Way,South Amboy,Middlesex,3,2,1,369000,2800.0,Sayreville,"5 mins, 55 mins",bus,0.580759,Sayreville,0.580759,39 mins,40.4777,-74.2873
1808534,168 Colonia Road,Woodbridge Township,Middlesex,3,2,1,475000,2394.0,Metropark,"4 mins, 38 mins",rail,1.366399,Carteret,4.328978,33 mins,40.5817,-74.3099
1810732,65 roosevelt Boulevard,Sayreville,Middlesex,4,2,1,425000,2200.0,Sayreville,"4 mins, 55 mins",bus,0.721068,Sayreville,0.721068,39 mins,40.4702,-74.308
1709216,1022 BORDENTOWN Avenue,Sayreville,Middlesex,4,2,1,379900,2200.0,Old Bridge,"5 mins, 1 hour 3 mins",bus,0.946948,Old Bridge,0.946948,40 mins,40.4599,-74.305
1709226,1024 BORDENTOWN Avenue,Sayreville,Middlesex,4,2,1,379900,2200.0,Old Bridge,"5 mins, 1 hour 3 mins",bus,0.97055,Old Bridge,0.97055,40 mins,40.4602,-74.3053
1801380,1898 W 5th Street,Piscataway Township,Middlesex,4,2,1,449000,2028.0,Dunellen,"3 mins, 1 hour 0 mins",rail,0.599803,Watchung Park & Ride,4.742956,37 mins,40.5848,-74.4549
1719672,112 Lee Drive,Middlesex,Middlesex,4,2,0,325000,2000.0,Dunellen,"5 mins, 1 hour 0 mins",rail,1.342684,Watchung Park & Ride,5.851621,37 mins,40.5823,-74.486


**Listings with missing square footage (by city, then price descending)**

In [9]:
# Rebuild the frame from `listings` (indexed by listing id), keep only rows
# that pass every commute/price/city filter but have NO square footage, then
# order them by city (A-Z) and, within a city, by price (highest first).
df = (
    pd.DataFrame(listings)
    .set_index('id')
    .loc[lambda frame: (
        ~frame.city.isin(blacklist_cities)
        & (frame.barnabas_duration < HALF_HOUR + TEN_MINUTES)
        & (frame.park_and_ride_duration1 < TEN_MINUTES)
        & (frame.park_and_ride_duration < ONE_HOUR + TEN_MINUTES)
        & (frame.price > MIN_PRICE)
        & frame.sqft.isnull()
    )]
    .sort_values(by=['city', 'price'], ascending=[True, False])
)
print(len(df), "listings after filtering")
preview_cjmls_df(df)

80 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,price,sqft,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1803747,352 Berkeley Avenue,Bloomfield,Essex,5,2,0.0,337000,,Bloomfield,"4 mins, 36 mins",rail,0.736768,Clifton Commons,3.354571,21 mins,40.7924,-74.186
1803600,114 Allwood Place,Clifton,Passaic,4,2,0.0,499000,,Passaic,"4 mins, 40 mins",rail,0.763636,Clifton Commons,1.381371,30 mins,40.8464,-74.1479
1808973,50 Terry Avenue,Edison,Middlesex,3,2,1.0,499900,,Metropark,"4 mins, 38 mins",rail,1.046095,Sayreville,6.196405,35 mins,40.5598,-74.3456
1810757,5 Sharp Road,Edison,Middlesex,4,2,1.0,465000,,Metuchen,"7 mins, 51 mins",rail,1.557051,Sayreville,4.722426,37 mins,40.5413,-74.3311
1810768,37 Roxy Avenue,Edison,Middlesex,3,2,1.0,460000,,Metropark,"8 mins, 38 mins",rail,1.973808,Watchung Park & Ride,4.792475,37 mins,40.5927,-74.3496
1805273,14 Mayling Court,Edison,Middlesex,4,2,1.0,417000,,Metuchen,"4 mins, 51 mins",rail,0.832383,Sayreville,5.443685,38 mins,40.5471,-74.3472
1810817,703 New Dover Road,Edison,Middlesex,3,3,0.0,415000,,Metropark,"6 mins, 38 mins",rail,1.321405,Watchung Park & Ride,5.447293,36 mins,40.5852,-74.342
1808831,58 Morris Avenue,Edison,Middlesex,4,2,0.0,375000,,Metuchen,"5 mins, 51 mins",rail,1.222795,Sayreville,4.29978,38 mins,40.5241,-74.3526
1806244,38 Henry Street,Edison,Middlesex,4,2,0.0,365000,,Metropark,"5 mins, 38 mins",rail,0.686462,Carteret,5.914199,35 mins,40.5754,-74.339
1808119,77 Sixth Street,Edison,Middlesex,3,2,1.0,329900,,Metuchen,"6 mins, 51 mins",rail,1.433108,Sayreville,4.488425,37 mins,40.5367,-74.334


In [14]:
import folium

# Plot the listings currently held in `df` (whichever filter cell ran last)
# on an interactive map, with a green marker for the home location.
# NOTE(review): the home coordinates and address are hard-coded here; consider
# moving them to settings so they are not committed with the notebook.
home = (40.8961863, -74.1726829,)

m = folium.Map(location=home, zoom_start=10)

folium.Marker(home, tooltip='<i>115 Dumont Ave. Clifton, NJ</i>', icon=folium.Icon(color='green')).add_to(m)

# `listing_id` instead of `id`: a top-level loop variable stays in the kernel
# namespace afterwards and would shadow the builtin `id`.
for listing_id, row in df.iterrows():
    # A marker needs real coordinates; skip any row that was never geocoded.
    if pd.isnull(row.lat) or pd.isnull(row.lng):
        continue
    url = f"https://cjmls.herokuapp.com/properties/{listing_id}"
    address = row.address + ", " + row.city
    # Every field ends with <br/>: plain newlines are not line breaks in HTML,
    # so without it the commute fields run together on one line in the popup.
    popup = f"""
    Address: {address}<br/>
    MLS: <a target="_blank" href="{url}">{listing_id}</a><br/>
    Price: {row.price}<br/>
    Dad (Park and Ride): {row.park_and_ride_duration_text}<br/>
    Dad (Walk and Ride): {row.nyc_duration_text}<br/>
    Mom: {row.barnabas_duration_text}
    """
    folium.Marker((row.lat, row.lng,), popup=f'<i>{popup}</i>', tooltip=f'<i>{address}</i>').add_to(m)

# Bare last expression so the notebook renders the map.
m