# GSMLS

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import json
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from IPython.core.display import display, HTML
from concurrent.futures import ThreadPoolExecutor

pd.set_option('display.max_rows', None)

def preview_gsmls_df(mydf, save=False):
    columns = [
        'address', 'city', 'county', 'bedrooms', 'baths_full', 'baths_part',
        'sqft', 'price', 'tax', 'style', 'rooms', 'park_and_ride_name',
        'park_and_ride_duration_text', 'park_and_ride_type', 'park_and_ride_distance',
        'park_and_ride_bus_name', 'park_and_ride_bus_distance',
        'barnabas_duration_text', 'nyc_duration_text', 'lat', 'lng',]
    if save:
        mydf[columns].to_csv('gsmls.csv')
    display(HTML(
        """<style>.dataframe td {white-space: nowrap;}</style>""" +
        mydf[columns].to_html(
            formatters={
                '__index__':
                    lambda id: f'<a target="_blank" href="https://gsmls.herokuapp.com/properties/{id}">{id}</a>'},
                        escape=False)))

**Import global settings**

In [3]:
from settings import (blacklist_cities, counties, ONE_HOUR, HALF_HOUR, TEN_MINUTES,
                      MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS,)

print(MIN_PRICE, MAX_PRICE, MAX_TAX, MIN_BEDS, MIN_BATHS,)

300000 550000 16000 4 3


**Download listings from [GSMLS](https://www.gsmls.com/)**

In [4]:
from gsmls import get_listings, GSMLSException

listings = []
for county in tqdm(counties):

    try:
        current_listings1 = get_listings(county,
                               min_list_price=MIN_PRICE,
                               max_list_price=int((MAX_PRICE+MIN_PRICE)/2-1),
                               min_bedrooms=MIN_BEDS,
                               min_bathrooms=MIN_BATHS)
    except GSMLSException as e:
        print(e)

    try:
        current_listings2 = get_listings(county,
                               min_list_price=int((MAX_PRICE+MIN_PRICE)/2),
                               max_list_price=MAX_PRICE,
                               min_bedrooms=MIN_BEDS,
                               min_bathrooms=MIN_BATHS)
    except GSMLSException as e:
        print(e)

    current_listings = current_listings1 + current_listings2

    try:
        assert len(set([x['id'] for x in current_listings])) == len(current_listings)
    except AssertionError:
        raise Exception(f"expected unique results from current_listings1 ({len(current_listings1)}) "
                        f"and current_listings2 {len(current_listings2)} of county {county}")

    listings += current_listings

print(f"Downloaded {len(listings)} listings from GSMLS.")
listings = [x for x in listings if x['address']]
print(f"Filtered down to {len(listings)} listings due to null address.")

your search returned no records in Hudson with payload {'idxId': '', 'token': '', 'minlistprice': 300000, 'maxlistprice': 424999, 'minbedrooms': 4, 'minbaths': 3, 'minacres': '', 'maxacres': '', 'lotdesc': '', 'Search': 'Search', 'countycode': 18, 'countyname': 'Hudson', 'propertytype': 'RES', 'propertytypedesc': 'Residential', 'transactionsought': 'purchase', 'sttowns': '1801,1802,1803,1804,1805,1806,1807,1808,1809,1810,1811,1812'}

Downloaded 398 listings from GSMLS.
Filtered down to 369 listings due to null address.


**Geocode each address using Google Maps API**

In [5]:
from geolocate import add_geocode_to_listing
for listing in tqdm(listings):
    if not listing.get('address'): continue
    add_geocode_to_listing(listing)

could not geocode: 19 Durban Rd. 3 UNITS Hopatcong Boro, NJ or 19 Durban Rd. 3 UNITS Hopatcong, NJ



**Find closest NJ Transit Park and Ride to each address**

In [6]:
from commute import add_closest_park_and_ride_to_listing

for i in tqdm(range(len(listings))):
    listing = listings[i]
    add_closest_park_and_ride_to_listing(listing)




**Get commute time for each address through both park and ride and walking**

In [7]:
from commute import add_commute_to_listing

for i in tqdm(range(len(listings))):
    listing = listings[i]
    add_commute_to_listing(listing)

print(sum(1 for listing in listings if listing.get('park_and_ride_duration')),
      "out of", len(listings), "have park and ride commute times.")

print(sum(1 for listing in listings if listing.get('nyc_duration')),
  "out of", len(listings), "have walking commute times.")


368 out of 369 have park and ride commute times.
255 out of 369 have walking commute times.


In [8]:
gsmls_filters = lambda x: ((x.address.notnull()) &
                            (~x.city.isin(blacklist_cities)) &
                            (x.barnabas_duration < HALF_HOUR + TEN_MINUTES) &                           
                            ((x.park_and_ride_duration1 < TEN_MINUTES) & (x.park_and_ride_duration < ONE_HOUR + TEN_MINUTES) | (x.nyc_duration < ONE_HOUR + TEN_MINUTES)) &
                            (~x['style'].str.contains('Townhouse', case=False)) &
                            (~x['style'].str.contains('Bi-Level', case=False)) &
                            (~x['style'].str.contains('Cape Cod', case=False)) &
                            (~x['style'].str.contains('One Floor Unit', case=False)) &
                            (~x['style'].str.contains('Multi Floor Unit', case=False)) &
                            (~x['style'].str.contains('1/2 Duplex', case=False)) &
                            (~x.heat_source.str.contains('oil', case=False, na=True)) &
                            # (~x.heat_system.str.contains('radiator', case=False, na=True)) &
                            # (x.cool_system.str.contains('central', case=False, na=True)) &
                            (~x.water.str.contains('well', case=False, na=True))
                            # (~x.sewer.str.contains('septic', case=False, na=True))
                          )

def show_gsmls():
    df = pd.DataFrame(listings)
    df.set_index('id', inplace=True)
    df = df[gsmls_filters(df)]
    df = df.sort_values(by=['park_and_ride_duration', 'park_and_ride_distance'], ascending=[True, True])
    return df

In [9]:
df = show_gsmls()
df = df[(df.tax < MAX_TAX)]
df = df.sort_values(['city', 'sqft'], ascending=[True, False])
print(len(df), "listings after filtering")
preview_gsmls_df(df)

61 listings after filtering


Unnamed: 0_level_0,address,city,county,bedrooms,baths_full,baths_part,sqft,price,tax,style,rooms,park_and_ride_name,park_and_ride_duration_text,park_and_ride_type,park_and_ride_distance,park_and_ride_bus_name,park_and_ride_bus_distance,barnabas_duration_text,nyc_duration_text,lat,lng
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3434778,79 Winding Ln,Bloomfield,Essex,4,3,0,3054.0,489000,13683,Split Level,11,Watchung Avenue,"6 mins, 45 mins",rail,1.334357,Allwood Road,1.641766,24 mins,56 mins,40.830521,-74.181507
3437585,1681 Raritan Rd,Clark,Union,5,3,1,,549999,14761,Colonial,10,Cranford,"6 mins, 44 mins",rail,1.272401,Rutgers Lane Hospital - Union Twp,2.775131,26 mins,1 hour 21 mins,40.639,-74.292477
3438608,103 Notch Rd,Clifton,Passaic,5,3,0,,495000,12140,Colonial,7,Allwood Road,"3 mins, 33 mins",bus,0.696957,Allwood Road,0.696957,25 mins,36 mins,40.863733,-74.177815
3429872,9 Lynn Dr,Clifton,Passaic,4,3,1,,519900,14975,Split Level,10,Allwood Road,"3 mins, 33 mins",bus,0.608936,Allwood Road,0.608936,27 mins,51 mins,40.861,-74.16844
3409537,118 W 2nd St,Clifton,Passaic,4,3,0,,379000,7469,Colonial,9,Clifton,"6 mins, 44 mins",rail,1.40358,Passaic Bus Terminal,2.239075,27 mins,51 mins,40.888326,-74.154123
3409537,118 W 2nd St,Clifton,Passaic,4,3,0,,379000,7469,Colonial,9,Clifton,"6 mins, 44 mins",rail,1.40358,Passaic Bus Terminal,2.239075,27 mins,51 mins,40.888326,-74.154123
3438920,25 Ruth Ave,Clifton,Passaic,4,3,0,,499000,9310,Custom Home,9,Clifton Commons,"3 mins, 47 mins",bus,0.420376,Clifton Commons,0.420376,27 mins,44 mins,40.8317,-74.146318
3424669,3 Trella Ter,Clifton,Passaic,4,3,0,,499000,13539,"Custom Home, Ranch, Raised Ranch",10,Clifton,"8 mins, 44 mins",rail,1.260493,Allwood Road,1.546734,28 mins,47 mins,40.876239,-74.174675
3435496,39 Woodlawn Ave,Clifton,Passaic,5,3,1,,535000,15508,Colonial,9,Montclair Heights,"3 mins, 52 mins",rail,0.547339,Allwood Road,0.906385,27 mins,55 mins,40.856621,-74.192116
3426938,1117 N Washington Ave,Green Brook Township,Somerset,4,3,1,2500.0,549900,9089,"Contemporary, Custom Home",9,Dunellen,"5 mins, 1 hour 0 mins",rail,1.306731,Watchung Park & Ride,4.508905,30 mins,1 hour 14 mins,40.605383,-74.479002


In [10]:
import folium

home = (40.8961863, -74.1726829,)

m = folium.Map(location=home, zoom_start=12)

folium.Marker(home, tooltip=f'<i>115 Dumont Ave. Clifton, NJ</i>', icon=folium.Icon(color='green')).add_to(m)

for id, row in df.iterrows():
    url = f"https://gsmls.herokuapp.com/properties/{id}"
    address = row.address + ", " + row.city
    popup = f"""
    Address: {address}<br/>
    MLS: <a target="_blank" href="{url}">{id}</a><br/>
    Price: {row.price}<br/>
    Bedrooms: {row.bedrooms}<br/>
    Baths Full: {row.baths_full}<br/>
    Baths Part: {row.baths_full}<br/>
    Tax: {row.tax}<br/>
    Dad (Park and Ride): {row.park_and_ride_duration_text}<br/>
    Dad (Walk and Ride): {row.nyc_duration_text}<br/>
    Mom: {row.barnabas_duration_text}
    """
    folium.Marker((row.lat, row.lng,), popup=f'<i>{popup}</i>', tooltip=f'<i>{address}</i>').add_to(m)

folium.GeoJson('Tran_railroad_passenger.json', name='geojson',).add_to(m)

with open('Tran_railroad_station.json') as f:
    data = json.load(f)
    for station in data['features']:
        lng, lat = station['geometry']['coordinates']
        name = station['properties']['STATION']
        rail_line = station['properties']['RAIL_LINE']
        municipal_label = station['properties']['MUN_LABEL']
        county = station['properties']['COUNTY']
        folium.CircleMarker((lat,lng,), radius=2, color='red', tooltip=f"{name}").add_to(m)

m.save('gsmls-map.html')