# Mapping!

Maps are a great way to quickly visualise geographic patterns in data. Let's get started!

In [1]:
import mapfuncs as mf

In [2]:
import pandas as pd
# Load the CSV that feeds the map cells below.
map_base = pd.read_csv(
    filepath_or_buffer='../DATA/RealEstate2.csv',
)

In [3]:
# Wrap every listing URL in an anchor tag that opens in a new tab; 'url3' is
# one of the columns handed to mf.create_map via `strings` further down.
map_base['url3'] = map_base['url'].map(
    lambda link: f'<a href = "{link}" target = "_blank">Original Source</a>'
)

In [4]:
import numpy as np
# Bucket each price to the nearest 50,000 and label the bucket in thousands.
# The rounded value is a float, so its string form is trimmed by two
# characters to drop the trailing ".0" (e.g. "950.0" -> "950").
map_base['price_group'] = [
    str(50 * np.round(price / 50000))[:-2]
    for price in map_base['max_price']
]

In [10]:
# Map the first 50 listings, grouped by price band. The `strings` columns are
# presumably what create_map shows in each popup — confirm in mapfuncs.
mf.create_map(
    map_base[:50],
    lat_col='lat',
    lon_col='lon',
    group_col='price_group',
    sort_col='max_price',
    photo_col='photo',
    strings=['add', 'bed', 'bath', 'car', 'headline', 'desc', 'url3'],
    zoom=14,
    disable_cluster=8,
)

In [11]:
# from geopy.geocoders import Nominatim
# from geopy.exc import GeocoderTimedOut
# geolocator = Nominatim(user_agent="hmm")

# def do_geocode(place):
#     try:
#         return geolocator.geocode(place, timeout=600)
#     except GeocoderTimedOut:
#         return geolocator.geocode(place, timeout=600)

In [12]:
# dump = do_geocode('Hornsby Station').raw
# slat = dump.get('lat')
# slon = dump.get('lon')

In [21]:
# import folium
# h = folium.Marker(location=[slat,slon],
#                       icon = folium.Icon(color = 'red'))

# m.add_child(h)
# f.add_child(m)
# f

<hr>

In [17]:
# Load the station-entrance table used for the per-station coordinates below.
stats = pd.read_csv(
    filepath_or_buffer='../DATA/StationEntrances2020_v4.csv',
)

In [20]:
# Average the LAT/LONG of every row belonging to a station, giving one
# coordinate pair per Train_Station.
stats.groupby('Train_Station')[['LAT', 'LONG']].mean().reset_index()

Unnamed: 0,Train_Station,LAT,LONG
0,Aberdeen,-32.166893,150.891966
1,Adamstown,-32.933587,151.720350
2,Albion Park,-34.563160,150.798893
3,Allawah,-33.969752,151.114576
4,Arlington LR,-33.902047,151.138101
...,...,...,...
346,Wynyard LR,-33.866706,151.207168
347,Wyong,-33.285247,151.425382
348,Yagoona,-33.907331,151.024677
349,Yennora,-33.864880,150.970878


In [None]:
##Build Map    
def map(map_df, 
            lat_col = 'LATITUDE', lon_col = 'LONGITUDE', 
            group_col = 'PLEASE SELECT', #e.g. 'PRIMARY_TECHNOLOGY'
            string_var = [], #e.g. ['LOCATION_ID', 'PRIMARY_TECHNOLOGY']
            
            height = 900,
            zoom = 10,
            colour_shift = 0,
            disable_cluster = None,
            max_zoom = 18,
            
            save_map_name = "",
            disable_legend = False,
            geocode = False,
            
            icon_func = None, 
            popup_func = None
            ):
    """
    Build a clustered folium map from a DataFrame of points.

    NOTE(review): a function with this same name and signature is defined
    again later in this notebook; whichever cell runs last wins. Consider
    keeping only one of them.

    Parameters
    ----------
    map_df : pandas.DataFrame
        One row per marker. The frame is not modified; when ``geocode`` is
        set, the extra 'GEOCODE' column is added to an internal copy.
    lat_col, lon_col : str
        Columns holding the marker coordinates. The map is centred on the
        mean of each column.
    group_col : str
        Column whose values pick each marker's icon and colour.
    string_var : list of str
        Column names forwarded to ``popup_func``.
    height : int
        Height given to ``folium.Figure``.
    zoom : int
        Initial zoom level of the map.
    colour_shift : int
        Forwarded to ``icon_func``.
    disable_cluster : int or None
        Zoom level at which clustering is switched off. ``None`` means
        ``zoom + 2``.
    max_zoom : int
        Maximum zoom level of the map.
    save_map_name : str
        When non-empty, the figure is saved as "<save_map_name>.html".
    disable_legend : bool
        Skip adding the legend HTML to the figure.
    geocode : bool
        Reverse-geocode every point with Nominatim and append the address
        to the popup fields.
    icon_func : callable or None
        Called as ``icon_func(map_df=..., group_col=..., colour_shift=...)``
        and expected to return ``(icons_by_group, colours_by_group,
        legend_html)``. ``None`` means the notebook-level ``icons``.
    popup_func : callable or None
        Called as ``popup_func(row, string_var)`` for every row. ``None``
        means the notebook-level ``popup``.

    Returns
    -------
    folium.Figure
        The figure containing the map.

    Raises
    ------
    ValueError
        If ``icon_func`` returns ``None`` (the bundled ``icons`` does this
        after printing why it could not assign icons).
    """
    import folium
    from folium.plugins import MarkerCluster

    # The defaults are looked up at call time, not at definition time, so this
    # cell can be executed before the cell that defines `icons` and `popup`.
    if icon_func is None:
        icon_func = icons
    if popup_func is None:
        popup_func = popup
    if disable_cluster is None:
        disable_cluster = zoom + 2

    # Centre the map on the mean coordinate of all points.
    lat, lon = map_df[lat_col].mean(), map_df[lon_col].mean()

    icon_spec = icon_func(map_df=map_df,
                          group_col=group_col,
                          colour_shift=colour_shift)
    if icon_spec is None:
        raise ValueError("icon_func returned None instead of "
                         "(icons, colours, legend_html); see its printed message")
    icns, clrs, legend_html = icon_spec

    f = folium.Figure(height=height)
    m = folium.Map(location=[lat, lon], zoom_start=zoom, max_zoom=max_zoom)
    marker_cluster = MarkerCluster(options={'disableClusteringAtZoom': disable_cluster})

    if geocode:
        from geopy.geocoders import Nominatim
        from geopy.exc import GeocoderTimedOut
        geolocator = Nominatim(user_agent="hmm")

        def do_geocode(lat, lon):
            # One retry on timeout; a second timeout propagates to the caller.
            query = "{}, {}".format(lat, lon)
            try:
                return geolocator.reverse(query, timeout=600)
            except GeocoderTimedOut:
                return geolocator.reverse(query, timeout=600)

        def address_of(row):
            location = do_geocode(row[lat_col], row[lon_col])
            # reverse() may find nothing; fall back instead of failing on `.address`.
            found = location.address if location is not None else "Address not found"
            return "<br>" + found

        if len(map_df) > 500:
            print('- Geocoding may take some time as there are over 500 addresses -\n')
        print("- Reverse Geocoding -")
        # Work on a copy so the caller's frame (possibly a slice) is left untouched.
        map_df = map_df.copy()
        map_df['GEOCODE'] = [address_of(row) for _, row in map_df.iterrows()]
        string_var = list(string_var) + ['GEOCODE']

    for _, row in map_df.iterrows():
        group = row[group_col]
        folium.Marker(location=[row[lat_col], row[lon_col]],
                      popup=popup_func(row, string_var),
                      icon=folium.Icon(color=clrs[group],
                                       icon_color="white",
                                       prefix='fa',
                                       icon=icns[group])
                      ).add_to(marker_cluster)

    m.add_child(marker_cluster)
    f.add_child(m)

    if not disable_legend:
        f.get_root().html.add_child(folium.Element(legend_html))

    if save_map_name:
        f.save("{}.html".format(save_map_name))

    return f

In [None]:
##To provide some quick mapping options.

##Define what goes in the popup bubble
def popup(row, var = []):
    """Build a folium popup listing selected fields of one row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Indexed with each entry of ``var``.
    var : list
        Field names to show; each becomes a "name: value" line in the popup.

    Returns
    -------
    folium.Popup
        Popup wrapping a 300x120 IFrame that holds the rendered lines.
    """
    import folium

    # One "<name>: <value><br>" fragment per requested field, in the given order.
    st = "".join(str(item) + ": " + str(row[item]) + "<br>" for item in var)

    frame = folium.IFrame('<div style="font-family: Arial">{}</div>'.format(st),
                          width = 300, height = 120)
    return folium.Popup(frame)
    
    
##Define popup colours and icons
def icons(map_df, group_col, colour_shift = 0):
    """
    Returns icons, icon colours, and an appropriate legend.

    Parameters
    ----------
    map_df : pandas.DataFrame
        Frame whose ``group_col`` values define the groups.
    group_col : str
        Column to read the distinct group values from.
    colour_shift : int
        Offset into the colour palette for the first group. The palette
        wraps around, so every group always receives a colour.

    Returns
    -------
    tuple or None
        ``(icns, clrs, legend_html)`` where ``icns`` and ``clrs`` map each
        group value to an icon name and a colour name, and ``legend_html``
        is an HTML snippet for a fixed-position legend. ``None`` is returned
        (after printing the reason) when ``colour_shift`` exceeds 18 or
        there are more groups than available icons.
    """
    if(colour_shift>18):
        print("To big of colour spectrum shift")
        return
    
    fa = ['random', 'cog', 'magnet', 'pencil', 'wrench', 'signal',  
          'tag', 'certificate', 'volume-off',  'th-large', 
          'map-marker', 'plus', 
          'file', 'refresh', 'align-center', 'link', 
          'repeat', 'th', 'heart', 'briefcase']
    
    cols = ['red', 'blue', 'green', 'purple', 'orange', 'darkred',
            'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue',
            'darkpurple', 'pink', 'lightblue', 'lightgreen',
            'gray', 'black', 'lightgray']
    
    ics = map_df[group_col].unique()
    
    if(len(ics)>len(fa)):
        print("Too many groups")
        return
    
    # zip stops at the shorter input, so each group gets the icon at its position.
    icns = dict(zip(ics, fa))
    # Index the palette modulo its length: a plain slice would run out of
    # colours for large shifts (or negative ones) and leave groups uncoloured.
    clrs = {group: cols[(colour_shift + position) % len(cols)]
            for position, group in enumerate(ics)}
    
    entry = """{}&nbsp; <i class="fa fa-{} fa-2x"
                       style="color:{}"></i><br><br>"""
    leg = "".join(entry.format(key, item, clrs[key]) for key, item in icns.items())
    legh = len(icns)*56
    
    # leg[:-8] drops the trailing "<br><br>" after the last legend entry.
    legend_html = """
         <div style="position: fixed; 
         bottom: 25px; left: 25px; width: 120px; height: {}px; text-align:center;
         border:2px solid grey; background-color: white; z-index:9999; font-size:14px;
         "><h4 style = "text-align:center">Legend</h4>
         {}
          </div>
         """.format(legh, leg[:-8])
    
    return(icns, clrs, legend_html)
    
    
##Build Map    
def map(map_df, 
            lat_col = 'LATITUDE', lon_col = 'LONGITUDE', 
            group_col = 'PLEASE SELECT', #e.g. 'PRIMARY_TECHNOLOGY'
            string_var = [], #e.g. ['LOCATION_ID', 'PRIMARY_TECHNOLOGY']
            
            height = 900,
            zoom = 10,
            colour_shift = 0,
            disable_cluster = None,
            max_zoom = 18,
            
            save_map_name = "",
            disable_legend = False,
            geocode = False,
            
            icon_func = None, 
            popup_func = None
            ):
    """
    An attempt to speed up mapping in folium. May need further abstraction, or more 
    function exposure. Simply have a pandas dataframe with lat/lons, and a grouping 
    column (for icon colouring etc) to produce a useful map. Can also choose row 
    variables to put in the icons' popups, save the map, and disable clustering.
    
    Allows you to use your own custom popup functions.

    Parameters
    ----------
    map_df : pandas.DataFrame
        One row per marker. The frame is not modified; when ``geocode`` is
        set, the extra 'GEOCODE' column is added to an internal copy.
    lat_col, lon_col : str
        Columns holding the marker coordinates. The map is centred on the
        mean of each column.
    group_col : str
        Column whose values pick each marker's icon and colour.
    string_var : list of str
        Column names forwarded to ``popup_func``.
    height : int
        Height given to ``folium.Figure``.
    zoom : int
        Initial zoom level of the map.
    colour_shift : int
        Forwarded to ``icon_func``.
    disable_cluster : int or None
        Zoom level at which clustering is switched off. ``None`` means
        ``zoom + 2``.
    max_zoom : int
        Maximum zoom level of the map.
    save_map_name : str
        When non-empty, the figure is saved as "<save_map_name>.html".
    disable_legend : bool
        Skip adding the legend HTML to the figure.
    geocode : bool
        Reverse-geocode every point with Nominatim and append the address
        to the popup fields.
    icon_func : callable or None
        Called as ``icon_func(map_df=..., group_col=..., colour_shift=...)``
        and expected to return ``(icons_by_group, colours_by_group,
        legend_html)``. ``None`` means the notebook-level ``icons``.
    popup_func : callable or None
        Called as ``popup_func(row, string_var)`` for every row. ``None``
        means the notebook-level ``popup``.

    Returns
    -------
    folium.Figure
        The figure containing the map.

    Raises
    ------
    ValueError
        If ``icon_func`` returns ``None`` (``icons`` does this after
        printing why it could not assign icons).
    """
    import folium
    from folium.plugins import MarkerCluster

    # Resolve the default helpers at call time.
    if icon_func is None:
        icon_func = icons
    if popup_func is None:
        popup_func = popup
    if disable_cluster is None:
        disable_cluster = zoom + 2

    # Centre the map on the mean coordinate of all points.
    lat, lon = map_df[lat_col].mean(), map_df[lon_col].mean()

    icon_spec = icon_func(map_df=map_df,
                          group_col=group_col,
                          colour_shift=colour_shift)
    if icon_spec is None:
        raise ValueError("icon_func returned None instead of "
                         "(icons, colours, legend_html); see its printed message")
    icns, clrs, legend_html = icon_spec

    f = folium.Figure(height=height)
    m = folium.Map(location=[lat, lon], zoom_start=zoom, max_zoom=max_zoom)
    marker_cluster = MarkerCluster(options={'disableClusteringAtZoom': disable_cluster})

    if geocode:
        from geopy.geocoders import Nominatim
        from geopy.exc import GeocoderTimedOut
        geolocator = Nominatim(user_agent="hmm")

        def do_geocode(lat, lon):
            # One retry on timeout; a second timeout propagates to the caller.
            query = "{}, {}".format(lat, lon)
            try:
                return geolocator.reverse(query, timeout=600)
            except GeocoderTimedOut:
                return geolocator.reverse(query, timeout=600)

        def address_of(row):
            location = do_geocode(row[lat_col], row[lon_col])
            # reverse() may find nothing; fall back instead of failing on `.address`.
            found = location.address if location is not None else "Address not found"
            return "<br>" + found

        if len(map_df) > 500:
            print('- Geocoding may take some time as there are over 500 addresses -\n')
        print("- Reverse Geocoding -")
        # Work on a copy so the caller's frame (possibly a slice) is left untouched.
        map_df = map_df.copy()
        map_df['GEOCODE'] = [address_of(row) for _, row in map_df.iterrows()]
        string_var = list(string_var) + ['GEOCODE']

    for _, row in map_df.iterrows():
        group = row[group_col]
        folium.Marker(location=[row[lat_col], row[lon_col]],
                      popup=popup_func(row, string_var),
                      icon=folium.Icon(color=clrs[group],
                                       icon_color="white",
                                       prefix='fa',
                                       icon=icns[group])
                      ).add_to(marker_cluster)

    m.add_child(marker_cluster)
    f.add_child(m)

    if not disable_legend:
        f.get_root().html.add_child(folium.Element(legend_html))

    if save_map_name:
        f.save("{}.html".format(save_map_name))

    return f
    
    
    
    
    
    

In [None]:
def path_to_image(row, string_var = []):
    """Build a folium popup showing selected row fields above a thumbnail image.

    The image is fetched through the notebook-level ``work_bucket`` object
    using ``row.img`` as the key, shrunk to a tenth of its size, re-encoded
    as PNG and embedded in the popup HTML as a base64 data URI.

    Parameters
    ----------
    row : pandas.Series-like
        Must expose ``row.img`` and support ``row[name]`` for every name in
        ``string_var``.
    string_var : list of str
        Field names to list above the image, shown capitalised.

    Returns
    -------
    folium.Popup
        Popup (max_width 2650) wrapping a 350x350 IFrame.
    """
    import base64
    import io

    import folium
    from folium import IFrame
    from PIL import Image as IM

    st = ""
    for item in string_var:
        st = st+str(item.capitalize())+": "+str(row[item])+"<br>"

    # Read the stored object fully into memory and open it as an image.
    image_object = work_bucket.Object(row.img)
    in_mem = io.BytesIO(image_object.get()['Body'].read())
    img = IM.open(in_mem)

    # PIL reports size as (width, height). Shrink to a tenth, never below 1px.
    width, height = img.size
    img.thumbnail((max(1, round(width/10)), max(1, round(height/10))))

    img_bytes = io.BytesIO()
    img.save(img_bytes, format='PNG', optimize=True, quality=10)
    img_str = base64.b64encode(img_bytes.getvalue()).decode()

    # The bytes were just encoded as PNG, so the data URI declares image/png.
    html = ('<div style="font-family: Arial">{}<br>'
            '<img src="data:image/png;base64,{}">'.format(st, img_str) +
            '</div>')
    pop = folium.Popup(IFrame(html, width = 350, height = 350),
                       max_width=2650)
    return(pop)

In [None]:
# Seed the upward search: start from the configured ceiling and arm the
# flag that keeps the search loop running.
max_price, searching_for_price = starting_max_price, True

In [None]:
# Raise the maxPrice filter one increment at a time until the target listing
# appears in the search results, then step max_price back by one increment and
# report it as the lower bound. The search gives up once max_price passes 1.5m.
while searching_for_price:

    url = "https://api.domain.com.au/v1/listings/residential/_search" # Set destination URL here
    post_fields = {
        "listingType": "Sale",
        "maxPrice": max_price,
        "pageSize": 100,
        "propertyTypes": property_type,
        "minBedrooms": bedrooms,
        "maxBedrooms": bedrooms,
        "minBathrooms": bathrooms,
        "maxBathrooms": bathrooms,
        "locations": [
            {
                "state": "",
                "region": "",
                "area": "",
                "suburb": suburb,
                "postCode": postcode,
                "includeSurroundingSuburbs": False
            }
        ]
    }

    # NOTE(review): verify=False switches off TLS certificate validation while
    # the bearer token is being sent — confirm this is really required.
    request = requests.post(url, headers=auth, json=post_fields, verify=False)
    # Fail fast on HTTP errors; otherwise an error payload would be iterated
    # below as if it were the result list and fail with an unrelated error.
    request.raise_for_status()

    listings = [listing["listing"]["id"] for listing in request.json()]

    if int(property_id) in listings:
        max_price = max_price - increment
        print("Lower bound found: ", max_price)
        searching_for_price = False
    elif max_price > 1500000:
        # Same state change as a hit (so later cells see the same values), but
        # say plainly that the cap ended the search, not a match.
        max_price = max_price - increment
        print("Listing not found below the 1.5m search cap; stopping at ", max_price)
        searching_for_price = False
    else:
        max_price = max_price + increment
        print("Not found. Increasing max price to ", max_price)
        time.sleep(0.1)  # sleep a bit so you don't make too many API calls too quickly

In [None]:
# Re-arm the loop flag for the downward search. A starting_min_price that is
# not above zero means "not set"; in that case begin 400k above max_price.
searching_for_price = True
min_price = starting_min_price if starting_min_price > 0 else max_price + 400000

In [None]:
# Lower the minPrice filter one increment at a time until the target listing
# appears in the search results, then step min_price up by one increment and
# report it as the upper bound. The search stops once min_price reaches zero
# so it cannot loop (and call the API) forever.
while searching_for_price:

    url = "https://api.domain.com.au/v1/listings/residential/_search" # Set destination URL here
    post_fields = {
        "listingType": "Sale",
        "minPrice": min_price,
        "pageSize": 100,
        "propertyTypes": property_type,
        "minBedrooms": bedrooms,
        "maxBedrooms": bedrooms,
        "minBathrooms": bathrooms,
        "maxBathrooms": bathrooms,
        "locations": [
            {
                "state": "",
                "region": "",
                "area": "",
                "suburb": suburb,
                "postCode": postcode,
                "includeSurroundingSuburbs": False
            }
        ]
    }

    # NOTE(review): verify=False switches off TLS certificate validation while
    # the bearer token is being sent — confirm this is really required.
    request = requests.post(url, headers=auth, json=post_fields, verify=False)
    # Fail fast on HTTP errors; otherwise an error payload would be iterated
    # below as if it were the result list and fail with an unrelated error.
    request.raise_for_status()

    listings = [listing["listing"]["id"] for listing in request.json()]

    if int(property_id) in listings:
        min_price = min_price + increment
        print("Upper bound found: ", min_price)
        searching_for_price = False
    elif min_price <= 0:
        # Nothing left to lower: the listing never showed up in the results.
        print("Listing not found at any min price; stopping at ", min_price)
        searching_for_price = False
    else:
        min_price = min_price - increment
        print("Not found. Decreasing min price to ", min_price)
        time.sleep(0.1)  # sleep a bit so you don't make too many API calls too quickly

In [None]:
# The maxPrice search leaves the *lower* bound in max_price and the minPrice
# search leaves the *upper* bound in min_price (see the messages those loops
# print), so `lower` comes from max_price and `upper` from min_price.
# Values under one million are shown in thousands ("k"), otherwise in millions ("m").
if max_price < 1000000:
    lower = max_price / 1000
    upper = min_price / 1000
    denom = "k"
else:
    lower = max_price / 1000000
    upper = min_price / 1000000
    denom = "m"

In [None]:
# Print a short summary of the listing together with the price band held in
# lower / upper / denom.
print(da['displayAddress'])
print(r['headline'])
print("Property Type:", property_type_str)
print(
    "Details: ",
    int(bedrooms), "bedroom,",
    int(bathrooms), "bathroom,",
    int(carspaces), "carspace",
)
print("Display price:", r['priceDetails']['displayPrice'])
# A single figure when both searches ended on the same value, otherwise a range.
print(*(
    ("Price guide:", "$", lower, denom)
    if max_price == min_price
    else ("Price range:", "$", lower, "-", "$", upper, denom)
))
print("URL:", r['seoUrl'])

In [None]:
# url = "https://api.domain.com.au/v1/listings/residential/_search" # Set destination URL here
# post_fields ={
#       "listingType":"Sale", #Rent
#       "page":1,
#       "propertyType":['apartmentUnitFlat'],
#       "pageSize":4,
#       "locations":[
#         {
#           "state":"NSW",
#           "region":"",
#           "area":"",
#           "suburb":"Parramatta",
#           "postCode":"",
#           "includeSurroundingSuburbs":False
#         }
#       ]
#     }

In [None]:
#get details
# Pull the fields the price searches filter on out of the listing record `r`.
da = r['addressParts']
postcode = da['postcode']
suburb = da['suburb']
bathrooms = r['bathrooms']
bedrooms = r['bedrooms']
carspaces = r['carspaces']
property_type = r['propertyTypes']
print(property_type, postcode, suburb, bedrooms, bathrooms, carspaces)

# A listing can carry several property types (e.g. 'house' and 'townhouse');
# join them with a separator so the combined label stays readable.
property_type_str = ", ".join(property_type)
print(property_type_str)

In [None]:
# url = "https://api.domain.com.au/v1/addressLocators?searchLevel=Address&streetNumber=100&streetName=Harris&streetType=Street&suburb=Pyrmont&state=NSW&postcode=2009"
url = "https://api.domain.com.au/v1/addressLocators?searchLevel=Address&unitNumber=3&streetNumber=61&streetName=High&streetType=Street&suburb=Parramatta&state=NSW&postcode=2150"

# url = "https://api.domain.com.au/v1/addressLocators?searchLevel=Address&unitNumber=6&streetNumber=65&streetName=Albert&streetType=Crescent&suburb=Burwood&state=NSW&postcode=2134"
# url = "https://api.domain.com.au/v1/addressLocators?searchLevel=Address&unitNumber=1&streetNumber=27&streetName=Stewart&streetType=Street&suburb=PARRAMATTA&state=NSW&postcode=2150"

# url = """https://api.domain.com.au/v1/addressLocators?searchLevel=Address&suburb=Pyrmont&state=NSW&postcode=2009"""

# Look the address up and show the decoded JSON reply as the cell output.
# NOTE(review): verify=False disables TLS certificate validation — confirm it is needed.
request = requests.get(url, headers=auth, verify=False)
request.json()

In [None]:
# Search settings for the price-range finder.
property_id = "2015010473"
starting_max_price = 1000000
increment = 50000
# when starting min price is zero we'll just use the lower bound plus 400k later on
starting_min_price = 0

# Fetch the listing record for property_id; `auth` is reused by the other request cells.
url = "https://api.domain.com.au/v1/listings/" + property_id
auth = {
    "accept": "text/json",
    "Authorization": "Bearer " + access_token,
}
request = requests.get(url, headers=auth, verify=False)
r = request.json()

In [None]:
# Take the 'id' of the first entry under 'ids' in the first element of the
# JSON reply held in `request`, then display it.
prop_id = (
    request.json()[0]
    .get('ids')[0]
    .get('id')
)
prop_id

In [None]:
# Fetch the listing record for prop_id and display the decoded JSON.
url = "https://api.domain.com.au/v1/listings/" + str(prop_id)
auth = {
    "accept": "text/json",
    "Authorization": "Bearer " + access_token,
}
request = requests.get(url, headers=auth, verify=False)
r = request.json()
r

In [None]:
# Echo the current value of `url` as this cell's output.
url