# Suburb Venue Profiles and the Sydney Housing Market

### Parse CSV file of Sydney Suburb Names into Pandas Dataframe

In [72]:
# List of suburbs from "http://www.walksydneystreets.net/suburbssydneyall.htm"

In [13]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3
import geopy
!pip install folium
import folium


# The CSV file is uploaded to IBM Watson, which is accessed using my credentials. This is why the next cell is hidden.

In [14]:
# The code was removed by Watson Studio for sharing.

In [15]:
suburbs = df_data_0.T
suburbs.columns = ['Suburb Name']
suburbs.head()

Unnamed: 0,Suburb Name
0,Abbotsbury
1,Abbotsford
2,Acacia Gardens
3,Agnes Banks
4,Airds


### Get coordinates of each suburb using geocoder

locator = geopy.Nominatim(user_agent="Coursera_Capstone")

from geopy.extra.rate_limiter import RateLimiter

geocode = RateLimiter(locator.geocode, min_delay_seconds=2)
suburbs['location'] = suburbs['Address'].apply(geocode)
suburbs['point'] = suburbs['location'].apply(lambda loc: tuple(loc.point) if loc else None)
suburbs[['latitude', 'longitude', 'altitude']] = pd.DataFrame(suburbs['point'].tolist(), index=suburbs.index)

In [24]:
# The code was removed by Watson Studio for sharing.

In [25]:
suburbs.head()

Unnamed: 0,Suburb Name,location,point,latitude,longitude,altitude
0,Abbotsbury,"Abbotsbury, Sydney, Fairfield City Council, Ne...","(-33.8692846, 150.8667029, 0.0)",-33.869285,150.866703,0.0
1,Abbotsford,"Abbotsford, Sydney, City of Canada Bay Council...","(-33.8505529, 151.129759, 0.0)",-33.850553,151.129759,0.0
2,Acacia Gardens,"Acacia Gardens, Sydney, Blacktown City Council...","(-33.7324595, 150.9125321, 0.0)",-33.732459,150.912532,0.0
3,Agnes Banks,"Agnes Banks, Sydney, Hawkesbury City Council, ...","(-33.6145082, 150.7114482, 0.0)",-33.614508,150.711448,0.0
4,Airds,"Airds, Sydney, Campbelltown City Council, New ...","(-34.09, 150.8261111, 0.0)",-34.09,150.826111,0.0


In [26]:
# Drop unneeded columns
suburbs = suburbs.drop(columns={"altitude", "point"})
suburbs.head()

Unnamed: 0,Suburb Name,location,latitude,longitude
0,Abbotsbury,"Abbotsbury, Sydney, Fairfield City Council, Ne...",-33.869285,150.866703
1,Abbotsford,"Abbotsford, Sydney, City of Canada Bay Council...",-33.850553,151.129759
2,Acacia Gardens,"Acacia Gardens, Sydney, Blacktown City Council...",-33.732459,150.912532
3,Agnes Banks,"Agnes Banks, Sydney, Hawkesbury City Council, ...",-33.614508,150.711448
4,Airds,"Airds, Sydney, Campbelltown City Council, New ...",-34.09,150.826111


### Display map of Sydney with markers for each Suburb

In [28]:
map_syd = folium.Map(location=[-33.865143, 151.209900], zoom_start=10)

# add markers to map
for lat, lng, label in zip(suburbs['latitude'], suburbs['longitude'], suburbs['Suburb Name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_syd)  
    
map_syd

In [29]:
# The code was removed by Watson Studio for sharing.

In [39]:
import requests

def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [40]:
sydney_venues = getNearbyVenues(names=suburbs['Suburb Name'],
                                   latitudes=suburbs['latitude'],
                                   longitudes=suburbs['longitude']
                                  )
print(sydney_venues.shape)

KeyError: 'groups'

In [41]:
sydney_venues.head(20)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Abbotsbury,-33.869285,150.866703,Abbotsbury Shops,-33.869554,150.865294,Convenience Store
1,Abbotsbury,-33.869285,150.866703,Stockdale Reserve,-33.871098,150.86806,Park
2,Abbotsbury,-33.869285,150.866703,817 bus stop,-33.869773,150.870122,Bus Station
3,Abbotsford,-33.850553,151.129759,The Cove Dining Co,-33.850933,151.129015,Café
4,Abbotsford,-33.850553,151.129759,The Cove Dining Co,-33.85095,151.12911,Café
5,Abbotsford,-33.850553,151.129759,Sistina Ristorante & Pizzeria,-33.848149,151.128307,Pizza Place
6,Abbotsford,-33.850553,151.129759,Lock Stock And Bottle,-33.84786,151.12797,Wine Shop
7,Abbotsford,-33.850553,151.129759,Cinnamon Thai Cuisine,-33.847697,151.128288,Thai Restaurant
8,Abbotsford,-33.850553,151.129759,Henry Lawson Park,-33.85035,151.125895,Park
9,Alexandria,-33.909157,151.192128,The Grounds Of Alexandria,-33.910774,151.194406,Café


### Develop Suburb Profiles

In [42]:
## todo : sum each type of venue instead of one hot

# one hot encoding
onehot = pd.get_dummies(sydney_venues[['Venue Category']], prefix="", prefix_sep="")

# add suburb name column back to dataframe
onehot['Suburb'] = sydney_venues['Neighborhood'] 

# move suburb column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

In [43]:
onehot.head(10)

Unnamed: 0,Suburb,Advertising Agency,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Antique Shop,Aquarium,Arcade,Arepa Restaurant,...,Video Game Store,Video Store,Vietnamese Restaurant,Waterfront,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Abbotsbury,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Abbotsbury,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Abbotsbury,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
7,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Abbotsford,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Alexandria,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
onehot['Bar'].head()

0    0
1    0
2    0
3    0
4    0
Name: Bar, dtype: uint8