### Installing libraries

In [20]:
!pip install geopy
!pip install geocoder
!pip install folium

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


### Scraping Neighborhood data

In [21]:
address = 'Los Angeles, United States'
url = "https://en.wikipedia.org/wiki/List_of_districts_and_neighborhoods_of_Los_Angeles"
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
neighborhoodList = []

# -- append the data into the list

links = []
for row in soup.find_all("div", class_="div-col"):
    for item in row.select("a"):
        if item.has_attr('title'):
            neighborhoodList.append(item.text)

la_df = pd.DataFrame({"Neighborhood": neighborhoodList})
la_df.head(10)

Unnamed: 0,Neighborhood
0,Adams-Normandie
1,Alsace
2,Angelino Heights
3,Angeles Mesa
4,Angelus Vista
5,Arleta
6,Arlington Heights
7,Arts District
8,Atwater Village
9,Baldwin Hills


In [22]:
la_df.shape

(219, 1)

In [23]:
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Los Angeles, United States'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [24]:
coordinates = [ get_latlng(neighborhood) for neighborhood in la_df["Neighborhood"].tolist()]

In [25]:
coordinates

[[33.901238000000035, -118.29908299999994],
 [33.97902000000005, -118.41417999999999],
 [34.07029000000006, -118.25479999999999],
 [18.05302000000006, -65.87515999999994],
 [34.084375707919705, -118.26919066978292],
 [34.24905000000007, -118.43348999999995],
 [34.03989000000007, -118.32515999999998],
 [34.04193300000003, -118.23644599999994],
 [34.11970000000008, -118.25886999999994],
 [34.021570000000054, -118.36764999999997],
 [34.011716000000035, -118.34072499999996],
 [34.07044548680411, -118.20070975126488],
 [34.07044548680411, -118.20070975126488],
 [34.10915025473649, -118.32045034087673],
 [34.08361000000008, -118.43482999999998],
 [34.12306719490349, -118.43504285082464],
 [34.09653268819107, -118.40329514585207],
 [34.11793000000006, -118.44608999999997],
 [34.09544836979449, -118.42608686997163],
 [34.075496000000044, -118.35588099999995],
 [34.063720000000046, -118.26459999999997],
 [34.05138000000005, -118.40132999999997],
 [34.04004000000003, -118.21049999999997],
 [34.0

In [28]:
df_coords = pd.DataFrame(coordinates, columns=['Latitude', 'Longitude'])
# Merge the coordinates into the original dataframe
la_df['Latitude'] = df_coords['Latitude']
la_df['Longitude'] = df_coords['Longitude']
print(la_df.shape)
la_df

(219, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Adams-Normandie,33.901238,-118.299083
1,Alsace,33.97902,-118.41418
2,Angelino Heights,34.07029,-118.2548
3,Angeles Mesa,18.05302,-65.87516
4,Angelus Vista,34.084376,-118.269191
5,Arleta,34.24905,-118.43349
6,Arlington Heights,34.03989,-118.32516
7,Arts District,34.041933,-118.236446
8,Atwater Village,34.1197,-118.25887
9,Baldwin Hills,34.02157,-118.36765


In [29]:
la_df.dtypes

Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object

In [32]:
la_df.to_csv("la_df.csv", index=False)

In [33]:
address = 'Los Angeles'

geolocator = Nominatim(user_agent = "ExploreLA")
LA_location = geolocator.geocode(address)
LA_latitude = LA_location.latitude
LA_longitude = LA_location.longitude

print('The geograpical coordinates of Los Angeles are {}, {}.'.format(LA_latitude, LA_longitude))

The geograpical coordinates of Los Angeles are 34.0536909, -118.242766.


### Using Folium to plot neighborhoods on the map

In [45]:
mapLA = folium.Map(
    location=[LA_latitude, LA_longitude], 
    tiles='openstreetmap', 
    zoom_start=9.5, 
)

# add markers to map
for lat, lng, neighborhood in zip(la_df['Latitude'], la_df['Longitude'], la_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html = True)
    folium.Marker(
        [lat, lng],
        popup = label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7
    ).add_to(mapLA)

In [46]:
mapLA