In [107]:
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import pandas as pd 
import numpy as np
import geopandas as gpd
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
import math

In [None]:
def embed_map(m, file_name):
    """Save ``m`` to ``file_name`` and return an IFrame that points at that file.

    ``m`` must provide a ``save(path)`` method; the returned frame is
    full-width and 500px tall.
    """
    from IPython.display import IFrame

    m.save(file_name)
    frame = IFrame(file_name, width='100%', height='500px')
    return frame

# Exercise

**Nominatim** is the geocoding service used below to turn place names and addresses into geographic coordinates.

In [18]:
# Create the Nominatim geocoder client.
geolocator = Nominatim(user_agent="zoux@usc.edu")

# Look up a single landmark by name.
location = geolocator.geocode("Pyramid of Khufu", timeout=None)

# Show the matched point first, then the address text of the match.
print(location.point, location.address, sep="\n")

29 58m 44.976s N, 31 8m 3.17625s E
هرم خوفو, شارع ابو الهول السياحي, نزلة البطران, الجيزة, 12125, مصر


In [3]:
# Read the numeric coordinates off the geocoding result.
point = location.point
print(f"Latitude: {point.latitude}")
print(f"Longitude: {point.longitude}")

Latitude: 29.97916
Longitude: 31.134215625236113


In [32]:
# Load the university list and preview the first five rows.
universities = pd.read_csv(
    "geospatial-learn-course-data/top_universities.csv"
)
universities.head(5)

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


In [33]:
def my_geocoder(row, geolocator=None):
    """Geocode one query and return its coordinates as a two-entry Series.

    Parameters
    ----------
    row : str
        Query passed to ``geolocator.geocode`` (the caller in this notebook
        passes a university name).
    geolocator : object, optional
        Any object exposing ``geocode(query, timeout=...)``. When omitted, a
        ``Nominatim`` client is created for this call, as before.

    Returns
    -------
    pandas.Series
        ``Latitude`` and ``Longitude`` of the match. Both are NaN when nothing
        is found or the lookup raises, so every call returns the same shape
        and ``DataFrame.apply(..., axis=1)`` always expands to two columns,
        even when the first row fails.
    """
    not_found = pd.Series({'Latitude': float('nan'), 'Longitude': float('nan')})
    try:
        if geolocator is None:
            geolocator = Nominatim(user_agent="zoux@usc.edu")
        location = geolocator.geocode(row, timeout=None)
        if location is None:
            # The service answered but had no match for this query.
            return not_found
        point = location.point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except Exception:
        # Best-effort lookup: a failed request counts as "not geocoded".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt through so a
        # long geocoding run can still be cancelled.
        return not_found

In [34]:
# Geocode every university by name and store the resulting coordinates.
universities[['Latitude', 'Longitude']] = universities.apply(
    lambda uni: my_geocoder(uni['Name']),
    axis=1,
)

In [35]:
# Share of rows whose Latitude is not missing, i.e. rows that were geocoded.
print(f"{(1 - universities['Latitude'].isna().mean()) * 100}% of addresses were geocoded!")

89.0% of addresses were geocoded!


In [36]:
# Keep only the universities that received a latitude from the geocoder.
universities = universities.dropna(subset=["Latitude"])


In [37]:
# Turn the coordinate columns into point geometries (x = longitude,
# y = latitude) and declare the CRS at construction time. The CRS is given as
# an "EPSG:4326" string rather than the deprecated {'init': 'epsg:4326'} dict,
# which makes pyproj emit a FutureWarning.
universities = gpd.GeoDataFrame(
    universities,
    geometry=gpd.points_from_xy(universities.Longitude, universities.Latitude),
    crs="EPSG:4326",
)
universities.head()

  return GeometryArray(vectorized.points_from_xy(x, y, z))
  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Name,Latitude,Longitude,geometry
0,University of Oxford,51.758879,-1.259603,POINT (-1.25960 51.75888)
1,University of Cambridge,52.200623,0.110474,POINT (0.11047 52.20062)
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896)
3,ETH Zurich,47.562772,7.580947,POINT (7.58095 47.56277)
4,UCL,51.521785,-0.135151,POINT (-0.13515 51.52179)


In [42]:
# Create the base map
m = folium.Map(location=[54, 15], tiles='openstreetmap', zoom_start=2)

# Add one marker per university, using its name as the popup text
for lat, lon, label in zip(universities['Latitude'],
                           universities['Longitude'],
                           universities['Name']):
    Marker([lat, lon], popup=label).add_to(m)

# Display the map
m

## Join Tables

In [44]:
# Load the 'naturalearth_lowres' dataset that ships with geopandas.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Keep the rows whose continent is Europe and renumber them from zero.
europe = world[world["continent"] == 'Europe'].reset_index(drop=True)

# Split into a geometry table and a statistics table that share "name".
europe_boundaries = europe[["name", "geometry"]]
europe_stats = europe[["name", "pop_est", "gdp_md_est"]]

  aout[:] = out


In [45]:
# Preview the first five rows of the boundaries table.
europe_boundaries.head(5)

Unnamed: 0,name,geometry
0,Russia,"MULTIPOLYGON (((178.725 71.099, 180.000 71.516..."
1,Norway,"MULTIPOLYGON (((15.143 79.674, 15.523 80.016, ..."
2,France,"MULTIPOLYGON (((-51.658 4.156, -52.249 3.241, ..."
3,Sweden,"POLYGON ((11.027 58.856, 11.468 59.432, 12.300..."
4,Belarus,"POLYGON ((28.177 56.169, 29.230 55.918, 29.372..."


In [46]:
# Attribute join: match boundaries and statistics on the shared "name" column
europe = europe_boundaries.merge(right=europe_stats, on="name", how="inner")
europe.head(5)

Unnamed: 0,name,geometry,pop_est,gdp_md_est
0,Russia,"MULTIPOLYGON (((178.725 71.099, 180.000 71.516...",142257519,3745000.0
1,Norway,"MULTIPOLYGON (((15.143 79.674, 15.523 80.016, ...",5320045,364700.0
2,France,"MULTIPOLYGON (((-51.658 4.156, -52.249 3.241, ...",67106161,2699000.0
3,Sweden,"POLYGON ((11.027 58.856, 11.468 59.432, 12.300...",9960487,498100.0
4,Belarus,"POLYGON ((28.177 56.169, 29.230 55.918, 29.372...",9549747,165400.0


In [48]:
# Spatial join: attach to each university the European country row it matches
european_universities = gpd.sjoin(left_df=universities, right_df=europe, how="inner")
european_universities.head(5)

  warn(
  r_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()])


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,name,pop_est,gdp_md_est
0,University of Oxford,51.758879,-1.259603,POINT (-1.25960 51.75888),28,United Kingdom,64769452,2788000.0
1,University of Cambridge,52.200623,0.110474,POINT (0.11047 52.20062),28,United Kingdom,64769452,2788000.0
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896),28,United Kingdom,64769452,2788000.0
4,UCL,51.521785,-0.135151,POINT (-0.13515 51.52179),28,United Kingdom,64769452,2788000.0
5,London School of Economics and Political Science,51.514211,-0.116808,POINT (-0.11681 51.51421),28,United Kingdom,64769452,2788000.0


# Starbucks

## Geocode the missing locations.

In [79]:
# Read the Starbucks locations in California and show the first five rows
starbucks = pd.read_csv(
    "geospatial-learn-course-data/starbucks_locations.csv"
)
starbucks.head(5)

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.4,34.51
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.76,34.16
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.76,34.15
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.75,33.87
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.28,37.79


In [80]:
# Count missing values per column.
starbucks.isna().sum()

Store Number    0
Store Name      0
Address         0
City            0
Longitude       5
Latitude        5
dtype: int64

In [81]:
# Show the stores whose Longitude is missing.
starbucks[starbucks['Longitude'].isna()]

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,


In [82]:
def geo_stb(i, geolocator=None):
    """Geocode one address and return its coordinates as a two-entry Series.

    Parameters
    ----------
    i : str
        Query passed to ``geolocator.geocode`` (the caller in this notebook
        passes a store's ``Address`` value).
    geolocator : object, optional
        Any object exposing ``geocode(query, timeout=...)``. When omitted, a
        ``Nominatim`` client is created for this call, as before.

    Returns
    -------
    pandas.Series
        ``Latitude`` and ``Longitude`` of the match. Both are NaN when nothing
        is found or the lookup raises, so every call returns the same shape
        and ``DataFrame.apply(..., axis=1)`` always expands to two columns,
        even when the first row fails.
    """
    not_found = pd.Series({'Latitude': float('nan'), 'Longitude': float('nan')})
    try:
        if geolocator is None:
            geolocator = Nominatim(user_agent="zoux@usc.edu")
        location = geolocator.geocode(i, timeout=None)
        if location is None:
            # The service answered but had no match for this address.
            return not_found
        point = location.point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except Exception:
        # Best-effort lookup: a failed request leaves the coordinates missing.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt through so the
        # run can still be cancelled.
        return not_found

In [83]:
# Geocode the address of every store with a missing Longitude and write the
# result into that store's Longitude/Latitude cells.
starbucks.loc[starbucks['Longitude'].isna(), ['Longitude', 'Latitude']] = (
    starbucks[starbucks['Longitude'].isna()]
    .apply(lambda store: geo_stb(store['Address']), axis=1)
)

In [84]:
# Re-count missing values per column after geocoding.
starbucks.isna().sum()

Store Number    0
Store Name      0
Address         0
City            0
Longitude       0
Latitude        0
dtype: int64

## View Berkeley locations

In [86]:
# Turn the coordinate columns into point geometries (x = longitude,
# y = latitude) and declare the CRS at construction time. The CRS is given as
# an "EPSG:4326" string rather than the deprecated {'init': 'epsg:4326'} dict,
# which makes pyproj emit a FutureWarning.
starbucks = gpd.GeoDataFrame(
    starbucks,
    geometry=gpd.points_from_xy(starbucks.Longitude, starbucks.Latitude),
    crs="EPSG:4326",
)
starbucks.head()

  return GeometryArray(vectorized.points_from_xy(x, y, z))
  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude,geometry
0,10429-100710,Palmdale & Hwy 395,14136 US Hwy 395 Adelanto CA,Adelanto,-117.4,34.51,POINT (-117.40000 34.51000)
1,635-352,Kanan & Thousand Oaks,5827 Kanan Road Agoura CA,Agoura,-118.76,34.16,POINT (-118.76000 34.16000)
2,74510-27669,Vons-Agoura Hills #2001,5671 Kanan Rd. Agoura Hills CA,Agoura Hills,-118.76,34.15,POINT (-118.76000 34.15000)
3,29839-255026,Target Anaheim T-0677,8148 E SANTA ANA CANYON ROAD AHAHEIM CA,AHAHEIM,-117.75,33.87,POINT (-117.75000 33.87000)
4,23463-230284,Safeway - Alameda 3281,2600 5th Street Alameda CA,Alameda,-122.28,37.79,POINT (-122.28000 37.79000)


In [89]:
# Create the base map
m_2 = folium.Map(location=[37.88,-122.26], zoom_start=13)

# Add one marker per store, using the store name as the popup text
for lat, lon, store_name in zip(starbucks['Latitude'],
                                starbucks['Longitude'],
                                starbucks['Store Name']):
    Marker([lat, lon], popup=store_name).add_to(m_2)

# Display the map
m_2

In [90]:
## Consolidate the data

In [91]:
# Read the California county boundary shapefile and preview five rows.
CA_counties = gpd.read_file(
    "geospatial-learn-course-data/CA_county_boundaries/CA_county_boundaries/CA_county_boundaries.shp"
)
CA_counties.head(5)

  aout[:] = out


Unnamed: 0,GEOID,name,area_sqkm,geometry
0,6091,Sierra County,2491.995494,"POLYGON ((-120.65560 39.69357, -120.65554 39.6..."
1,6067,Sacramento County,2575.258262,"POLYGON ((-121.18858 38.71431, -121.18732 38.7..."
2,6083,Santa Barbara County,9813.817958,"MULTIPOLYGON (((-120.58191 34.09856, -120.5822..."
3,6009,Calaveras County,2685.626726,"POLYGON ((-120.63095 38.34111, -120.63058 38.3..."
4,6111,Ventura County,5719.321379,"MULTIPOLYGON (((-119.63631 33.27304, -119.6360..."


In [92]:
# Load the three per-county tables, each indexed by GEOID.
CA_pop, CA_high_earners, CA_median_age = (
    pd.read_csv(csv_path, index_col="GEOID")
    for csv_path in (
        "geospatial-learn-course-data/CA_county_population.csv",
        "geospatial-learn-course-data/CA_county_high_earners.csv",
        "geospatial-learn-course-data/CA_county_median_age.csv",
    )
)

In [97]:
# Join the three GEOID-indexed tables, then turn GEOID back into a column.
cols = CA_pop.join([CA_high_earners, CA_median_age]).reset_index()

# Attach the combined statistics to the county geometries via GEOID.
CA = CA_counties.merge(right=cols, on="GEOID")
CA.head(5)

Unnamed: 0,GEOID,name,area_sqkm,geometry,population,high_earners,median_age
0,6091,Sierra County,2491.995494,"POLYGON ((-120.65560 39.69357, -120.65554 39.6...",2987,111,55.0
1,6067,Sacramento County,2575.258262,"POLYGON ((-121.18858 38.71431, -121.18732 38.7...",1540975,65768,35.9
2,6083,Santa Barbara County,9813.817958,"MULTIPOLYGON (((-120.58191 34.09856, -120.5822...",446527,25231,33.7
3,6009,Calaveras County,2685.626726,"POLYGON ((-120.63095 38.34111, -120.63058 38.3...",45602,2046,51.6
4,6111,Ventura County,5719.321379,"MULTIPOLYGON (((-119.63631 33.27304, -119.6360...",850967,57121,37.5


In [98]:
# Population divided by area in square kilometres.
CA["density"] = CA["population"].div(CA["area_sqkm"])

## Promising Counties

In [99]:
# A county qualifies when it passes all three baseline thresholds and at
# least one of the three stricter thresholds.
prom_counties = CA.loc[
    CA["high_earners"].gt(100000)
    & CA["median_age"].lt(38.5)
    & CA["density"].gt(285)
    & (
        CA["median_age"].lt(35.5)
        | CA["density"].gt(1400)
        | CA["high_earners"].gt(500000)
    )
]

In [100]:
# Preview the counties that passed the filter (at most five rows).
prom_counties.head(5)

Unnamed: 0,GEOID,name,area_sqkm,geometry,population,high_earners,median_age,density
5,6037,Los Angeles County,12305.376879,"MULTIPOLYGON (((-118.66761 33.47749, -118.6682...",10105518,501413,36.0,821.227834
8,6073,San Diego County,11721.342229,"POLYGON ((-117.43744 33.17953, -117.44955 33.1...",3343364,194676,35.4,285.237299
10,6075,San Francisco County,600.588247,"MULTIPOLYGON (((-122.60025 37.80249, -122.6123...",883305,114989,38.3,1470.733077


In [104]:
# Keep the stores that fall inside one of the promising counties, then count them.
prom_loc = gpd.sjoin(left_df=starbucks, right_df=prom_counties, how="inner")
prom_loc.shape[0]

  warn(
  l_idx = np.concatenate([[i] * len(v) for i, v in idxmatch.iteritems()])


1043

## Visualization

In [113]:
# Create the base map
m_3 = folium.Map(location=[37,-120], zoom_start=6)

# Collect the markers in a cluster layer
mc = MarkerCluster()
for lat, lon in zip(prom_loc['Latitude'], prom_loc['Longitude']):
    # Skip any store that has a missing coordinate.
    if np.isnan(lon) or np.isnan(lat):
        continue
    mc.add_child(folium.Marker([lat, lon]))

m_3.add_child(mc)
# Display the map
m_3