In [45]:
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
import random as rd
import warnings
warnings.filterwarnings("ignore")
from geopy.geocoders import Nominatim
import math
from folium.plugins import MarkerCluster

In [3]:
def embed_map(m, file_name, width='100%', height='500px'):
    """Save a map to an HTML file and return an IFrame that displays that file.

    Parameters
    ----------
    m : object
        Any object exposing ``save(path)`` (presumably a ``folium.Map``).
    file_name : str
        Path of the HTML file to write; the same path is used as the IFrame source.
    width, height : str or int, optional
        Dimensions forwarded to ``IFrame``. The defaults reproduce the
        previously hard-coded size.

    Returns
    -------
    IPython.display.IFrame
        Frame pointing at ``file_name``.
    """
    # Imported lazily so the helper can be defined even where IPython is absent.
    from IPython.display import IFrame

    m.save(file_name)
    return IFrame(file_name, width=width, height=height)

### Geocode the missing locations

In [9]:
# Load the Starbucks store table.
starbucks = pd.read_csv("../../geospatial-learn-course-data/starbucks_locations.csv")

# Select the Berkeley stores (the rows shown below have empty Latitude/Longitude).
# An explicit copy makes this an independent frame, so columns can be assigned to
# it later without pandas treating it as a slice of `starbucks`
# (SettingWithCopyWarning).
rows_with_missing = starbucks.loc[starbucks["City"] == "Berkeley"].copy()
display(rows_with_missing)

Unnamed: 0,Store Number,Store Name,Address,City,Longitude,Latitude
153,5406-945,2224 Shattuck - Berkeley,2224 Shattuck Avenue Berkeley CA,Berkeley,,
154,570-512,Solano Ave,1799 Solano Avenue Berkeley CA,Berkeley,,
155,17877-164526,Safeway - Berkeley #691,1444 Shattuck Place Berkeley CA,Berkeley,,
156,19864-202264,Telegraph & Ashby,3001 Telegraph Avenue Berkeley CA,Berkeley,,
157,9217-9253,2128 Oxford St.,2128 Oxford Street Berkeley CA,Berkeley,,


In [11]:
# Nominatim geocoder client; the user agent string identifies this notebook to the service.
geolocator = Nominatim(user_agent="exercice-kaggle-course")


def my_geocoder(row):
    """Geocode a single address and return its coordinates.

    Parameters
    ----------
    row : str
        Address text handed to ``geolocator.geocode`` (the caller below passes
        the value of the ``Address`` column, despite the parameter name).

    Returns
    -------
    pandas.Series
        Series with ``Latitude`` and ``Longitude`` entries, in that order.
        Both are NaN when the lookup fails, so every call returns the same
        shape and ``DataFrame.apply`` can always expand the results into two
        columns.
    """
    try:
        point = geolocator.geocode(row).point
    except Exception:
        # Best effort: an address with no match makes geocode() return None
        # (AttributeError on `.point`), and service errors are raised as
        # exceptions. Catching Exception rather than using a bare `except`
        # lets KeyboardInterrupt / SystemExit still propagate.
        return pd.Series({'Latitude': np.nan, 'Longitude': np.nan})
    return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})


# One geocoding request per row; the two returned entries fill the coordinate columns.
# NOTE(review): only `rows_with_missing` receives the coordinates here; `starbucks`
# itself is not modified by this cell - confirm whether later cells expect them.
rows_with_missing[["Latitude", "Longitude"]] = rows_with_missing.apply(
    lambda x: my_geocoder(x["Address"]), axis=1
)

### View Berkeley location

In [48]:
def showMap(data, location=(37.88, -122.26), zoom_start=13):
    """Build a folium map with one clustered marker per row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide ``Latitude`` and ``Longitude`` columns. Rows where either
        value is missing are skipped.
    location : sequence of two floats, optional
        ``[lat, lon]`` centre of the initial view. The default is the centre
        this function previously hard-coded.
    zoom_start : int, optional
        Initial zoom level (default 13, the previously hard-coded value).

    Returns
    -------
    folium.Map
        Map containing a single ``MarkerCluster`` with all markers.
    """
    # folium.Icon only supports a fixed set of named marker colours; arbitrary
    # CSS strings such as "rgb(r, g, b)" are not valid for `color`.
    marker_colors = [
        'red', 'blue', 'green', 'purple', 'orange', 'darkred', 'darkblue',
        'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'gray', 'black',
    ]
    icons = ['fa-ice-cream', 'fa-coffee', 'fa-beer', 'fa-cocktail', 'fa-apple-alt']

    cluster = MarkerCluster()
    # Named `store_map` rather than `map` to avoid shadowing the builtin.
    store_map = folium.Map(location=list(location), tiles='openstreetmap', zoom_start=zoom_start)
    for _, row in data.iterrows():
        # pd.isna also copes with None, unlike math.isnan.
        if pd.isna(row['Longitude']) or pd.isna(row['Latitude']):
            continue
        marker_icon = folium.Icon(color=rd.choice(marker_colors), icon=rd.choice(icons), prefix='fa')
        cluster.add_child(
            folium.Marker(location=[row["Latitude"], row["Longitude"]], icon=marker_icon)
        )
    store_map.add_child(cluster)
    return store_map


showMap(rows_with_missing)

### Consolidate your data

In [23]:
# Per-county statistics, each table indexed by GEOID.
CA_pop = pd.read_csv("../../geospatial-learn-course-data/CA_county_population.csv", index_col="GEOID")
CA_high_earners = pd.read_csv("../../geospatial-learn-course-data/CA_county_high_earners.csv", index_col="GEOID")
CA_median_age = pd.read_csv("../../geospatial-learn-course-data/CA_county_median_age.csv", index_col="GEOID")

# County boundary polygons, tagged with the EPSG:4326 coordinate reference system.
CA_counties = gpd.read_file("../../geospatial-learn-course-data/CA_county_boundaries/CA_county_boundaries/CA_county_boundaries.shp")
CA_counties.crs = {'init': 'epsg:4326'}

# Attach population and high-earner counts to the boundaries, then add median age
# to obtain the combined statistics table.
CA_counties = CA_counties.merge(CA_pop, on="GEOID").merge(CA_high_earners, on="GEOID")
CA_stats = CA_counties.merge(CA_median_age, on="GEOID")

In [24]:
# Density = population divided by the value in the `area_sqkm` column.
CA_stats["density"] = CA_stats["population"].div(CA_stats["area_sqkm"])

### Which counties look promising?

In [33]:
# Baseline requirements: every selected county must satisfy all three.
age_ok = CA_stats["median_age"] < 38.5
density_ok = CA_stats["density"] > 285
earners_ok = CA_stats["high_earners"] > 100000

# In addition, at least one of the stricter thresholds must hold.
stands_out = (
    (CA_stats["median_age"] < 35.5)
    | (CA_stats["density"] > 1400)
    | (CA_stats["high_earners"] > 500000)
)

sel_counties = CA_stats.loc[age_ok & density_ok & earners_ok & stands_out]

### How many stores did you identify?

In [40]:
# Turn the store table into point geometries (x = longitude, y = latitude),
# tagged with the same CRS as the county boundaries.
store_points = gpd.points_from_xy(starbucks["Longitude"], starbucks["Latitude"])
starbucks_gdf = gpd.GeoDataFrame(starbucks, geometry=store_points)
starbucks_gdf.crs = {'init': 'epsg:4326'}

# Spatial join: keep the stores matched to one of the selected counties.
locations_of_interest = gpd.sjoin(starbucks_gdf, sel_counties)
num_stores = locations_of_interest.shape[0]
print(num_stores)

1043


### Visualize the store locations.

In [49]:
# showMap() builds and returns its own map, so a separately created folium.Map
# would never be displayed. Instead, take the returned map and fit its view to
# the extent of the plotted stores.
stores_map = showMap(locations_of_interest)
store_coords = locations_of_interest[["Latitude", "Longitude"]].dropna()
if not store_coords.empty:
    # Bounds are given as [south-west corner, north-east corner], each [lat, lon].
    stores_map.fit_bounds([store_coords.min().tolist(), store_coords.max().tolist()])
stores_map