In [1]:
import folium as fm
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, Polygon
from fiona.drvsupport import supported_drivers
import panel as pn
import re
from fastkml.kml import KML
import matplotlib.pyplot as plt
pn.extension(sizing_mode="stretch_width")

In [2]:
# If required

#import os
#os.getcwd()
#os.chdir()

In [3]:
supported_drivers['KML'] = 'rw'
project_zips = gpd.read_file('./Geographies/DC_MD_VA_Zipcodes.kml', driver='KML')

In [4]:
# Checkpoint, it should be 1432 entries
project_zips.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1432 entries, 0 to 1431
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   Name         1432 non-null   object  
 1   Description  1432 non-null   object  
 2   geometry     1432 non-null   geometry
dtypes: geometry(1), object(2)
memory usage: 33.7+ KB


In [5]:
def find_zip_code(cell):
    match = re.match(r'<at><openparen>([0-9]+)<closeparen>', cell)
    if match:
        return match.group(1)
zips = list()
for _,r in project_zips.iterrows():
    zips.append(find_zip_code(r['Name']))
project_zips['Zip Code'] = zips
display(project_zips.head())

Unnamed: 0,Name,Description,geometry,Zip Code
0,<at><openparen>20001<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.02758 38.90964 0.00000, -77.02...",20001
1,<at><openparen>20002<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01217 38.89209 0.00000, -77.01...",20002
2,<at><openparen>20003<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01402 38.88236 0.00000, -77.01...",20003
3,<at><openparen>20004<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03365 38.89735 0.00000, -77.03...",20004
4,<at><openparen>20005<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03654 38.90252 0.00000, -77.03...",20005


In [6]:
NCRbreweries = pd.read_csv('NCRbreweries.csv')

In [7]:
# Check to ensure it pulls in correctly, should be 379 entries
NCRbreweries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       379 non-null    int64  
 1   obdb_id          379 non-null    object 
 2   name             379 non-null    object 
 3   brewery_type     379 non-null    object 
 4   street           324 non-null    object 
 5   address_2        0 non-null      float64
 6   address_3        0 non-null      float64
 7   city             379 non-null    object 
 8   state            379 non-null    object 
 9   county_province  0 non-null      float64
 10  postal_code      379 non-null    object 
 11  website_url      318 non-null    object 
 12  phone            337 non-null    float64
 13  country          379 non-null    object 
 14  longitude        193 non-null    float64
 15  latitude         193 non-null    float64
 16  tags             0 non-null      float64
dtypes: float64(7), i

In [8]:
# Need to change column name in NCR breweries in order to merge the datasets
NCRbreweries = NCRbreweries.rename(columns={'postal_code' : 'Zip Code'})

In [9]:
NCRbreweries

Unnamed: 0.1,Unnamed: 0,obdb_id,name,brewery_type,street,address_2,address_3,city,state,county_province,Zip Code,website_url,phone,country,longitude,latitude,tags
0,1,1623-brewing-co-llc-westminister,"1623 Brewing CO, llc",contract,1146 colonel Joshua Ct,,,Westminister,Maryland,,21157,,,United States,,,
1,2,1781-brewing-company-spotsylvania,1781 Brewing Company,micro,11109 Plank Rd,,,Spotsylvania,Virginia,,22553-4258,,5.408413e+09,United States,,,
2,3,2-silos-brewing-company-manassas,2 Silos Brewing Company,micro,9925 Discovery Blvd,,,Manassas,Virginia,,20109,http://www.2silosbrewing.com,7.034202e+09,United States,,,
3,4,2-witches-winery-and-brewing-company-danville,2 Witches Winery and Brewing Company,micro,209 Trade St,,,Danville,Virginia,,24541-3545,http://www.2witcheswinebrew.com,4.345493e+09,United States,-79.421256,36.585903,
4,5,3-stars-brewing-co-washington,3 Stars Brewing Co,micro,6400 Chillum Pl NW Ste B,,,Washington,District of Columbia,,20012-2111,http://www.3starsbrewing.com,2.026700e+09,United States,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,375,wood-ridge-farm-brewery-lovingston,Wood Ridge Farm Brewery,micro,151 Old Ridge Rd,,,Lovingston,Virginia,,22949-2550,,4.344226e+09,United States,,,
375,376,woodbine-farm-brewery-manassas,Woodbine Farm Brewery,planning,,,,Manassas,Virginia,,20112,http://Woodbinefarmandbrewery.com,5.712057e+09,United States,,,
376,377,woodstock-brewhouse-woodstock,Woodstock Brewhouse,brewpub,123 E Court St,,,Woodstock,Virginia,,22664-1761,http://www.woodstockbrewhouse.com,5.404593e+09,United States,-78.504142,38.881426,
377,378,wort-hog-brewing-company-llc-warrenton,Wort Hog Brewing Company LLC,brewpub,50A S 3rd St,,,Warrenton,Virginia,,20186-3340,http://www.worthogbreweryllc.com,5.403003e+09,United States,-77.795138,38.711795,


In [10]:
combinedrecreation_df = pd.merge(project_zips, NCRbreweries, on = 'Zip Code', how = 'left')

In [11]:
combinedrecreation_df.head()

Unnamed: 0.1,Name,Description,geometry,Zip Code,Unnamed: 0,obdb_id,name,brewery_type,street,address_2,address_3,city,state,county_province,website_url,phone,country,longitude,latitude,tags
0,<at><openparen>20001<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.02758 38.90964 0.00000, -77.02...",20001,,,,,,,,,,,,,,,,
1,<at><openparen>20002<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01217 38.89209 0.00000, -77.01...",20002,,,,,,,,,,,,,,,,
2,<at><openparen>20003<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.01402 38.88236 0.00000, -77.01...",20003,,,,,,,,,,,,,,,,
3,<at><openparen>20004<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03365 38.89735 0.00000, -77.03...",20004,,,,,,,,,,,,,,,,
4,<at><openparen>20005<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.03654 38.90252 0.00000, -77.03...",20005,,,,,,,,,,,,,,,,


In [12]:
# Checkpoint: merged dataset should have 1437 entries

combinedrecreation_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1437 entries, 0 to 1436
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   Name             1437 non-null   object  
 1   Description      1437 non-null   object  
 2   geometry         1437 non-null   geometry
 3   Zip Code         1437 non-null   object  
 4   Unnamed: 0       58 non-null     float64 
 5   obdb_id          58 non-null     object  
 6   name             58 non-null     object  
 7   brewery_type     58 non-null     object  
 8   street           43 non-null     object  
 9   address_2        0 non-null      float64 
 10  address_3        0 non-null      float64 
 11  city             58 non-null     object  
 12  state            58 non-null     object  
 13  county_province  0 non-null      float64 
 14  website_url      43 non-null     object  
 15  phone            46 non-null     float64 
 16  country          58 non-null     o

In [13]:
nonnullcombinedrecreation_df = combinedrecreation_df[combinedrecreation_df['name'].notnull()]

In [14]:
nonnullcombinedrecreation_df.head()

Unnamed: 0.1,Name,Description,geometry,Zip Code,Unnamed: 0,obdb_id,name,brewery_type,street,address_2,address_3,city,state,county_province,website_url,phone,country,longitude,latitude,tags
80,<at><openparen>20629<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-76.46425 38.34189 0.00000, -76.46...",20629,293.0,ruddy-duck-brewery-and-grill-dowell,Ruddy Duck Brewery and Grill,brewpub,13200 Dowell Rd,,,Dowell,Maryland,,http://www.ruddyduckbrewery.com,4103944000.0,United States,-76.45795,38.350501,
86,<at><openparen>20639<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-76.68553 38.63222 0.00000, -76.68...",20639,171.0,gypsy-brewing-company-huntingtown,Gypsy Brewing Company,micro,,,,Huntingtown,Maryland,,,,United States,,,
137,<at><openparen>20736<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-76.70067 38.67013 0.00000, -76.70...",20736,296.0,scorpion-brewing-owings,Scorpion Brewing,micro,"929 Skinners Turn Rd, Suite 100",,,Owings,Maryland,,http://www.scorpionbrewing.com,7033072000.0,United States,,,
161,<at><openparen>20776<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-76.69655 38.84716 0.00000, -76.69...",20776,188.0,hopscratch-farm-and-brewery-harwood,HopScratch Farm & Brewery,planning,,,,Harwood,Maryland,,,,United States,,,
199,<at><openparen>20877<closeparen>,<center><table><tr><th colspan='2' align='cent...,"POLYGON Z ((-77.21685 39.14884 0.00000, -77.21...",20877,164.0,greene-growlers-gaithersburg,Greene Growlers,brewpub,227 E Diamond Ave,,,Gaithersburg,Maryland,,http://www.greenegrowlers.com,2402616000.0,United States,-77.193556,39.141936,


In [15]:
nonnullcombinedrecreation_df.to_csv('recreationdata.csv')