## Imports

In [147]:
import pandas as pd
import numpy as np
import folium
import requests
import geocoder
import math

## Data

In [2]:
# Relative paths of the two raw CSV inputs.
beers, breweries = 'raw_data/beers.csv', 'raw_data/breweries.csv'

# Read both files, one data frame per input.
df_beers, df_breweries = (pd.read_csv(csv_path) for csv_path in (beers, breweries))

In [3]:
# Keep only the beer columns that are used, and give 'id' / 'name' beer-specific labels
df_beers = (
    df_beers[['abv', 'ibu', 'id', 'name', 'style', 'brewery_id', 'ounces']]
    .rename(columns={'id': 'beer_id', 'name': 'beer_name'})
)

# Turn the 'Unnamed: 0' column into the brewery key and label the brewery name
df_breweries = df_breweries.rename(
    columns={'Unnamed: 0': 'brewery_id', 'name': 'brewery_name'},
)

In [4]:
# Display the first rows of the beers frame (selected and renamed columns)
df_beers.head()

Unnamed: 0,abv,ibu,beer_id,beer_name,style,brewery_id,ounces
0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0
1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0
2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0
3,0.09,,2263,Sinister,American Double / Imperial IPA,177,12.0
4,0.075,,2262,Sex and Candy,American IPA,177,12.0


In [5]:
# Display the first rows of the breweries frame
df_breweries.head()

Unnamed: 0,brewery_id,brewery_name,city,state
0,0,NorthGate Brewing,Minneapolis,MN
1,1,Against the Grain Brewery,Louisville,KY
2,2,Jack's Abby Craft Lagers,Framingham,MA
3,3,Mike Hess Brewing Company,San Diego,CA
4,4,Fort Point Beer Company,San Francisco,CA


### Getting latitude and longitude location of breweries

In [85]:
def get_lat_lng(lat_lng, brewery, city, state):
    """Return coordinates for a brewery, querying the Google Geocoding API if needed.

    Parameters
    ----------
    lat_lng : object
        Value already stored for this brewery. Any truthy value other than NaN
        is treated as resolved and returned unchanged.
    brewery, city, state : str
        Joined with ', ' and suffixed with ', USA' to form the address query.

    Returns
    -------
    tuple or float
        The stored ``lat_lng`` when one exists, otherwise ``(lat, lng)`` from
        the first geocoding result, or ``np.nan`` when there are no results.
    """
    # NaN is truthy, so a plain `if lat_lng:` would treat an earlier failed
    # lookup (stored as NaN) as resolved and never retry it.
    is_nan = isinstance(lat_lng, float) and math.isnan(lat_lng)
    if lat_lng and not is_nan:
        return lat_lng

    url = 'https://maps.googleapis.com/maps/api/geocode/json'
    address = ', '.join([brewery, city, state]) + ', USA'
    params = {'sensor': 'false', 'address': address}
    r = requests.get(url, params=params)
    # A payload without a 'results' key is handled like an empty result list.
    results = r.json().get('results')
    if results:
        location = results[0]['geometry']['location']
        return location['lat'], location['lng']

    return np.nan

def get_lat_lng2(lat_lng, brewery, city, state):
    """Return coordinates for a brewery, using the `geocoder` package if needed.

    Parameters
    ----------
    lat_lng : object
        Value already stored for this brewery. Any truthy value other than NaN
        is treated as resolved and returned unchanged.
    brewery, city, state : str
        Joined with ', ' and suffixed with ', USA' to form the address query.

    Returns
    -------
    tuple or float
        The stored ``lat_lng`` when one exists, otherwise ``(lat, lng)`` read
        from the geocoder response, or ``np.nan`` when the lookup fails.
    """
    # NaN is truthy, so a plain `if lat_lng:` would treat an earlier failed
    # lookup (stored as NaN) as resolved and never retry it.
    is_nan = isinstance(lat_lng, float) and math.isnan(lat_lng)
    if lat_lng and not is_nan:
        return lat_lng

    try:
        address = ', '.join([brewery, city, state]) + ', USA'
        g = geocoder.google(address)
        return g.json['lat'], g.json['lng']

    # Best effort: any lookup failure becomes NaN. `Exception` (rather than a
    # bare `except`) still lets KeyboardInterrupt stop a long-running pass.
    except Exception:
        return np.nan
    
    

In [88]:
# Getting lat_lng using normal request.
# `row.get` yields None when the 'lat_lng' column does not exist yet (fresh
# kernel), so the first run geocodes every row instead of raising KeyError.
df_breweries['lat_lng'] = df_breweries.apply(
    lambda row: get_lat_lng(
        row.get('lat_lng'),
        str(row['brewery_name']),
        str(row['city']),
        str(row['state']),
    ),
    axis=1,
)

In [91]:
# Getting lat_lng using the geocoder package; rows that already hold a value are kept.
# `row.get` yields None when the 'lat_lng' column is absent, so this cell also
# works when it is the first geocoding pass to run.
df_breweries['lat_lng'] = df_breweries.apply(
    lambda row: get_lat_lng2(
        row.get('lat_lng'),
        str(row['brewery_name']),
        str(row['city']),
        str(row['state']),
    ),
    axis=1,
)

In [169]:
# Number of breweries whose lat_lng is still null (None or NaN)
df_breweries.loc[:, ['lat_lng']].isna().sum()

lat_lng    0
dtype: int64

#### Cleaning data (removing rows without coordinates)

In [163]:
def get_latitude(lat_lng):
    """Return the first element of a (lat, lng) pair, or NaN if there is no pair.

    Parameters
    ----------
    lat_lng : object
        Expected to be a tuple holding latitude then longitude; anything else
        (None, NaN, strings, incomplete tuples) is treated as missing.

    Returns
    -------
    The latitude element, or ``np.nan`` when ``lat_lng`` is not a complete pair.
    """
    # isinstance also accepts tuple subclasses such as namedtuples; the length
    # check keeps an empty or truncated tuple from raising IndexError.
    if isinstance(lat_lng, tuple) and len(lat_lng) >= 2:
        return lat_lng[0]
    return np.nan
    
def get_longitude(lat_lng):
    """Return the second element of a (lat, lng) pair, or NaN if there is no pair.

    Parameters
    ----------
    lat_lng : object
        Expected to be a tuple holding latitude then longitude; anything else
        (None, NaN, strings, incomplete tuples) is treated as missing.

    Returns
    -------
    The longitude element, or ``np.nan`` when ``lat_lng`` is not a complete pair.
    """
    # isinstance also accepts tuple subclasses such as namedtuples; the length
    # check keeps an empty or one-element tuple from raising IndexError.
    if isinstance(lat_lng, tuple) and len(lat_lng) >= 2:
        return lat_lng[1]
    return np.nan
    

In [164]:
# Split the stored lat_lng values into separate latitude / longitude columns.
# Mapping over the single column avoids building a row object per brewery just
# to read one field, and also works when the frame is empty.
df_breweries['latitude'] = df_breweries['lat_lng'].map(get_latitude)
df_breweries['longitude'] = df_breweries['lat_lng'].map(get_longitude)

In [167]:
# Keep only the breweries whose latitude was resolved
has_latitude = df_breweries['latitude'].notna()
df_breweries = df_breweries[has_latitude]

In [173]:
# Every remaining brewery must have a lat_lng value
assert not df_breweries['lat_lng'].isnull().any()

In [174]:
# Writing into CSV file with latitude and longitude info.
# The frame's index is written as well, since `index=False` is not passed.
df_breweries.to_csv('clean_data/breweries_latlng.csv')

### Merging beers DF with breweries DF using 'brewery_id'

In [172]:
# Attach each beer's brewery details by joining on the shared 'brewery_id' key
df_beers_breweries = pd.merge(df_beers, df_breweries, on='brewery_id')

In [173]:
# Display the first rows of the merged beers + breweries frame
df_beers_breweries.head()

Unnamed: 0,abv,ibu,beer_id,beer_name,style,brewery_id,ounces,brewery_name,city,state
0,0.05,,1436,Pub Beer,American Pale Lager,408,12.0,10 Barrel Brewing Company,Bend,OR
1,0.066,,2265,Devil's Cup,American Pale Ale (APA),177,12.0,18th Street Brewery,Gary,IN
2,0.071,,2264,Rise of the Phoenix,American IPA,177,12.0,18th Street Brewery,Gary,IN
3,0.09,,2263,Sinister,American Double / Imperial IPA,177,12.0,18th Street Brewery,Gary,IN
4,0.075,,2262,Sex and Candy,American IPA,177,12.0,18th Street Brewery,Gary,IN


In [174]:
# The merge must not drop (or duplicate) any beer row
assert df_beers_breweries.shape[0] == df_beers.shape[0]

### Counting breweries and beers by state

In [175]:
# Distinct brewery names per state; the state labels are stripped of surrounding
# whitespace before grouping, and 'state' is turned back into a regular column.
state_key = df_breweries['state'].str.strip()
df_breweries_by_state = (
    df_breweries
    .groupby(state_key)[['brewery_name']]
    .nunique(dropna=False)
    .reset_index()
)

In [176]:
# Display the first rows of the per-state brewery counts
df_breweries_by_state.head()

Unnamed: 0,state,brewery_name
0,AK,7
1,AL,3
2,AR,2
3,AZ,11
4,CA,39


In [177]:
# Counting beers by state.
# NOTE(review): the cell that originally built `df_beers_by_state` is not in the
# notebook, so this display raised NameError on a fresh kernel. The computation
# below mirrors the breweries-by-state one (distinct names per stripped state
# label) -- confirm this is the intended aggregation.
df_beers_by_state = (
    df_beers_breweries
    .groupby(df_beers_breweries['state'].str.strip())[['beer_name']]
    .nunique(dropna=False)
    .reset_index()
)

df_beers_by_state.head()

Unnamed: 0,state,beer_name
0,AK,25
1,AL,10
2,AR,5
3,AZ,46
4,CA,173


## Maps & Visualization

### Breweries location (Markers)

In [323]:
# Base map for the brewery markers, created with the 'Stamen Terrain' tile set
brewery_location_map = folium.Map(location=[48, -102], zoom_start=3, tiles='Stamen Terrain')

In [325]:
# Add one marker per brewery; the popup shows the brewery's actual name rather
# than a fixed placeholder string.
for index, row in df_breweries.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    name = str(row['brewery_name'])
    folium.Marker(
        [lat, lng],
        # parse_html=True escapes the text, so names containing apostrophes or
        # markup characters cannot break the generated page.
        popup=folium.Popup(name, parse_html=True)
    ).add_to(brewery_location_map)

In [326]:
# Render the marker map as the cell output
brewery_location_map

### Breweries by state (Choropleth)

In [178]:
# GeoJSON file with the US state outlines (see the Resources section)
state_geo = r'data/us-states.json'

# Base map, then a choropleth layer fed with the per-state brewery counts
brewery_state_map = folium.Map(zoom_start=3, location=[48, -102])
brewery_state_map.choropleth(
    fill_color='YlGn',
    columns=['state', 'brewery_name'],
    key_on='feature.id',
    data=df_breweries_by_state,
    geo_data=state_geo,
)

In [179]:
# Render the choropleth built above; the map object is `brewery_state_map`
# (`brewery_map` is never defined in this notebook).
brewery_state_map

## Resources

* US States: https://github.com/python-visualization/folium/blob/master/examples/data/us-states.json