# Emission Factor Analysis

In [1]:
import json

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import folium

# Analyze Data

## Power Plant Data

In [2]:
# Power plant records: the gppd CSV from the DS4G Environmental Insights Explorer input directory.
gppd_csv_path = '../input/ds4g-environmental-insights-explorer/eie_data/gppd/gppd_120_pr.csv'
gppd = pd.read_csv(gppd_csv_path)

In [3]:
# Preview the first five rows to see which columns were loaded.
gppd.head(n=5)

Unnamed: 0,system:index,capacity_mw,commissioning_year,country,country_long,estimated_generation_gwh,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,...,other_fuel1,other_fuel2,other_fuel3,owner,primary_fuel,source,url,wepp_id,year_of_capacity_data,.geo
0,0000000000000000315a,15.0,1942.0,USA,United States of America,685.397712,0.0,0.0,0.0,0.0,...,,,,PREPA,Hydro,CEPR,http://energia.pr.gov/datos/plantas/,30936.0,0,"{""type"":""Point"",""coordinates"":[-66.66629863706..."
1,000000000000000026e5,1492.0,1975.0,USA,United States of America,8334.010812,0.0,0.0,0.0,0.0,...,,,,PREPA,Oil,PREPA,http://www.prepa.com/aguirre.asp,30928.0,0,"{""type"":""Point"",""coordinates"":[-66.23079607357..."
2,00000000000000002fda,990.0,1962.0,USA,United States of America,5529.94015,0.0,0.0,0.0,0.0,...,,,,PREPA,Oil,PREPA,http://www.prepa.com/costasur.asp,30934.0,0,"{""type"":""Point"",""coordinates"":[-66.75340271704..."
3,00000000000000003f76,602.0,1960.0,USA,United States of America,3362.650475,0.0,0.0,0.0,0.0,...,,,,PREPA,Oil,PREPA,http://www.prepa.com/PALOSECO.ASP,30942.0,0,"{""type"":""Point"",""coordinates"":[-66.14859700475..."
4,00000000000000002def,10.0,1915.0,USA,United States of America,456.931808,0.0,0.0,0.0,0.0,...,,,,PREPA,Hydro,PREPA,http://www.prepa.com/historia_eng.asp https:/...,65289.0,0,"{""type"":""Point"",""coordinates"":[-66.10600367149..."


In [4]:
def split_column_into_new_columns(dataframe,column_to_split,new_column_one,begin_column_one,end_column_one):
    '''
    Copy a fixed slice of every value in one column into a new column.

    Source:
    https://www.kaggle.com/paultimothymooney/overview-of-the-eie-analytics-challenge

    Parameters:
        dataframe: DataFrame to extend; it is modified in place.
        column_to_split: name of the column holding the text to slice.
        new_column_one: name of the column that receives the slices.
        begin_column_one: start offset of the slice (inclusive).
        end_column_one: end offset of the slice (exclusive).

    Returns:
        The same DataFrame object, now containing `new_column_one`.
    '''
    # Slice the whole column at once. Unlike a positional loop over
    # `.loc[i, ...]`, this does not require the index to be 0..n-1, so it also
    # works on filtered or re-indexed frames. Missing values stay missing.
    dataframe[new_column_one] = dataframe[column_to_split].str[begin_column_one:end_column_one]
    return dataframe

In [5]:
# Read each plant's coordinates by parsing the GeoJSON text in '.geo' instead
# of slicing it at fixed character offsets. The coordinate text is not
# guaranteed to be fixed-width, so fixed offsets can clip digits -- which is
# what the previous "10.0 + latitude" patch was compensating for.
# GeoJSON positions are ordered [longitude, latitude].
plant_coordinates = gppd['.geo'].map(lambda geojson_text: json.loads(geojson_text)['coordinates'])

# Store both columns as floats so later cells can use them without re-parsing.
gppd = gppd.assign(
    latitude=plant_coordinates.map(lambda lon_lat: float(lon_lat[1])),
    longitude=plant_coordinates.map(lambda lon_lat: float(lon_lat[0])),
)

In [6]:
from shapely.geometry import Point

In [7]:
def _row_to_point(row):
    """Return a shapely Point built from the row's longitude (x) and latitude (y)."""
    x_coord = float(row['longitude'])
    y_coord = float(row['latitude'])
    return Point(x_coord, y_coord)

# One Point per plant, stored alongside the plant attributes.
gppd['geometry'] = gppd.apply(_row_to_point, axis=1)

In [8]:
import geopandas as gpd

In [9]:
# The plant points are longitude/latitude pairs, so declare them as EPSG:4326.
# The CRS is passed as an authority string; the older {'init': 'epsg:4326'}
# dict form is deprecated by pyproj and triggers a FutureWarning.
geo_df = gpd.GeoDataFrame(gppd, geometry='geometry', crs='EPSG:4326')

# Reproject to EPSG:3857 (Web Mercator), presumably so the points line up with
# the map tiles added to the Bokeh figure in a later cell.
geo_df = geo_df.to_crs(epsg=3857)

In [10]:
import bokeh
from bokeh.models import ColumnDataSource, LinearColorMapper
from bokeh.plotting import figure, show, reset_output, output_notebook
from bokeh.palettes import brewer
from bokeh.models import HoverTool
from bokeh.models import GeoJSONDataSource
from bokeh.tile_providers import STAMEN_TERRAIN



In [11]:
# Serialise the reprojected plants to GeoJSON text, then wrap it in a Bokeh data source.
plants_geojson = geo_df.to_json()
ppd_geosource = GeoJSONDataSource(geojson=plants_geojson)

In [12]:
# Create the figure that will hold the power plant map.
p1 = figure(title = 'Map of Power Plants', 
           plot_height = 500,
           plot_width = 800, 
           toolbar_location = 'below',
           tools = "pan, wheel_zoom, box_zoom, reset")
# Hide the grid lines so they do not obscure the map tiles.
p1.xgrid.grid_line_color = None
p1.ygrid.grid_line_color = None

# Background map tiles.
p1.add_tile(STAMEN_TERRAIN)

# Colour palette and mapper. NOTE(review): neither is used by the renderer in
# this cell (the circles are plain red); kept in case a later cell relies on them.
palette = brewer['BuPu'][8]
palette = palette[::-1] # reverse order of colors so higher values have darker colors
# Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 400000000)

# Draw every plant as a red circle at its projected x/y position.
powerplants_map = p1.circle('x', 'y', source=ppd_geosource, color='red', size=5,
                      name='powerplants_map')

# Hover tool showing name, estimated generation and capacity of each plant.
p1.add_tools(HoverTool(renderers = [powerplants_map],
                      tooltips = [('Name', '@name'),
                          ('Generation','@estimated_generation_gwh'),
                                ('Capacity','@capacity_mw')]))

# Show the figure inline. The first attempt resets Bokeh's output state before
# enabling notebook output; if that attempt fails, notebook output is enabled
# again and the figure is shown a second time.
try:
    reset_output()
    output_notebook()
    
    show(p1)
except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still stop the cell.
    output_notebook()
    show(p1)

Plot plant locations with Folium:

In [13]:
# Centre point (latitude, longitude) and zoom level for the initial map view.
lat=18.200178
lon=-66.664513
plant_map = folium.Map(location=[lat, lon], zoom_start=9)

# Add one marker per plant, using the plant name as the popup text.
for point in range(len(gppd)):
    plant = gppd.iloc[point]
    marker_location = (plant['latitude'], plant['longitude'])
    folium.Marker(marker_location, popup=plant['name']).add_to(plant_map)

# Last expression of the cell: render the map.
plant_map