In [42]:
import geopandas as gp
import pandas as pd
from geopandas import GeoDataFrame
from bokeh.plotting import save
import fiona
from shapely.geometry import Point
import gdal
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Circle, ColorBar, CategoricalColorMapper, LinearColorMapper
from bokeh.plotting import figure, show, ColumnDataSource, output_file
from bokeh.palettes import Category10

from geopandas import GeoDataFrame as gdf


# Configure Bokeh so that show() renders plots inline in the notebook output.
output_notebook()

Goal of Notebook:  First approach to reading the data into a map projection by way of GeoPandas.  Hover tooltips are active and will show all data on an institution.  Only institutions that appear on the 2016-17 U.S. Department of Education Institute of Education Sciences institution list are included.  See Joining HERD.ipynb in the repo for the list of ranks that fell out due to not having an institution ID.  

Current Abbreviations used:

dp is shorthand for the data points. Data labeled this way comes from a descendant of the Merged.csv dataset that was created by the Joining HERD notebook.

gdp is shorthand for a GeoDataFrame version of the data points.

df is shorthand for data frame.  Currently all df variables are unused, but they originally held the shapefiles for the US/world (df/df2).  Descendants of these were used to test commands.  

The Good: Hover tools work well.  Show all data on cursor hover.

The Relevant: pandas data frame 'dp' or data points, and its geopandas Geo Data Frame 'gdp' are used in the current projection.

The Bad: Projection woes are here.  gdalwarp may be a functional saviour (11_18).  Aesthetically this map is ugly.

The Irrelevant:  data frames 'df', 'df2' are currently unused in the map.  They contain different shapefiles that may or may not be useful for our purposes.  

The Possibilities: Refocus only on mainland United States for now (sorry Hawaii and Alaska).  Split entries by state and project only each state's Universities.  Backdrop still outstanding.  

In [8]:
### Load the HERD data points from CSV into a pandas DataFrame
# (shapefile loads below are disabled; df / df2 are not used by the current map)
#df = gp.read_file("C:/Users/cgdeer/Downloads/cb_2017_us_nation_5m/cb_2017_us_nation_5m.dbf")
herd_csv_path = '~/HERD_Analysis/HERD16ShortWState.csv'
dp = pd.read_csv(herd_csv_path)
#df2= gp.read_file(gp.datasets.get_path('naturalearth_lowres'))

In [9]:
###check headers, remove ';' to run result
# NOTE(review): `dp.head` is written without parentheses, so the line below only
# references the bound method and never calls it; `dp.head()` would be needed to
# actually get the first rows.
#df.head;
dp.head;
#df2.head;
#df4.head;
# Last expression of the cell: its value (the type of `dp`) is what the notebook displays.
type(dp)

pandas.core.frame.DataFrame

Reminder: review CRS for any shapefiles and the sources of lat,lon.  


In [11]:
### Pandas DataFrame -> GeoPandas GeoDataFrame

# One shapely Point per row, built from the (LON, LAT) pair, i.e. x = longitude, y = latitude.
geometry = list(map(Point, zip(dp['LON'], dp['LAT'])))

# Coordinate reference system attached to the points (EPSG code 4269).
crs = {'init': 'epsg:4269'}
gdp = GeoDataFrame(dp, geometry=geometry, crs=crs)

# Persist the GeoDataFrame to disk under the name "gdp" using to_file's default driver.
gdp.to_file("gdp")



In [12]:
# Preview the first two rows of the GeoDataFrame, including the new geometry column.
gdp.head(2)


Unnamed: 0,UNITID,Rank,Institution,TOTAL_Federal,DOD,DOE,HHS,NASA,NSF,USDA,Other,STATE,LAT,LON,geometry
0,162928,1,Johns Hopkins U.b,2104653,1037685,9125,618525,252470,37379,537,148932,MD,39.33,-76.62,POINT (-76.62000000000001 39.33)
1,236948,2,"U. Washington, Seattle",946787,85472,30932,612050,16815,113679,15404,72435,WA,47.66,-122.31,POINT (-122.31 47.66)


To plot in Bokeh we need explicit x and y values.  Because to_crs reprojects only the geometry column and not the LAT/LON columns, we need to read the Point(x, y) values we set up earlier out into their own columns.

The helper function and the two apply calls below were provided by https://automating-gis-processes.github.io/2016/Lesson5-interactive-map-bokeh.html


In [43]:
def getPointCoords(row, geom, coord_type):
    """Return one coordinate of the point geometry stored in a row.

    Parameters
    ----------
    row : mapping-like
        A record indexable by ``geom`` (e.g. a row passed by ``DataFrame.apply``
        with ``axis=1``).
    geom : hashable
        Key/column under which the point object is stored in ``row``.
    coord_type : str
        ``'x'`` to read the point's ``.x`` attribute, ``'y'`` to read ``.y``.

    Returns
    -------
    The requested coordinate, or ``None`` when ``coord_type`` is neither
    ``'x'`` nor ``'y'`` (in which case ``row`` is not accessed at all).
    """
    # Guard first so the row is only indexed for a recognised coordinate name.
    if coord_type not in ('x', 'y'):
        return None
    point = row[geom]
    return point.x if coord_type == 'x' else point.y


In [44]:
# Copy the coordinates of each row's geometry into plain 'x' and 'y' columns
# so Bokeh can read them as ordinary numeric fields.
# NOTE(review): the recorded output of this cell shows projected coordinates rather
# than degrees, so a reprojection (to_crs) was presumably run before this cell --
# confirm that step is present when running the notebook top to bottom.
for axis_name in ('x', 'y'):
    gdp[axis_name] = gdp.apply(getPointCoords, geom='geometry', coord_type=axis_name, axis=1)
gdp.head(2)

Unnamed: 0,UNITID,Rank,Institution,TOTAL_Federal,DOD,DOE,HHS,NASA,NSF,USDA,Other,STATE,LAT,LON,geometry,x,y
0,162928,1,Johns Hopkins U.b,2104653,1037685,9125,618525,252470,37379,537,148932,MD,39.33,-76.62,POINT (1643848.568113441 1980093.375177886),1643849.0,1980093.0
1,236948,2,"U. Washington, Seattle",946787,85472,30932,612050,16815,113679,15404,72435,WA,47.66,-122.31,POINT (-1964758.151342937 3014368.153047035),-1964758.0,3014368.0


In [45]:
### Data source and glyph for the Bokeh plot.
# An attempt to follow https://bokeh.pydata.org/en/latest/docs/gallery/texas.html failed;
# this follows http://www.bigendiandata.com/2017-06-27-Mapping_in_Jupyter/ without the GMap parts.
color_mapper = LinearColorMapper(palette='Viridis6')

# (source field name, gdp column it is read from), in the order the fields are added.
# Note that the 'lat'/'lon' fields are filled from the 'y'/'x' columns.
field_to_column = [
    ('lat', 'y'),
    ('lon', 'x'),
    ('rank', 'Rank'),
    ('institution', 'Institution'),
    ('state', 'STATE'),
    ('DOD', 'DOD'),
    ('DOE', 'DOE'),
    ('HHS', 'HHS'),
    ('NASA', 'NASA'),
    ('NSF', 'NSF'),
    ('USDA', 'USDA'),
    ('Other', 'Other'),
]
source = ColumnDataSource(
    data={field: gdp[column].tolist() for field, column in field_to_column}
)

# Circle glyph positioned by the 'lon'/'lat' fields of the source.
circle = Circle(x='lon', y='lat', size=5)

In [47]:
# Build the figure, draw one circle per institution and attach the hover tooltips.
p = figure(plot_width=1080, plot_height=720, title="Points with Tooltips",
           toolbar_location='below', toolbar_sticky=False)
p.add_glyph(source, circle)

# NOTE(review): this tool list is never passed to figure(), so the plot keeps
# Bokeh's default toolbar. Pass tools=tools to figure() above if only these
# tools are wanted.
tools = 'pan, wheel_zoom, reset'

# Each tooltip row is (label, '@field'); '@field' refers to a field of `source`.
hover = HoverTool(tooltips=[
    ('institution', '@institution'),
    ('lon ', '@lon'),
    ('lat ', '@lat'),
    ('rank', '@rank'),
    ('DOD ', '@DOD'),
    ('DOE ', '@DOE'),
    ('HHS', '@HHS'),
    ('NASA', '@NASA'),
    ('NSF ', '@NSF'),
    ('USDA', '@USDA'),
    ('Other', '@Other'),
])

# The original cell contained a bare `p.add_tools` (attribute reference, never
# called) and then appended to p.tools directly; register the tool through the
# add_tools() API instead.
p.add_tools(hover)

show(p)