In [None]:
%%html
<style>
/* Page-level layout for the title slide. */
.container {width: 99% !important; }
#logo
{
    float: left;
}
#names
{
    float: right;
    
}
#logo2
{
    float: right;
    /* CSS declarations use "property: value" with a unit; the former
       "height=100" / "width=100" forms were invalid and ignored. */
    height: 100px;
    width: 100px;
    margin: 5px;
}
#top_menu
{
    margin: 20px auto 0;
    
    height:300px;
    width: 400px;
}
</style>




<div id="logo"><img src="./img/ScienceOfWhere2.PNG"/></div>

<div id="top_menu">
    <h2>ArcGIS API for Python and Data Scientists</h2>
    <p/> By: Andrew Chapkowski and Rohit Singh
</div>


<center><img src="./img/slide2_header.jpg" width=1000 height=600/></center>
<br/>



## What is a Data Scientist?



    It's a person employed to analyze and interpret complex digital data, such as the usage statistics of a website, especially in order to assist a business in its decision-making.

# Getting Started with GIS

In [None]:
from arcgis.gis import GIS

In [None]:
# Connect to a GIS as user 'geodevtest'. No URL or password is passed here --
# presumably this targets ArcGIS Online and prompts for the password; confirm
# against the arcgis.gis.GIS documentation.
gis = GIS(username='geodevtest')

In [None]:
# Credentials used by the Enterprise portal connection in the next cell.
# NOTE(review): the password is a placeholder to fill in by hand; avoid saving
# a real password in the notebook (e.g. prompt for it with getpass instead).
username = 'admin'
password = '<insert_password_here>'

In [None]:
# Sign in to the portal at achap.esri.com with `username` / `password`.
# NOTE(review): verify_cert=False is passed -- presumably this turns off TLS
# certificate verification (e.g. for a self-signed certificate); confirm before
# reusing this setting against any other portal.
enterprise = GIS('https://achap.esri.com/portal', username, password, verify_cert=False)

# Finding and Locating Content

- ArcGIS Online has a wealth of information
- Simple search interface
- Two ways to get information from your GIS

    + By ID
    + By Searching



In [None]:
from IPython.display import display

In [None]:
# Search for "Feature Layer" items matching the query "owner:esri AND World Cities".
# outside_org=True presumably widens the search beyond the signed-in
# organization -- confirm against the ContentManager.search documentation.
# NOTE(review): a later cell indexes items[0] and items[1], so it depends on
# the number and order of the results returned here.
items = gis.content.search("owner:esri AND World Cities", item_type="Feature Layer", outside_org=True)
# Render each result with its rich notebook representation.
for item in items:
    display(item)

In [None]:
# Fetch one item directly by its item ID (this rebinds `item`, which the
# search loop in the previous cell also assigned) and display it.
item = gis.content.get("6996f03a1b364dbab4008d99380370ed")
item 

# Visualize Data on Maps

<img src="./img/02_MapWidget_Basemaps_03.gif" />

- matplotlib syntax
- easy to use

In [None]:
# Create a map widget for 'Indianapolis' and display it.
# Note that this builds a fresh GIS() with no arguments rather than reusing
# the `gis` connection created earlier -- presumably an anonymous connection.
m = GIS().map('Indianapolis')
m

In [None]:
# Change the basemap and zoom level of the map widget `m` created in the
# previous cell by assigning to its properties.
m.basemap = 'dark-gray-vector'
m.zoom = 6

In [None]:
# Add the first layer of the second search result, then of the first search
# result, to the map widget.
# NOTE(review): requires `items` to hold at least two results and depends on
# the order the search returned them in.
m.add_layer(items[1].layers[0])
m.add_layer(items[0].layers[0])

# Spatial DataFrame 

<img src='./img/pandas_logo.png' width=490/>

   + Built on Pandas
   + Easy Query, Update operations
   + Ability to render and plot spatial data


# Features

- Spatial Indexing
- Consumes both local and web content

## Supports Open Source Packages

- Integrates with Fiona, pyshp, and ArcPy

# Demo: Mapping Flight Paths

<img src="./img/airplane-fact.jpg" width=500/>

In [None]:
import sqlite3
import pandas as pd
# Open the SQLite flights database; `conn` is reused by the later query cells
# and is not closed anywhere in the visible notebook.
conn = sqlite3.connect("./data/flights.db")
# Load the entire airlines table into a DataFrame and preview its first rows.
df_airlines = pd.read_sql_query("SELECT * from airlines", conn)
df_airlines.head()

In [None]:
# Load the entire routes table over the shared `conn` connection and preview
# its first rows.
df_routes = pd.read_sql_query("select * from routes", conn)
df_routes.head()

In [None]:
# Load the entire airports table over the shared `conn` connection and preview
# its first rows.
df_apt = pd.read_sql_query("select * from airports", conn)
df_apt.head()

## Data Wrangling

- **Type issues:** latitude/longitude come back from the database as strings and must be cast to floats

In [None]:
# Convert the airport coordinate columns to floats, replacing each column in
# `df_apt` with its converted version.
df_apt['latitude'] = df_apt['latitude'].astype(float)
df_apt['longitude'] = df_apt['longitude'].astype(float)

In [None]:
from arcgis.geometry import Point
# Spatial reference (WKID 4326) shared by every airport point.
sr = {'wkid' : 4326}
# Build one Point per airport row: x is the longitude, y is the latitude.
# The columns are read by name rather than by position in to_records(), whose
# records also carry the index as their first field and so shift every offset.
geoms = [Point({'x': lon, 'y': lat, 'spatialReference' : sr})
         for lat, lon in zip(df_apt['latitude'], df_apt['longitude'])]

In [None]:
from arcgis.features import SpatialDataFrame
# Combine the airports DataFrame with its Point geometries and the shared
# spatial reference into a SpatialDataFrame.
sdf = SpatialDataFrame(data=df_apt, geometry=geoms, sr=sr)
# Create and display a new map widget for 'Brazil'; this rebinds `m`, which
# earlier cells used for a different map.
m = GIS().map('Brazil')
m

In [None]:
# Draw the airport points onto the map widget `m`.
# NOTE(review): the keyword is spelled `pallette` here; presumably that is the
# name the installed arcgis version accepts -- confirm before renaming it.
# 'Blues_r' / 'Reds_r' look like colormap names -- confirm.
sdf.plot(kind='map', 
         map_widget=m,
         marker_size=3, 
         pallette='Blues_r',
         symbol_style='o',
         outline_color='Reds_r',
         line_width=.5,
         cstep=25)

In [None]:
from arcgis.geometry import Polyline
import greatcircle, sys
import numpy as np

## Plotting the Plane Routes

- Get the source airport for each route
- Get the destination airport for each route
- Cast to floats
- Plot on a map

In [None]:
# Join every route to its source airport (alias sa) and destination airport
# (alias da) and return the four endpoint coordinates, cast to float in SQL.
# Routes without a matching airport on either side are dropped by the inner joins.
routes = pd.read_sql_query("""
                           select cast(sa.longitude as float) as source_lon, 
                           cast(sa.latitude as float) as source_lat,
                           cast(da.longitude as float) as dest_lon,
                           cast(da.latitude as float) as dest_lat
                           from routes 
                           inner join airports sa on
                           sa.id = routes.source_id
                           inner join airports da on
                           da.id = routes.dest_id;
                           """, 
                           conn)
# Preview the first rows of the joined result.
routes.head()

In [None]:
# gcs collects one Polyline per kept route; recs collects the matching rows
# (as plain lists) so the two stay aligned index-for-index.
gcs = []
recs = []
# Only the first 3000 routes are considered.
for _, route in routes[:3000].iterrows():
    # Keep a route only when its endpoints are less than 90 degrees of
    # longitude apart; everything else is skipped.
    if not (abs(route["source_lon"] - route["dest_lon"]) < 90):
        continue
    recs.append(route.tolist())
    # Great circle between the source and destination airports.
    arc = greatcircle.GreatCircle(lat1=route['source_lat'],
                                  lat2=route['dest_lat'],
                                  lon1=route['source_lon'],
                                  lon2=route['dest_lon'])
    # Sample 5000 points along the arc.
    # NOTE(review): the first returned sequence becomes the first coordinate
    # of each path vertex -- confirm the return order of GreatCircle.points()
    # matches the x/y order Polyline expects.
    lat, long = arc.points(5000)
    path = np.column_stack([list(lat), list(long)]).tolist()
    gcs.append(Polyline({'paths' : [path],
                         'spatialReference' : {'wkid' : 4326}}))


In [None]:
# Build a SpatialDataFrame from the kept route records and their great-circle
# polylines, reusing the column names of `routes`, then preview it.
routes_sdf = SpatialDataFrame(data=recs, geometry=gcs, columns=routes.columns, sr=4326)
routes_sdf.head()

In [None]:
# Create and display a new map widget for 'Europe'; this rebinds `m` again, so
# the plotting cells that follow draw onto this widget.
m = GIS().map('Europe')
m

In [None]:
# Draw the airport points onto the current map widget `m`; same call as the
# earlier airport plot except for the outline_color value.
# NOTE(review): the `pallette` spelling is kept as written -- presumably the
# keyword the installed arcgis version accepts; confirm before renaming.
sdf.plot(kind='map', 
         map_widget=m,
         marker_size=3, 
         pallette='Blues_r',
         symbol_style='o',
         outline_color='Greens_r',
         line_width=.5,
         cstep=25)

In [None]:
# Draw only the first 500 routes (taken as a copy of that slice) onto the same
# map widget `m` as the airports.
# NOTE(review): the `pallette` spelling is kept as written -- presumably the
# keyword the installed arcgis version accepts; confirm before renaming.
routes_sdf.head(500).copy().plot(kind='map', 
                                 map_widget=m,
                                line_width=.5,
                                pallette='Reds_r',
                                cstep=50)

# Selecting Data

### Attribute Selection

In [75]:
# Query the first layer of `item` (bound in an earlier cell) with no filter
# and take the result as a DataFrame.
df = item.layers[0].query().df
# Attribute selection: keep only rows whose CNTRY_NAME is 'Brazil' and preview them.
df[df['CNTRY_NAME'] == 'Brazil'].head()

Unnamed: 0,ADMIN_NAME,CITY_NAME,CNTRY_NAME,FID,FIPS_CNTRY,GMI_ADMIN,LABEL_FLAG,ObjectID,POP,POP_CLASS,POP_RANK,PORT_ID,STATUS,SHAPE
0,Mato Grosso,Cuiaba,Brazil,1,BR,BRA-MGR,0,0,521934,"500,000 to 999,999",3,0,Provincial capital,"{'x': -6244244.606196579, 'y': -1760180.180467..."
3,Distrito Federal,Brasilia,Brazil,4,BR,BRA-DFD,0,1,2207718,"1,000,000 to 4,999,999",2,0,National and provincial capital,"{'x': -5331952.879361972, 'y': -1780660.510942..."
4,Goias,Goiania,Brazil,5,BR,BRA-GOI,0,2,1171195,"1,000,000 to 4,999,999",2,0,Provincial capital,"{'x': -5483041.697485694, 'y': -1889069.697082..."
6,Mato Grosso do Sul,Campo Grande,Brazil,7,BR,BRA-MGD,0,3,729151,"500,000 to 999,999",3,0,Provincial capital,"{'x': -6079824.8259584475, 'y': -2326534.95185..."
78,Roraima,Boa Vista,Brazil,79,BR,BRA-ROR,0,535,235150,"100,000 to 249,999",5,0,Provincial capital,"{'x': -6753085.102171447, 'y': 313824.76209534..."


### Spatial Selection

- Perform spatial selection using extents

In [77]:
# Extent used for the selection -- presumably [xmin, ymin, xmax, ymax] in the
# same coordinate system as the SHAPE column; confirm against the sindex API.
sq = [-7548797.6103562601, -3508692.7589624999, -3881376.5246212599, 313824.76209535002]
# Ask the frame's spatial index which rows intersect the extent ...
index = df.sindex.intersect(sq)
# ... and select those rows by position, previewing the first few.
df.iloc[list(index)].head()

Unnamed: 0,ADMIN_NAME,CITY_NAME,CNTRY_NAME,FID,FIPS_CNTRY,GMI_ADMIN,LABEL_FLAG,ObjectID,POP,POP_CLASS,POP_RANK,PORT_ID,STATUS,SHAPE
128,Caaguazu,Coronel Oviedo,Paraguay,129,PA,PRY-CAA,0,127,51286,"50,000 to 99,999",6,0,Provincial capital,"{'x': -6281536.41337292, 'y': -2931488.7433651..."
129,Acre,Rio Branco,Brazil,130,BR,BRA-ACR,0,49,257642,"250,000 to 499,999",4,0,Provincial capital,"{'x': -7548797.610356256, 'y': -1116516.500888..."
130,Paraguari,Paraguari,Paraguay,131,PA,PRY-PRG,1,128,10079,"Less than 50,000",7,0,Provincial capital,"{'x': -6362020.065471231, 'y': -2952585.649088..."
0,Mato Grosso,Cuiaba,Brazil,1,BR,BRA-MGR,0,0,521934,"500,000 to 999,999",3,0,Provincial capital,"{'x': -6244244.606196579, 'y': -1760180.180467..."
4,Goias,Goiania,Brazil,5,BR,BRA-GOI,0,2,1171195,"1,000,000 to 4,999,999",2,0,Provincial capital,"{'x': -5483041.697485694, 'y': -1889069.697082..."


# Enriching Data

- Add variables to existing data

# Raster Analytics

- Rohit's Slides

# Geoprocessing


# Questions?


# Thank you

**Andrew Chapkowski**

**Rohit Singh**
