## Set Up Spark Session

In [1]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("test").getOrCreate()


24/11/15 03:17:19 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


## Authenticate arcgis api for python
Use https://ucr.maps.arcgis.com/ to sign in with ucr credentials so you have access to arcgis pro and api development permissions.

Then go to content > new item > developer credential > continue > then type in redirect url = urn:ietf:wg:oauth:2.0:oob and url you can use https://localhost. This will create the client_id I believe which is what I used below. Use your own client id in the next cell, and then when you run it it should send you a password link. Type the password link into the box.



In [2]:
from arcgis.gis import GIS
import arcgis
from arcgis.map import Map
print(arcgis.__version__)


# Authenticate GIS.
my_client_id = "Sry1p3Nb1sg3fciJ"
gis = GIS("https://ucr.maps.arcgis.com", client_id=my_client_id)

2.4.0
Please sign in to your GIS and paste the code that is obtained below.
If a web browser does not automatically open, please navigate to the URL below yourself instead.
Opening web browser to navigate to: https://ucr.maps.arcgis.com/sharing/rest/oauth2/authorize?response_type=code&client_id=Sry1p3Nb1sg3fciJ&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&state=pPSe2dS5zyy5Ge8y8XgYTHNlri2CQc&allow_verification=false


24/11/15 03:17:31 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors


Enter code obtained on signing in using SAML:  ········


## Access featurecollection of riverside reporting districts

Using map from owner:"KPrakah@riversideca.gov_CityOfRiverside"


In [3]:
# Access a web map of riverside reporting districts (RDs)
item = gis.content.get("4855e57db5d3430cb94ddd32688ab7b7")
feature_collection = item.layers[0]

In [4]:
# Update Esri Requirement
feature_collection.properties.layerDefinition.htmlPopupType = 'esriServerHTMLPopupTypeAsHTMLText'
# Display what a feature looks like. This gets a featureSet from a featureCollection, then displays
# the first feature in the feature set.
feature_collection.query().features[0]

{"geometry": {"rings": [[[-13059322, 4013797], [-13059306, 4013790], [-13059307, 4013320], [-13059241, 4013320], [-13059241, 4013420], [-13059108, 4013418], [-13059090, 4013415], [-13059128, 4013345], [-13059039, 4013291], [-13059101, 4013120], [-13059306, 4013120], [-13059307, 4011761], [-13061269, 4011727], [-13061268, 4013666], [-13061036, 4013698], [-13060068, 4013920], [-13059782, 4013912], [-13059322, 4013797]]], "spatialReference": {"wkid": 102100, "latestWkid": 3857}}, "attributes": {"FID": 0, "OBJECTID": 1, "NPC": "EAST", "NAME": "K03"}}

## Query Riverside_Crime_reports using Spark by Reporting District

In [5]:

from pyspark.sql.functions import col


df = spark.read.csv("Riverside_Crime_Reports.csv", header=True, inferSchema=True)
df.columns
# Filter rows where crimeType is 'ROBBERY' and group by rpc column, counting the occurrences
result = (df.select("rd", "crimeType")
          .groupBy("rd")
          .count()
          .select("rd", "Count")
          .withColumnRenamed("rd", "NAME"))

# Show the result
result.show()
pd_crime = result.toPandas()

+----+-----+
|NAME|Count|
+----+-----+
| B05|  362|
| Z21|   25|
| E02|  983|
| G12|  367|
| 17A|  288|
| B01|  416|
| I01|  385|
| K03|  624|
| I06|  488|
| D16|  429|
| F01| 1060|
| I04| 1268|
| I14|  162|
| F04|  145|
| B06|  314|
| I03|  850|
|   3|    5|
| H14| 1800|
| A01| 2008|
| I02|  804|
+----+-----+
only showing top 20 rows



## Merge these counts into the FeatureCollection

In [6]:
print("before:")
feature_collection.query().features[0]

before:


{"geometry": {"rings": [[[-13059322, 4013797], [-13059306, 4013790], [-13059307, 4013320], [-13059241, 4013320], [-13059241, 4013420], [-13059108, 4013418], [-13059090, 4013415], [-13059128, 4013345], [-13059039, 4013291], [-13059101, 4013120], [-13059306, 4013120], [-13059307, 4011761], [-13061269, 4011727], [-13061268, 4013666], [-13061036, 4013698], [-13060068, 4013920], [-13059782, 4013912], [-13059322, 4013797]]], "spatialReference": {"wkid": 102100, "latestWkid": 3857}}, "attributes": {"FID": 0, "OBJECTID": 1, "NPC": "EAST", "NAME": "K03"}}

In [7]:
fset = feature_collection.query()

In [8]:
import pandas as pd
from arcgis.features import FeatureSet

#convert featureset to dataframe, merge the dataframes, then convert back to a featureset
fset_df = fset.sdf
merged_df = pd.merge(pd_crime, fset_df, on="NAME") 
fset2 = FeatureSet.from_dataframe(merged_df)

# create new featurecollection from the featureset
from arcgis.features import FeatureCollection
feature_collection2 = FeatureCollection.from_featureset(fset2)

In [9]:
print("after:")
feature_collection2.query().features[0]

after:


{"geometry": {"rings": [[[-13058817, 4026512], [-13058816, 4026065], [-13058771, 4026002], [-13058753, 4025929], [-13058765, 4025784], [-13058754, 4025637], [-13058787, 4025537], [-13059786, 4025535], [-13059788, 4026032], [-13060099, 4026103], [-13060299, 4026191], [-13060467, 4026309], [-13060658, 4026518], [-13058817, 4026512]]], "spatialReference": {"wkid": 102100, "latestWkid": 3857}}, "attributes": {"NAME": "B05", "Count": 362, "FID": 113, "OBJECTID": 114, "NPC": "EAST"}}

## Create a map to display the data. You will be able to click on each reporting district to see statistics.

In [10]:
# Create the map using our featureCollection
my_map = Map()
my_map.basemap.basemap =  "arcgis-streets"
my_map.content.add(feature_collection2)

In [11]:
# Update the information that will be shown when clicking on a reporting district

from arcgis.map.popups import FieldInfo, PopupExpressionInfo
expression_infos = [
    PopupExpressionInfo(
        title="Name",
        expression="return $feature.NAME;"
    )
]

field_infos = [
    FieldInfo(
        fieldName="NAME",
        label="Reporting District",
        visible=True
    ),
    FieldInfo(
        fieldName="Count",
        label="Number of Police Reports",
        visible=True
    )
]
my_map.content.popup(0).edit(title="HELLO", expression_infos=expression_infos, field_infos=field_infos)

True

In [12]:
# Update starting field of view, and display the map
my_map.extent = {'spatialReference': {'wkid': 102100}, 'xmin': -13087454.153546248, 'ymin': 4009445.945260928, 'xmax': -13044725.85473489, 'ymax': 4032377.0537464498}
# Display map
my_map

Map(extent={'spatialReference': {'wkid': 102100}, 'xmin': -13087454.153546248, 'ymin': 4009445.945260928, 'xma…