# Data Exploration for Police Shooting Data

**[Synopsis] This notebook explores the US Cities dataset used by the Police Shooting Dashboard.**

The dataset can be downloaded from the following URL:
* [US Cities Data](https://simplemaps.com/data/us-cities)

*****

In [1]:
import requests
import configparser
# Load project settings from config.ini in the current working directory.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail fast here so a missing config surfaces in
# this cell rather than as an opaque KeyError when a setting is looked up later.
loadedConfigFiles = config.read('config.ini')
if not loadedConfigFiles:
    raise FileNotFoundError("config.ini was not found or could not be read from the working directory")
# Keep the list of parsed files as the cell's displayed result.
loadedConfigFiles

['config.ini']

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as func
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType, BooleanType, FloatType

spark = SparkSession.builder.master('local[*]').appName('DataExploration').getOrCreate()

In [3]:
# Read the US cities CSV (path taken from the 'pathways' section of the config),
# treating the first row as the header and letting Spark infer column types.
usCitiesDF = (
    spark.read
    .options(**{'header': 'True', 'inferSchema': 'true'})
    .csv(config['pathways']['usCities'])
)

# Register the DataFrame under the name 'usCities' so it can be queried with spark.sql.
usCitiesDF.createOrReplaceTempView('usCities')

In [5]:
# Print each column's name, type and nullability for the loaded DataFrame.
usCitiesDF.printSchema()

root
 |-- city: string (nullable = true)
 |-- city_ascii: string (nullable = true)
 |-- state_id: string (nullable = true)
 |-- state_name: string (nullable = true)
 |-- county_fips: integer (nullable = true)
 |-- county_name: string (nullable = true)
 |-- lat: double (nullable = true)
 |-- lng: double (nullable = true)
 |-- population: integer (nullable = true)
 |-- density: integer (nullable = true)
 |-- source: string (nullable = true)
 |-- military: boolean (nullable = true)
 |-- incorporated: boolean (nullable = true)
 |-- timezone: string (nullable = true)
 |-- ranking: integer (nullable = true)
 |-- zips: string (nullable = true)
 |-- id: integer (nullable = true)



### Data Preview

In [8]:
# Preview the naming, location and population columns.
overviewColumns = [
    'city', 'city_ascii',
    'state_id', 'state_name',
    'county_name',
    'lat', 'lng',
    'population',
]
usCitiesDF.select(*overviewColumns).show()

+-------------+-------------+--------+--------------------+--------------------+-------+---------+----------+
|         city|   city_ascii|state_id|          state_name|         county_name|    lat|      lng|population|
+-------------+-------------+--------+--------------------+--------------------+-------+---------+----------+
|     New York|     New York|      NY|            New York|            New York|40.6943| -73.9249|  18713220|
|  Los Angeles|  Los Angeles|      CA|          California|         Los Angeles|34.1139|-118.4068|  12750807|
|      Chicago|      Chicago|      IL|            Illinois|                Cook|41.8373| -87.6862|   8604203|
|        Miami|        Miami|      FL|             Florida|          Miami-Dade|25.7839| -80.2102|   6445545|
|       Dallas|       Dallas|      TX|               Texas|              Dallas|32.7936| -96.7662|   5743938|
| Philadelphia| Philadelphia|      PA|        Pennsylvania|        Philadelphia|40.0077| -75.1339|   5649300|
|      Hou

In [10]:
# Preview the remaining descriptive columns (density through id).
metadataColumns = [
    'density', 'source',
    'military', 'incorporated',
    'timezone', 'ranking',
    'zips', 'id',
]
usCitiesDF.select(*metadataColumns).show()

+-------+-------+--------+------------+-------------------+-------+--------------------+----------+
|density| source|military|incorporated|           timezone|ranking|                zips|        id|
+-------+-------+--------+------------+-------------------+-------+--------------------+----------+
|  10715|polygon|   false|        true|   America/New_York|      1|11229 11226 11225...|1840034016|
|   3276|polygon|   false|        true|America/Los_Angeles|      1|90291 90293 90292...|1840020491|
|   4574|polygon|   false|        true|    America/Chicago|      1|60018 60649 60641...|1840000494|
|   5019|polygon|   false|        true|   America/New_York|      1|33129 33125 33126...|1840015149|
|   1526|polygon|   false|        true|    America/Chicago|      1|75287 75098 75233...|1840019440|
|   4554|polygon|   false|        true|   America/New_York|      1|19154 19151 19150...|1840000673|
|   1399|polygon|   false|        true|    America/Chicago|      1|77069 77068 77061...|1840020925|


### Relevant Columns

In [12]:
# Preview only the columns flagged as relevant: state, city and county names.
(
    usCitiesDF
    .select(
        'state_id',
        'state_name',
        'city',
        'county_name',
    )
    .show()
)

+--------+--------------------+-------------+--------------------+
|state_id|          state_name|         city|         county_name|
+--------+--------------------+-------------+--------------------+
|      NY|            New York|     New York|            New York|
|      CA|          California|  Los Angeles|         Los Angeles|
|      IL|            Illinois|      Chicago|                Cook|
|      FL|             Florida|        Miami|          Miami-Dade|
|      TX|               Texas|       Dallas|              Dallas|
|      PA|        Pennsylvania| Philadelphia|        Philadelphia|
|      TX|               Texas|      Houston|              Harris|
|      GA|             Georgia|      Atlanta|              Fulton|
|      DC|District of Columbia|   Washington|District of Columbia|
|      MA|       Massachusetts|       Boston|             Suffolk|
|      AZ|             Arizona|      Phoenix|            Maricopa|
|      WA|          Washington|      Seattle|                K

In [15]:
# Look up a single city (Shelton, WA) through the 'usCities' temp view.
sheltonQuery = """
    SELECT 
        state_id,
        state_name,
        county_name,
        city
    FROM usCities where state_id = 'WA' and city = 'Shelton'
"""
sheltonResult = spark.sql(sheltonQuery)
sheltonResult.show()

+--------+----------+-----------+-------+
|state_id|state_name|county_name|   city|
+--------+----------+-----------+-------+
|      WA|Washington|      Mason|Shelton|
+--------+----------+-----------+-------+

