In [19]:
import pandas as pd
from sqlalchemy import create_engine
import pymongo

### EXTRACT

In [20]:
# DATA FILES
data_shootings_path = "data/data_shootings.csv"
data_shootings_locations_path = "data/data_shootings_locations.csv"

# READ
shootings_df = pd.read_csv(data_shootings_path)
locations_df = pd.read_csv(data_shootings_locations_path)

In [21]:
shootings_df.head(3)

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False


In [22]:
locations_df.head(3)

Unnamed: 0,city,lon,lat
0,Los Angeles CA,-118.244476,34.054935
1,Phoenix AZ,-112.077346,33.448587
2,Houston TX,-95.367697,29.758938


### TRANSFORM

In [24]:
# CLEAN LOCATIONS DF.

# REMOVE STATE FROM CITY COLUMN.
locations_df_2 = locations_df.city.str.rsplit(" ", 1, expand=True).rename(lambda x: f'col{x + 1}', axis=1)
locations_df_2

# RENAME COLUMNS
locations_df_2  = locations_df_2.rename(columns={'col1':'City'})
locations_df_2 = locations_df_2.rename(columns={'col2':'State'})
locations_df_2

Unnamed: 0,City,State
0,Los Angeles,CA
1,Phoenix,AZ
2,Houston,TX
3,Chicago,IL
4,Las Vegas,NV
...,...,...
1999,Gainesville,GA
2000,Martins Ferry,OH
2001,Soddy-Daisy,TN
2002,Kearny,AZ


In [25]:
# MERGE BOTH DFs
merged_loc_df = locations_df_2.join(other=locations_df,how='left')

#DROP EXTRA CITY COLUMN
merged_loc_df = merged_loc_df.drop('city', 1)
merged_loc_df

Unnamed: 0,City,State,lon,lat
0,Los Angeles,CA,-118.244476,34.054935
1,Phoenix,AZ,-112.077346,33.448587
2,Houston,TX,-95.367697,29.758938
3,Chicago,IL,-87.624421,41.875555
4,Las Vegas,NV,-115.149225,36.166286
...,...,...,...,...
1999,Gainesville,GA,-83.824066,34.297879
2000,Martins Ferry,OH,-80.724526,40.095906
2001,Soddy-Daisy,TN,-85.190790,35.235903
2002,Kearny,AZ,-110.910666,33.057009


In [26]:
# MATCH CITY COLUMN NAME CASE FOR THE NEXT STEP
merged_loc_df  = merged_loc_df.rename(columns={'City':'city'})
merged_loc_df

Unnamed: 0,city,State,lon,lat
0,Los Angeles,CA,-118.244476,34.054935
1,Phoenix,AZ,-112.077346,33.448587
2,Houston,TX,-95.367697,29.758938
3,Chicago,IL,-87.624421,41.875555
4,Las Vegas,NV,-115.149225,36.166286
...,...,...,...,...
1999,Gainesville,GA,-83.824066,34.297879
2000,Martins Ferry,OH,-80.724526,40.095906
2001,Soddy-Daisy,TN,-85.190790,35.235903
2002,Kearny,AZ,-110.910666,33.057009


In [27]:
# MERGE SHOOTINGS DF WITH LOCATIONS DF
merge_df = pd.merge(merged_loc_df, shootings_df, on='city', how='outer')
merge_df.head(3)

Unnamed: 0,city,State,lon,lat,id,name,date,manner_of_death,armed,age,gender,race,state,signs_of_mental_illness,threat_level,flee,body_camera
0,Los Angeles,CA,-118.244476,34.054935,75.0,Pablo Meza,2015-01-17,shot,gun,24.0,M,H,CA,False,attack,Not fleeing,False
1,Los Angeles,CA,-118.244476,34.054935,194.0,Charly Leundeu Keunang,2015-03-01,shot and Tasered,unarmed,43.0,M,B,CA,True,attack,Not fleeing,True
2,Los Angeles,CA,-118.244476,34.054935,796.0,Aaron Valdez,2015-03-11,shot,,25.0,M,H,CA,False,other,Not fleeing,False


In [28]:
# DROP EXTRA STATE COLUMN
merge_df = merge_df.drop('state' , 1)
merge_df.head(3)

Unnamed: 0,city,State,lon,lat,id,name,date,manner_of_death,armed,age,gender,race,signs_of_mental_illness,threat_level,flee,body_camera
0,Los Angeles,CA,-118.244476,34.054935,75.0,Pablo Meza,2015-01-17,shot,gun,24.0,M,H,False,attack,Not fleeing,False
1,Los Angeles,CA,-118.244476,34.054935,194.0,Charly Leundeu Keunang,2015-03-01,shot and Tasered,unarmed,43.0,M,B,True,attack,Not fleeing,True
2,Los Angeles,CA,-118.244476,34.054935,796.0,Aaron Valdez,2015-03-11,shot,,25.0,M,H,False,other,Not fleeing,False


In [29]:
# CHECK FOR NULL VALUES
merge_df.isnull().sum()

city                         0
State                      723
lon                        723
lat                        723
id                           2
name                         2
date                         2
manner_of_death              2
armed                      312
age                        308
gender                       4
race                       698
signs_of_mental_illness      2
threat_level                 2
flee                       321
body_camera                  2
dtype: int64

In [30]:
# DROP NULL VALUES
merge_df = merge_df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

# CHANGE STATE TO LOWER CASE
merge_df  = merge_df.rename(columns={'State':'state'})
merge_df

merge_df.head(3)

Unnamed: 0,city,state,lon,lat,id,name,date,manner_of_death,armed,age,gender,race,signs_of_mental_illness,threat_level,flee,body_camera
0,Los Angeles,CA,-118.244476,34.054935,75.0,Pablo Meza,2015-01-17,shot,gun,24.0,M,H,False,attack,Not fleeing,False
1,Los Angeles,CA,-118.244476,34.054935,194.0,Charly Leundeu Keunang,2015-03-01,shot and Tasered,unarmed,43.0,M,B,True,attack,Not fleeing,True
3,Los Angeles,CA,-118.244476,34.054935,331.0,Roberto Rodriguez,2015-04-08,shot,gun,39.0,M,H,False,attack,Foot,False


### LOAD TO POSTGRES

In [15]:
# CONNECT TO LOCAL DATABASE
rds_connection_string = "postgres:ComisarioRex21@localhost:5432/police_shootings_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [16]:
# CHECK FOR TABLES
engine.table_names()

[]

In [17]:
# USE PANDAS TO LOAD DF INTO SQL DB
merge_df.to_sql(name='merge_df', con=engine, if_exists='append', index=False)

In [18]:
# COMFIRM DATA HAS BEEN ADDED BY QUERYING THE TABLE
pd.read_sql_query('select * from merge_df', con=engine).head()

Unnamed: 0,city,state,lon,lat,id,name,date,manner_of_death,armed,age,gender,race,signs_of_mental_illness,threat_level,flee,body_camera
0,Los Angeles,CA,-118.244476,34.054935,75.0,Pablo Meza,2015-01-17,shot,gun,24.0,M,H,False,attack,Not fleeing,False
1,Los Angeles,CA,-118.244476,34.054935,194.0,Charly Leundeu Keunang,2015-03-01,shot and Tasered,unarmed,43.0,M,B,True,attack,Not fleeing,True
2,Los Angeles,CA,-118.244476,34.054935,331.0,Roberto Rodriguez,2015-04-08,shot,gun,39.0,M,H,False,attack,Foot,False
3,Los Angeles,CA,-118.244476,34.054935,495.0,Luis Martinez,2015-04-21,shot,knife,35.0,M,H,True,other,Not fleeing,False
4,Los Angeles,CA,-118.244476,34.054935,619.0,Jason Hendley,2015-07-06,shot,knife,29.0,M,B,False,attack,Not fleeing,False


### LOAD TO MONGO

In [31]:
# ESTABLISH CONNECTION
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [32]:
# DEFINE THE CLASS IN MONGO
db = client.police_shootings_db

In [35]:
# INSERT DF TO MONGO DB
client.police_shootings_db.police_shootings.insert_many(merge_df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x1171850a0>

In [37]:
# COMFIRM DATA HAS BEEN ADDED TO MONGO DB

police_shootings = db.police_shootings
for shootings in police_shootings:
    print(shootings)

{'_id': ObjectId('5edf01a9b6c40d5bb5f922f8'), 'city': 'Los Angeles', 'state': 'CA', 'lon': -118.244476, 'lat': 34.054935, 'id': 75.0, 'name': 'Pablo Meza', 'date': '2015-01-17', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 24.0, 'gender': 'M', 'race': 'H', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f922f9'), 'city': 'Los Angeles', 'state': 'CA', 'lon': -118.244476, 'lat': 34.054935, 'id': 194.0, 'name': 'Charly Leundeu Keunang', 'date': '2015-03-01', 'manner_of_death': 'shot and Tasered', 'armed': 'unarmed', 'age': 43.0, 'gender': 'M', 'race': 'B', 'signs_of_mental_illness': True, 'threat_level': 'attack', 'flee': 'Not fleeing', 'body_camera': True}
{'_id': ObjectId('5edf01a9b6c40d5bb5f922fa'), 'city': 'Los Angeles', 'state': 'CA', 'lon': -118.244476, 'lat': 34.054935, 'id': 331.0, 'name': 'Roberto Rodriguez', 'date': '2015-04-08', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 39.

{'_id': ObjectId('5edf01a9b6c40d5bb5f9252c'), 'city': 'Miami', 'state': 'OK', 'lon': -94.877374, 'lat': 36.8746177, 'id': 1398.0, 'name': 'Ethan James Rincon', 'date': '2016-03-22', 'manner_of_death': 'shot', 'armed': 'pick-axe', 'age': 25.0, 'gender': 'M', 'race': 'H', 'signs_of_mental_illness': True, 'threat_level': 'other', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f9252d'), 'city': 'Miami', 'state': 'OK', 'lon': -94.877374, 'lat': 36.8746177, 'id': 1498.0, 'name': 'Kendar del Rosario', 'date': '2016-04-28', 'manner_of_death': 'shot', 'armed': 'knife', 'age': 37.0, 'gender': 'M', 'race': 'B', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f9252e'), 'city': 'Miami', 'state': 'OK', 'lon': -94.877374, 'lat': 36.8746177, 'id': 1551.0, 'name': 'Kentrill William Carraway', 'date': '2016-05-19', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 22.0, 'gender'

{'_id': ObjectId('5edf01a9b6c40d5bb5f92890'), 'city': 'Memphis', 'state': 'TN', 'lon': -90.0516285, 'lat': 35.1490215, 'id': 4780.0, 'name': 'Brandon Webber', 'date': '2019-06-12', 'manner_of_death': 'shot', 'armed': 'vehicle', 'age': 20.0, 'gender': 'M', 'race': 'B', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Foot', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92891'), 'city': 'Memphis', 'state': 'TN', 'lon': -90.0516285, 'lat': 35.1490215, 'id': 4914.0, 'name': 'James Lee Kirkwood', 'date': '2019-08-05', 'manner_of_death': 'shot', 'armed': 'knife', 'age': 49.0, 'gender': 'M', 'race': 'B', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Foot', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92892'), 'city': 'Memphis', 'state': 'TN', 'lon': -90.0516285, 'lat': 35.1490215, 'id': 5489.0, 'name': 'Willie Hudson', 'date': '2019-09-18', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 33.0, 'gender': 'M', 'race': 'B', 

{'_id': ObjectId('5edf01a9b6c40d5bb5f92b02'), 'city': 'Atwater', 'state': 'CA', 'lon': -120.60908400000001, 'lat': 37.3477174, 'id': 3568.0, 'name': 'Timothy Breckenridge', 'date': '2018-03-30', 'manner_of_death': 'shot', 'armed': 'toy weapon', 'age': 42.0, 'gender': 'M', 'race': 'W', 'signs_of_mental_illness': False, 'threat_level': 'other', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92b03'), 'city': 'Temecula', 'state': 'CA', 'lon': -117.14736609999999, 'lat': 33.4946353, 'id': 1693.0, 'name': 'Sam Newby', 'date': '2016-07-05', 'manner_of_death': 'shot', 'armed': 'undetermined', 'age': 49.0, 'gender': 'M', 'race': 'W', 'signs_of_mental_illness': False, 'threat_level': 'other', 'flee': 'Car', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92b04'), 'city': 'East Point', 'state': 'GA', 'lon': -84.43937240000001, 'lat': 33.6795531, 'id': 1767.0, 'name': 'Jamarion Rashad Robinson', 'date': '2016-08-05', 'manner_of_death': 'shot', 'armed':

{'_id': ObjectId('5edf01a9b6c40d5bb5f92f8c'), 'city': 'Moreno Valley', 'state': 'CA', 'lon': -117.23059440000002, 'lat': 33.937517, 'id': 2780.0, 'name': 'Vaughn Shaw', 'date': '2017-07-15', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 23.0, 'gender': 'M', 'race': 'B', 'signs_of_mental_illness': True, 'threat_level': 'other', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92f8d'), 'city': 'St. Joseph', 'state': 'MO', 'lon': -94.8466322, 'lat': 39.7686055, 'id': 1711.0, 'name': 'Larry Darnell Gordon', 'date': '2016-07-11', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 44.0, 'gender': 'M', 'race': 'W', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f92f8e'), 'city': 'St. Joseph', 'state': 'MO', 'lon': -94.8466322, 'lat': 39.7686055, 'id': 2353.0, 'name': 'Jason Fanning', 'date': '2017-02-20', 'manner_of_death': 'shot', 'armed': 'unarmed', 'age': 27.0, 

{'_id': ObjectId('5edf01a9b6c40d5bb5f9322a'), 'city': 'Sparta', 'state': 'NC', 'lon': -81.1209189, 'lat': 36.5054071, 'id': 1462.0, 'name': 'Koltlee Whitson', 'date': '2016-04-13', 'manner_of_death': 'shot', 'armed': 'gun', 'age': 26.0, 'gender': 'M', 'race': 'W', 'signs_of_mental_illness': False, 'threat_level': 'attack', 'flee': 'Car', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f9322b'), 'city': 'Sparta', 'state': 'NC', 'lon': -81.1209189, 'lat': 36.5054071, 'id': 3280.0, 'name': 'Skyler Burnette', 'date': '2018-01-06', 'manner_of_death': 'shot', 'armed': 'knife', 'age': 21.0, 'gender': 'M', 'race': 'N', 'signs_of_mental_illness': False, 'threat_level': 'other', 'flee': 'Not fleeing', 'body_camera': False}
{'_id': ObjectId('5edf01a9b6c40d5bb5f9322c'), 'city': 'Sparta', 'state': 'NC', 'lon': -81.1209189, 'lat': 36.5054071, 'id': 3613.0, 'name': 'Chad Eric Montgomery', 'date': '2018-04-11', 'manner_of_death': 'shot', 'armed': 'machete', 'age': 39.0, 'gender': 'M', 'race'

In [None]:
# EXTRACT DF TO JSON FILE
merge_df.to_json("police_shootings.json")

In [None]:
# EXTRACT DF TO CSV FILE
merge_df.to_csv("police_shootings.csv")