In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
from sodapy import Socrata
from app_token import app_token
from pgconnect import pgconnect

In [3]:
import json
import psycopg2

In [4]:
from sqlalchemy import create_engine

### POSTGRES CONNECTION

In [6]:
db = pgconnect['db']
host = pgconnect['host']
user = pgconnect['user']

In [7]:
# sqlalchemy engine for using .to_sql
url = f"postgres+psycopg2://{user}:@{host}:5432/{db}"
engine = create_engine(url)

In [9]:
con = psycopg2.connect(database=db,host=host,user=user)
cur = con.cursor()

In [10]:
### write the uof_filenum table
cur.execute(open('create_uof_filenum.sql').read())
con.commit() # commit the create statement to the database

In [4]:
# columns for uof_filenum table
table_cols = ['objectid',
'zip',
'filenum',
'occurred_d',
'occurred_t',
'current_ba',
'offsex',
'offrace',
'hire_dt',
'off_injure',
'offcondtyp',
'off_hospit',
'service_ty',
'uof_reason',
'cycles_num',
'street_n',
'street',
'street_g',
'street_t',
'address',
'citnum',
'citrace',
'citsex',
'cit_injure',
'citcondtyp',
'cit_arrest',
'cit_infl_a',
'citcharget',
'council_district',
'ra',
'beat',
'sector',
'division',
'x',
'y',
'geocoded_column',
'year_reported']

### API 

In [5]:
limit = 6000

In [6]:
client = Socrata("www.dallasopendata.com", app_token=app_token['token'])

In [7]:
url_dict = dict (
    data_2019 ='46zb-7qgj',
    data_2018 ='33un-ry4j',
    data_2017 ='tsu5-ca6k',
    data_2016 ='99fn-pvaf',
    data_2015 ='594v-2cnd',
    data_2014 ='xiv3-e8g7',
    data_2013 ='6gnu-avpf'
    )

### Processing the files in this order is essential to not corrupting the data by copying down values to empty cells

In [15]:
df_2019 = pd.DataFrame(client.get(url_dict['data_2019'], limit=limit))

In [16]:
df_2019.head(30)

Unnamed: 0,objectid,zip,filenum,uofnum,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,council_district,ra,beat,sector,division,x,y,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs
0,2817,75253,UF2019-1702,"62295, 63542",2019-12-01T00:00:00.000,10:34 PM,11285,Male,White,2017-03-08T00:00:00.000,...,D8,6062.0,357.0,350.0,SOUTHEAST,2557123.437,6944231.397,"{'type': 'Point', 'coordinates': [-96.586265, ...",8.0,3.0
1,2234,75208,UF2019-1344,61093,2019-10-06T00:00:00.000,12:50 AM,11208,Male,White,2016-08-24T00:00:00.000,...,D1,4160.0,444.0,440.0,SOUTHWEST,2474936.793,6952151.398,"{'type': 'Point', 'coordinates': [-96.853036, ...",1.0,3.0
2,2755,75231,UF2019-1665,62820,2019-12-31T00:00:00.000,11:37 PM,9415,Male,White,2008-04-02T00:00:00.000,...,D9,6034.0,247.0,240.0,NORTHEAST,2508349.267,7001784.466,"{'type': 'Point', 'coordinates': [-96.741661, ...",13.0,3.0
3,2110,75228,UF2019-1314,60990,2019-09-30T00:00:00.000,6:20 PM,9884,Male,Hispanic,2009-06-10T00:00:00.000,...,D9,1132.0,228.0,220.0,NORTHEAST,2536678.324,6999039.025,"{'type': 'Point', 'coordinates': [-96.649175, ...",13.0,3.0
4,1663,75051,UF2019-1030,"59592, 59600",2019-08-04T00:00:00.000,12:10 AM,10480,Male,Hispanic,2012-09-26T00:00:00.000,...,,,,,,2433285.622,6953645.72,"{'type': 'Point', 'coordinates': [-96.98722, 3...",,
5,2538,75217,UF2019-1539,62255,2019-11-20T00:00:00.000,12:30 AM,9697,Male,White,2008-12-10T00:00:00.000,...,D5,2196.0,331.0,330.0,SOUTHEAST,2518664.313,6949657.426,"{'type': 'Point', 'coordinates': [-96.710924, ...",5.0,3.0
6,1966,75214,UF2019-1218,60444,2019-09-11T00:00:00.000,12:43 AM,10431,Male,White,2012-06-20T00:00:00.000,...,D2,1171.0,111.0,110.0,CENTRAL,2511580.252,6981361.061,"{'type': 'Point', 'coordinates': [-96.732593, ...",12.0,3.0
7,515,75204,UF2019-0338,55835,2019-03-11T00:00:00.000,3:50 AM,11278,Male,White,2017-02-08T00:00:00.000,...,D14,2017.0,155.0,150.0,CENTRAL,2494741.403,6978419.971,"{'type': 'Point', 'coordinates': [-96.787302, ...",12.0,3.0
8,1768,75201,UF2019-1096,58582,2019-06-23T00:00:00.000,7:50 PM,11233,Male,White,2016-11-02T00:00:00.000,...,D14,2062.0,134.0,130.0,CENTRAL,2491108.139,6971148.627,"{'type': 'Point', 'coordinates': [-96.799308, ...",12.0,3.0
9,72,75207,UF2019-0045,54460,2019-01-07T00:00:00.000,8:15 PM,9770,Male,White,2009-03-18T00:00:00.000,...,D6,6067.0,514.0,510.0,NORTHWEST,2476175.336,6981696.717,"{'type': 'Point', 'coordinates': [-96.846541, ...",2.0,3.0


In [17]:
# replace NaN string values with None type for proper load into database
df_2019.replace(to_replace='NaN',value=None,inplace=True)

objectid                       None
zip                            None
filenum                        None
uofnum                         None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
address                        None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                  

In [18]:
# cast all empty values into None type
df_2019.where(pd.notnull(df_2019),None,inplace=True)

In [19]:
# convert the geocode column from dict to JSON string for load into database
df_2019['geocoded_column'] = df_2019['geocoded_column'].apply(lambda x:json.dumps(x))

In [20]:
# replace NULL string values with None type for proper load into database
df_2019.replace(to_replace='NULL',inplace=True)

objectid                       None
zip                            None
filenum                        None
uofnum                         None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
address                        None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                  

In [21]:
# adding the year reported values
df_2019['year_reported'] = '2019'

In [22]:
df_2019.head(30)

Unnamed: 0,objectid,zip,filenum,uofnum,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,ra,beat,sector,division,x,y,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,year_reported
0,2817,75253,UF2019-1702,"62295, 63542",2019-12-01T00:00:00.000,10:34 PM,11285,Male,White,2017-03-08T00:00:00.000,...,6062.0,357.0,350.0,SOUTHEAST,2557123.437,6944231.397,"{""type"": ""Point"", ""coordinates"": [-96.586265, ...",8.0,3.0,2019
1,2234,75208,UF2019-1344,61093,2019-10-06T00:00:00.000,12:50 AM,11208,Male,White,2016-08-24T00:00:00.000,...,4160.0,444.0,440.0,SOUTHWEST,2474936.793,6952151.398,"{""type"": ""Point"", ""coordinates"": [-96.853036, ...",1.0,3.0,2019
2,2755,75231,UF2019-1665,62820,2019-12-31T00:00:00.000,11:37 PM,9415,Male,White,2008-04-02T00:00:00.000,...,6034.0,247.0,240.0,NORTHEAST,2508349.267,7001784.466,"{""type"": ""Point"", ""coordinates"": [-96.741661, ...",13.0,3.0,2019
3,2110,75228,UF2019-1314,60990,2019-09-30T00:00:00.000,6:20 PM,9884,Male,Hispanic,2009-06-10T00:00:00.000,...,1132.0,228.0,220.0,NORTHEAST,2536678.324,6999039.025,"{""type"": ""Point"", ""coordinates"": [-96.649175, ...",13.0,3.0,2019
4,1663,75051,UF2019-1030,"59592, 59600",2019-08-04T00:00:00.000,12:10 AM,10480,Male,Hispanic,2012-09-26T00:00:00.000,...,,,,,2433285.622,6953645.72,"{""type"": ""Point"", ""coordinates"": [-96.98722, 3...",,,2019
5,2538,75217,UF2019-1539,62255,2019-11-20T00:00:00.000,12:30 AM,9697,Male,White,2008-12-10T00:00:00.000,...,2196.0,331.0,330.0,SOUTHEAST,2518664.313,6949657.426,"{""type"": ""Point"", ""coordinates"": [-96.710924, ...",5.0,3.0,2019
6,1966,75214,UF2019-1218,60444,2019-09-11T00:00:00.000,12:43 AM,10431,Male,White,2012-06-20T00:00:00.000,...,1171.0,111.0,110.0,CENTRAL,2511580.252,6981361.061,"{""type"": ""Point"", ""coordinates"": [-96.732593, ...",12.0,3.0,2019
7,515,75204,UF2019-0338,55835,2019-03-11T00:00:00.000,3:50 AM,11278,Male,White,2017-02-08T00:00:00.000,...,2017.0,155.0,150.0,CENTRAL,2494741.403,6978419.971,"{""type"": ""Point"", ""coordinates"": [-96.787302, ...",12.0,3.0,2019
8,1768,75201,UF2019-1096,58582,2019-06-23T00:00:00.000,7:50 PM,11233,Male,White,2016-11-02T00:00:00.000,...,2062.0,134.0,130.0,CENTRAL,2491108.139,6971148.627,"{""type"": ""Point"", ""coordinates"": [-96.799308, ...",12.0,3.0,2019
9,72,75207,UF2019-0045,54460,2019-01-07T00:00:00.000,8:15 PM,9770,Male,White,2009-03-18T00:00:00.000,...,6067.0,514.0,510.0,NORTHWEST,2476175.336,6981696.717,"{""type"": ""Point"", ""coordinates"": [-96.846541, ...",2.0,3.0,2019


In [23]:
# load into table
df_2019[table_cols].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

## Load 2018 data

In [None]:
## Should fix the trailing zeros in the current_ba

In [8]:
df_2018 = pd.DataFrame(client.get(url_dict['data_2018'], limit=limit))

In [9]:
df_2018.columns

Index(['filenum', 'uofnum', 'occurred_d', 'current_ba', 'offsex', 'offrace',
       'hire_dt', 'off_injure', 'offcondtyp', 'off_hospit', 'service_ty',
       'forcetype', 'uof_reason', 'cycles_num', 'forceeffec', 'street_n',
       'street', 'street_g', 'street_t', 'address', 'citnum', 'citrace',
       'citsex', 'cit_injure', 'citcondtyp', 'cit_arrest', 'cit_infl_a',
       'citcharget', 'ra', 'beat', 'sector', 'division', 'geocoded_column',
       ':@computed_region_sjyw_rtbm', ':@computed_region_2f7u_b5gs'],
      dtype='object')

In [12]:
df_2018.groupby('current_ba')['filenum'].count()

current_ba
0E-11                6
10005.00000000000    3
10009.00000000000    2
10013.00000000000    1
10020.00000000000    1
                    ..
9975.00000000000     1
9990.00000000000     2
9993.00000000000     1
9994.00000000000     2
9996.00000000000     2
Name: filenum, Length: 1047, dtype: int64

In [24]:
df_2018['current_ba'].sort_values().head(10)

2019                0E-11
2465                0E-11
2699                0E-11
2427                0E-11
1965                0E-11
1099                0E-11
2235    10005.00000000000
785     10005.00000000000
898     10005.00000000000
1598    10009.00000000000
Name: current_ba, dtype: object

In [29]:
df_2018.iloc[1099]

filenum                                                              UF2018-1239
uofnum                                                                     52493
occurred_d                                               2018-09-30T00:00:00.000
current_ba                                                                 0E-11
offsex                                                                    Female
offrace                                                                    Asian
hire_dt                                                  2017-06-28T00:00:00.000
off_injure                                                                    No
offcondtyp                                          No injuries noted or visible
off_hospit                                                                    No
service_ty                                                                Arrest
forcetype                                               Weapon display at Person
uof_reason                  

In [26]:
df_2018.head(30)

Unnamed: 0,filenum,uofnum,occurred_d,current_ba,offsex,offrace,hire_dt,off_injure,offcondtyp,off_hospit,...,cit_arrest,cit_infl_a,citcharget,ra,beat,sector,division,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs
0,UF2018-0945,51374,2018-08-03T00:00:00.000,8681.0,Male,White,2005-08-05T00:00:00.000,No,No injuries noted or visible,No,...,Yes,Alchohol,Public Intoxication,2062.0,134.0,130.0,CENTRAL,"{'type': 'Point', 'coordinates': [-96.799965, ...",12.0,3.0
1,UF2018-0868,51056,2018-07-20T00:00:00.000,8019.0,Male,Hispanic,2001-04-13T00:00:00.000,No,No injuries noted or visible,No,...,Yes,Mentally unstable,APOWW,4119.0,413.0,410.0,SOUTHWEST,"{'type': 'Point', 'coordinates': [-90.516982, ...",,
2,UF2018-1228,52701,2018-10-10T00:00:00.000,11171.0,Male,Hispanic,2016-04-06T00:00:00.000,Yes,Laceration/Cut,No,...,Yes,Agitated,Assault/Public Servant,2073.0,134.0,130.0,CENTRAL,,,
3,UF2018-0912,51050,2018-07-19T00:00:00.000,6412.0,Male,Black,1990-07-16T00:00:00.000,Yes,Sprain/Strain,No,...,Yes,Alchohol,Assault,3070.0,523.0,520.0,NORTHWEST,,,
4,UF2018-1014,"51858, 51859",2018-08-18T00:00:00.000,10991.0,Male,White,2015-03-25T00:00:00.000,Yes,Redness/Swelling,No,...,Yes,Mentally unstable,Assault/Public Servant,1247.0,327.0,320.0,SOUTHEAST,"{'type': 'Point', 'coordinates': [-96.648101, ...",7.0,3.0
5,UF2018-1650,53440,2018-11-25T00:00:00.000,11459.0,Male,Hispanic,2018-02-07T00:00:00.000,No,No injuries noted or visible,No,...,Yes,Agitated,"Assault/FV, Resisting Arrest",3046.0,526.0,520.0,NORTHWEST,"{'type': 'Point', 'coordinates': [-96.857673, ...",2.0,3.0
6,UF2018-1146,51950,2018-09-04T00:00:00.000,8940.0,Male,White,2006-09-27T00:00:00.000,No,No injuries noted or visible,No,...,No,,No Arrest,,,,,"{'type': 'Point', 'coordinates': [-81.582777, ...",,
7,UF2018-0266,48871,2018-04-02T00:00:00.000,9513.0,Male,White,2008-07-09T00:00:00.000,No,No injuries noted or visible,No,...,Yes,Alchohol,"Assault/FV, Warrant/Hold",4421.0,625.0,620.0,NORTH CENTRAL,"{'type': 'Point', 'coordinates': [-89.11496700...",,
8,UF2018-0310,48859,2018-03-29T00:00:00.000,7080.0,Male,White,1994-07-13T00:00:00.000,Yes,Abrasion/Scrape,No,...,Yes,None detected,Crim Trespass/Bldg/Prop,9608.0,258.0,250.0,NORTHEAST,,,
9,UF2018-1427,53617,2018-12-03T00:00:00.000,9365.0,Male,Asian,2008-01-23T00:00:00.000,No,No injuries noted or visible,No,...,Yes,Unknown Drugs,APOWW,3094.0,542.0,540.0,NORTHWEST,"{'type': 'Point', 'coordinates': [-81.209393, ...",,


In [27]:
# replace NaN string values with None type for proper load into database
df_2018.replace(to_replace='NaN',inplace=True)

filenum                        None
uofnum                         None
occurred_d                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
address                        None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                     None
cit_infl_a                     None
citcharget                  

In [28]:
# cast all empty values into None type
df_2018.where(pd.notnull(df_2018),None,inplace=True)

In [29]:
# convert the geocode column from dict to JSON string for load into database
df_2018['geocoded_column'] = df_2018['geocoded_column'].apply(lambda x:json.dumps(x))

In [30]:
# replace NULL string values with None type for proper load into database
df_2018.replace(to_replace='NULL',inplace=True)

filenum                        None
uofnum                         None
occurred_d                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
address                        None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                     None
cit_infl_a                     None
citcharget                  

In [31]:
# adding the year reported values
df_2018['year_reported'] = '2018'

In [32]:
df_2018.head(30)

Unnamed: 0,filenum,uofnum,occurred_d,current_ba,offsex,offrace,hire_dt,off_injure,offcondtyp,off_hospit,...,cit_infl_a,citcharget,ra,beat,sector,division,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,year_reported
0,UF2018-0945,51374,2018-08-03T00:00:00.000,8681.0,Male,White,2005-08-05T00:00:00.000,No,No injuries noted or visible,No,...,Alchohol,Public Intoxication,2062.0,134.0,130.0,CENTRAL,"{""type"": ""Point"", ""coordinates"": [-96.799965, ...",12.0,3.0,2018
1,UF2018-0868,51056,2018-07-20T00:00:00.000,8019.0,Male,Hispanic,2001-04-13T00:00:00.000,No,No injuries noted or visible,No,...,Mentally unstable,APOWW,4119.0,413.0,410.0,SOUTHWEST,"{""type"": ""Point"", ""coordinates"": [-90.516982, ...",,,2018
2,UF2018-1228,52701,2018-10-10T00:00:00.000,11171.0,Male,Hispanic,2016-04-06T00:00:00.000,Yes,Laceration/Cut,No,...,Agitated,Assault/Public Servant,2073.0,134.0,130.0,CENTRAL,,,,2018
3,UF2018-0912,51050,2018-07-19T00:00:00.000,6412.0,Male,Black,1990-07-16T00:00:00.000,Yes,Sprain/Strain,No,...,Alchohol,Assault,3070.0,523.0,520.0,NORTHWEST,,,,2018
4,UF2018-1014,"51858, 51859",2018-08-18T00:00:00.000,10991.0,Male,White,2015-03-25T00:00:00.000,Yes,Redness/Swelling,No,...,Mentally unstable,Assault/Public Servant,1247.0,327.0,320.0,SOUTHEAST,"{""type"": ""Point"", ""coordinates"": [-96.648101, ...",7.0,3.0,2018
5,UF2018-1650,53440,2018-11-25T00:00:00.000,11459.0,Male,Hispanic,2018-02-07T00:00:00.000,No,No injuries noted or visible,No,...,Agitated,"Assault/FV, Resisting Arrest",3046.0,526.0,520.0,NORTHWEST,"{""type"": ""Point"", ""coordinates"": [-96.857673, ...",2.0,3.0,2018
6,UF2018-1146,51950,2018-09-04T00:00:00.000,8940.0,Male,White,2006-09-27T00:00:00.000,No,No injuries noted or visible,No,...,Agitated,No Arrest,,,,,"{""type"": ""Point"", ""coordinates"": [-81.582777, ...",,,2018
7,UF2018-0266,48871,2018-04-02T00:00:00.000,9513.0,Male,White,2008-07-09T00:00:00.000,No,No injuries noted or visible,No,...,Alchohol,"Assault/FV, Warrant/Hold",4421.0,625.0,620.0,NORTH CENTRAL,"{""type"": ""Point"", ""coordinates"": [-89.11496700...",,,2018
8,UF2018-0310,48859,2018-03-29T00:00:00.000,7080.0,Male,White,1994-07-13T00:00:00.000,Yes,Abrasion/Scrape,No,...,None detected,Crim Trespass/Bldg/Prop,9608.0,258.0,250.0,NORTHEAST,,,,2018
9,UF2018-1427,53617,2018-12-03T00:00:00.000,9365.0,Male,Asian,2008-01-23T00:00:00.000,No,No injuries noted or visible,No,...,Unknown Drugs,APOWW,3094.0,542.0,540.0,NORTHWEST,"{""type"": ""Point"", ""coordinates"": [-81.209393, ...",,,2018


In [33]:
# load into table
df_2018[[item for item in table_cols if item in df_2018.columns]].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=True,index_label='objectid',method='multi')

- - -

## Load 2017 Data

In [34]:
df_2017 = pd.DataFrame(client.get(url_dict['data_2017'], limit=limit))

In [35]:
df_2017.head(20)

Unnamed: 0,objectid_1,filenum,uofnum,address,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,beat,sector,division,dist_name,x,y,geolocation,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,taag_name
0,1063,UF2017-1685,45836,220 W CAMP WISDOM RD,2017-10-24T00:00:00.000,10:30 AM,11217,Female,Hispanic,2016-09-07T00:00:00.000,...,747,740,SOUTH CENTRAL,D8,2483989.795963,6927942.126927,"{'type': 'Point', 'coordinates': [-96.824112, ...",8.0,3.0,
1,1367,UF2017-1878,47141,1531 POCONO TRL,2017-12-25T00:00:00.000,5:30 PM,11285,Male,White,2017-03-08T00:00:00.000,...,327,320,SOUTHEAST,D5,2539821.664527,6956486.797079,"{'type': 'Point', 'coordinates': [-96.641689, ...",5.0,3.0,
2,1438,UF2017-1927,46916,8145 HUNNICUT RD,2017-11-28T00:00:00.000,1:12 AM,10224,Male,White,2010-08-18T00:00:00.000,...,237,230,NORTHEAST,D7,2519568.33956,6978413.242452,"{'type': 'Point', 'coordinates': [-96.706316, ...",7.0,3.0,
3,2198,UF2017-662,41532,7814 N CENTRAL EXPY,2017-04-18T00:00:00.000,11:41 AM,5260,Male,White,1985-08-15T00:00:00.000,...,214,210,NORTHEAST,D14,2499646.679584,6999604.605725,"{'type': 'Point', 'coordinates': [-96.769859, ...",12.0,3.0,Central Southwestern*
4,2573,UF2017-912,43186,3000 MUNICIPAL ST,2017-06-18T00:00:00.000,5:58 PM,9388,Male,White,2008-02-20T00:00:00.000,...,351,350,SOUTHEAST,D7,2508016.266634,6956733.492611,"{'type': 'Point', 'coordinates': [-96.745228, ...",7.0,3.0,
5,2036,UF2017-561,"40432, 40433, 41431",1720 WOOD ST,2017-03-09T00:00:00.000,9:18 AM,8321,Male,Black,2002-09-13T00:00:00.000,...,134,130,CENTRAL,D2,2492217.494981,6970630.126899,"{'type': 'Point', 'coordinates': [-96.795932, ...",12.0,3.0,
6,2651,UF2017-965,"42928, 42929",160 E AIRPORT FWY,2017-03-19T00:00:00.000,3:00 AM,9709,Male,Hispanic,2009-01-07T00:00:00.000,...,0,0,,,0.0,0.0,,,,
7,1162,UF2017-1741,46173,2911 E LEDBETTER DR,2017-11-05T00:00:00.000,5:03 PM,10998,Male,White,2015-03-25T00:00:00.000,...,733,730,SOUTH CENTRAL,D4,2500054.985552,6940069.640364,"{'type': 'Point', 'coordinates': [-96.771562, ...",4.0,3.0,Great Trinity Forest BonnieView*
8,2336,UF2017-764,"41695, 42056",4600 S MALCOLM X BLVD,2017-04-25T00:00:00.000,2:50 AM,7896,Male,White,2000-05-10T00:00:00.000,...,346,340,SOUTHEAST,D7,2505343.370728,6962661.133308,,,,Hatcher Scyene*
9,183,UF2017-1100,"43581, 43582",9727 WHITEHURST DR,2017-07-07T00:00:00.000,12:34 AM,9134,Male,White,2007-05-30T00:00:00.000,...,242,240,NORTHEAST,D10,2512709.712169,7015414.940771,"{'type': 'Point', 'coordinates': [-96.727081, ...",9.0,3.0,


In [36]:
df_2017.columns

Index(['objectid_1', 'filenum', 'uofnum', 'address', 'occurred_d',
       'occurred_t', 'current_ba', 'offsex', 'offrace', 'hire_dt',
       'off_injure', 'offcondtyp', 'off_hospit', 'service_ty', 'forcetype',
       'uof_reason', 'cycles_num', 'forceeffec', 'street_n', 'street',
       'street_g', 'street_t', 'citnum', 'citrace', 'citsex', 'cit_injure',
       'citcondtyp', 'cit_arrest', 'cit_infl_a', 'citcharget', 'ra', 'beat',
       'sector', 'division', 'dist_name', 'x', 'y', 'geolocation',
       ':@computed_region_sjyw_rtbm', ':@computed_region_2f7u_b5gs',
       'taag_name'],
      dtype='object')

In [37]:
# rename columns
df_2017.rename(columns={'objectid_1':'objectid', 'dist_name':'council_district','geolocation':'geocoded_column'},inplace=True)




In [38]:
# fix issue with the boolean types 
df_2017 = df_2017.astype({'off_injure':'str', 'off_hospit':'str', 'cit_injure':'str', 'cit_arrest':'str'})

In [39]:
# replace NaN string values with None type for proper load into database
df_2017.replace(to_replace='NaN',value=None,inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [40]:
# cast all empty values into None type
df_2017.where(df_2017.notnull(),other=None,inplace=True)

In [41]:
# convert the geocode column from dict to JSON string for load into database
df_2017['geocoded_column'] = df_2017['geocoded_column'].apply(lambda x:json.dumps(x))

In [42]:
# replace NULL string values with None type for proper load into database
df_2017.replace(to_replace='NULL',inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [43]:
# adding the year reported values
df_2017['year_reported'] = '2017'

In [44]:
df_2017.head(20)

Unnamed: 0,objectid,filenum,uofnum,address,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,sector,division,council_district,x,y,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,taag_name,year_reported
0,1063,UF2017-1685,45836,220 W CAMP WISDOM RD,2017-10-24T00:00:00.000,10:30 AM,11217,Female,Hispanic,2016-09-07T00:00:00.000,...,740,SOUTH CENTRAL,D8,2483989.795963,6927942.126927,"{""type"": ""Point"", ""coordinates"": [-96.824112, ...",8.0,3.0,,2017
1,1367,UF2017-1878,47141,1531 POCONO TRL,2017-12-25T00:00:00.000,5:30 PM,11285,Male,White,2017-03-08T00:00:00.000,...,320,SOUTHEAST,D5,2539821.664527,6956486.797079,"{""type"": ""Point"", ""coordinates"": [-96.641689, ...",5.0,3.0,,2017
2,1438,UF2017-1927,46916,8145 HUNNICUT RD,2017-11-28T00:00:00.000,1:12 AM,10224,Male,White,2010-08-18T00:00:00.000,...,230,NORTHEAST,D7,2519568.33956,6978413.242452,"{""type"": ""Point"", ""coordinates"": [-96.706316, ...",7.0,3.0,,2017
3,2198,UF2017-662,41532,7814 N CENTRAL EXPY,2017-04-18T00:00:00.000,11:41 AM,5260,Male,White,1985-08-15T00:00:00.000,...,210,NORTHEAST,D14,2499646.679584,6999604.605725,"{""type"": ""Point"", ""coordinates"": [-96.769859, ...",12.0,3.0,Central Southwestern*,2017
4,2573,UF2017-912,43186,3000 MUNICIPAL ST,2017-06-18T00:00:00.000,5:58 PM,9388,Male,White,2008-02-20T00:00:00.000,...,350,SOUTHEAST,D7,2508016.266634,6956733.492611,"{""type"": ""Point"", ""coordinates"": [-96.745228, ...",7.0,3.0,,2017
5,2036,UF2017-561,"40432, 40433, 41431",1720 WOOD ST,2017-03-09T00:00:00.000,9:18 AM,8321,Male,Black,2002-09-13T00:00:00.000,...,130,CENTRAL,D2,2492217.494981,6970630.126899,"{""type"": ""Point"", ""coordinates"": [-96.795932, ...",12.0,3.0,,2017
6,2651,UF2017-965,"42928, 42929",160 E AIRPORT FWY,2017-03-19T00:00:00.000,3:00 AM,9709,Male,Hispanic,2009-01-07T00:00:00.000,...,0,,,0.0,0.0,,,,,2017
7,1162,UF2017-1741,46173,2911 E LEDBETTER DR,2017-11-05T00:00:00.000,5:03 PM,10998,Male,White,2015-03-25T00:00:00.000,...,730,SOUTH CENTRAL,D4,2500054.985552,6940069.640364,"{""type"": ""Point"", ""coordinates"": [-96.771562, ...",4.0,3.0,Great Trinity Forest BonnieView*,2017
8,2336,UF2017-764,"41695, 42056",4600 S MALCOLM X BLVD,2017-04-25T00:00:00.000,2:50 AM,7896,Male,White,2000-05-10T00:00:00.000,...,340,SOUTHEAST,D7,2505343.370728,6962661.133308,,,,Hatcher Scyene*,2017
9,183,UF2017-1100,"43581, 43582",9727 WHITEHURST DR,2017-07-07T00:00:00.000,12:34 AM,9134,Male,White,2007-05-30T00:00:00.000,...,240,NORTHEAST,D10,2512709.712169,7015414.940771,"{""type"": ""Point"", ""coordinates"": [-96.727081, ...",9.0,3.0,,2017


In [45]:
# load into table
df_2017[[item for item in table_cols if item in df_2017.columns]].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

## Load 2016 Data

In [46]:
df_2016 = pd.DataFrame(client.get(url_dict['data_2016'], limit=limit))

In [47]:
df_2016.columns

Index(['objectid', 'filenum', 'uofnum', 'address', 'occurred_d', 'occurred_t',
       'current_ba', 'offsex', 'offrace', 'hire_dt', 'off_injure',
       'offcondtyp', 'off_hospit', 'service_ty', 'forcetype', 'uof_reason',
       'cycles_num', 'forceeffec', 'street_n', 'street', 'street_g',
       'street_t', 'citnum', 'citrace', 'citsex', 'cit_injure', 'citcondtyp',
       'cit_arrest', 'cit_infl_a', 'citcharget', 'ra', 'beat', 'sector',
       'division', 'dist_name', 'taag_name', 'geolocation',
       ':@computed_region_sjyw_rtbm', ':@computed_region_2f7u_b5gs',
       ':@computed_region_at43_7y52', ':@computed_region_3qur_xvie',
       ':@computed_region_28rh_izyk'],
      dtype='object')

In [48]:
# rename columns
df_2016.rename(columns={'dist_name':'council_district','geolocation':'geocoded_column'},inplace=True)

In [49]:
df_2016.head(50)

Unnamed: 0,objectid,filenum,uofnum,address,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,sector,division,council_district,taag_name,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,:@computed_region_at43_7y52,:@computed_region_3qur_xvie,:@computed_region_28rh_izyk
0,609,UF2016-1185,37702,211 N ERVAY ST,2016-09-03T00:00:00.000,4:14:00 AM,10810,Male,Black,2014-05-07T00:00:00.000,...,130,CENTRAL,D14,,"{'latitude': '32.782205', 'longitude': '-96.79...",12.0,3.0,14.0,20152.0,3162.0
1,1705,UF2016-0418,33413,7647 FERGUSON RD,2016-03-22T00:00:00.000,11:00:00 PM,7706,Male,White,1999-01-08T00:00:00.000,...,230,NORTHEAST,D9,Ferguson Highland*,"{'latitude': '32.798978', 'longitude': '-96.71...",13.0,3.0,9.0,19361.0,3162.0
2,1239,UF2016-0616,34567,716 BIMEBELLA LN,2016-05-22T00:00:00.000,1:29:00 PM,11014,Male,Black,2015-05-20T00:00:00.000,...,430,SOUTHWEST,D6,,"{'latitude': '32.73971', 'longitude': '-96.925...",6.0,3.0,6.0,18528.0,3162.0
3,2320,UF2016-1349,31460,5600 L B J FWY,2016-01-10T00:00:00.000,8:55:00 PM,6692,Male,Black,1991-07-29T00:00:00.000,...,640,NORTH CENTRAL,D11,,"{'human_address': '{""address"": ""5600 L B J FWY...",,,,,
4,221,UF2016-1303,"37879, 37898",4600 S MALCOLM X BLVD,2016-11-08T00:00:00.000,2:30:00 AM,9844,Male,White,2009-10-04T00:00:00.000,...,340,SOUTHEAST,D7,Hatcher Scyene*,"{'human_address': '{""address"": ""4600 S MALCOLM...",,,,,
5,549,UF2016-1084,36724,1234 PEAVY RD,2016-09-11T00:00:00.000,7:20:00 PM,9855,Male,White,2009-06-10T00:00:00.000,...,230,NORTHEAST,D9,,"{'latitude': '32.837527', 'longitude': '-96.69...",13.0,3.0,9.0,18924.0,3162.0
6,133,UF2016-1475,38441,511 N AKARD ST,2016-11-26T00:00:00.000,9:00:00 PM,9881,Male,White,2009-06-10T00:00:00.000,...,130,CENTRAL,D14,,"{'latitude': '32.784328', 'longitude': '-96.80...",12.0,3.0,14.0,20152.0,3162.0
7,1761,UF2016-0334,33133,4709 LUCKY LN,2016-03-16T00:00:00.000,8:00:00 PM,9058,Female,Black,2007-07-04T00:00:00.000,...,510,NORTHWEST,D6,,"{'latitude': '32.803725', 'longitude': '-96.87...",6.0,3.0,6.0,20162.0,3162.0
8,2014,UF2016-0216,33989,300 S LAMAR ST,2016-02-17T00:00:00.000,9:25:00 PM,10381,Male,Hispanic,2012-05-09T00:00:00.000,...,130,CENTRAL,D14,,"{'latitude': '32.778107', 'longitude': '-96.80...",12.0,3.0,14.0,20153.0,3162.0
9,19,UF2016-1499,"38840, 38841",18600 DALLAS NORTH TOLLWAY,2016-12-24T00:00:00.000,8:55:00 AM,9705,Male,White,2009-01-07T00:00:00.000,...,610,NORTH CENTRAL,D12,,"{'human_address': '{""address"": ""18600 DALLAS N...",,,,,


In [50]:
# replace NaN string values with None type for proper load into database
df_2016.replace(to_replace='NaN',value=None,inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [51]:
# cast all empty values into None type
df_2016.where(pd.notnull(df_2016),None,inplace=True)

In [52]:
# convert the geocode column from dict to JSON string for load into database
df_2016['geocoded_column'] = df_2016['geocoded_column'].apply(lambda x:json.dumps(x))

In [53]:
# replace NULL string values with None type for proper load into database
df_2016.replace(to_replace='NULL',inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [54]:
# adding the year reported values
df_2016['year_reported'] = '2016'

In [55]:
df_2016.head(50)

Unnamed: 0,objectid,filenum,uofnum,address,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,division,council_district,taag_name,geocoded_column,:@computed_region_sjyw_rtbm,:@computed_region_2f7u_b5gs,:@computed_region_at43_7y52,:@computed_region_3qur_xvie,:@computed_region_28rh_izyk,year_reported
0,609,UF2016-1185,37702,211 N ERVAY ST,2016-09-03T00:00:00.000,4:14:00 AM,10810,Male,Black,2014-05-07T00:00:00.000,...,CENTRAL,D14,,"{""latitude"": ""32.782205"", ""longitude"": ""-96.79...",12.0,3.0,14.0,20152.0,3162.0,2016
1,1705,UF2016-0418,33413,7647 FERGUSON RD,2016-03-22T00:00:00.000,11:00:00 PM,7706,Male,White,1999-01-08T00:00:00.000,...,NORTHEAST,D9,Ferguson Highland*,"{""latitude"": ""32.798978"", ""longitude"": ""-96.71...",13.0,3.0,9.0,19361.0,3162.0,2016
2,1239,UF2016-0616,34567,716 BIMEBELLA LN,2016-05-22T00:00:00.000,1:29:00 PM,11014,Male,Black,2015-05-20T00:00:00.000,...,SOUTHWEST,D6,,"{""latitude"": ""32.73971"", ""longitude"": ""-96.925...",6.0,3.0,6.0,18528.0,3162.0,2016
3,2320,UF2016-1349,31460,5600 L B J FWY,2016-01-10T00:00:00.000,8:55:00 PM,6692,Male,Black,1991-07-29T00:00:00.000,...,NORTH CENTRAL,D11,,"{""human_address"": ""{\""address\"": \""5600 L B J ...",,,,,,2016
4,221,UF2016-1303,"37879, 37898",4600 S MALCOLM X BLVD,2016-11-08T00:00:00.000,2:30:00 AM,9844,Male,White,2009-10-04T00:00:00.000,...,SOUTHEAST,D7,Hatcher Scyene*,"{""human_address"": ""{\""address\"": \""4600 S MALC...",,,,,,2016
5,549,UF2016-1084,36724,1234 PEAVY RD,2016-09-11T00:00:00.000,7:20:00 PM,9855,Male,White,2009-06-10T00:00:00.000,...,NORTHEAST,D9,,"{""latitude"": ""32.837527"", ""longitude"": ""-96.69...",13.0,3.0,9.0,18924.0,3162.0,2016
6,133,UF2016-1475,38441,511 N AKARD ST,2016-11-26T00:00:00.000,9:00:00 PM,9881,Male,White,2009-06-10T00:00:00.000,...,CENTRAL,D14,,"{""latitude"": ""32.784328"", ""longitude"": ""-96.80...",12.0,3.0,14.0,20152.0,3162.0,2016
7,1761,UF2016-0334,33133,4709 LUCKY LN,2016-03-16T00:00:00.000,8:00:00 PM,9058,Female,Black,2007-07-04T00:00:00.000,...,NORTHWEST,D6,,"{""latitude"": ""32.803725"", ""longitude"": ""-96.87...",6.0,3.0,6.0,20162.0,3162.0,2016
8,2014,UF2016-0216,33989,300 S LAMAR ST,2016-02-17T00:00:00.000,9:25:00 PM,10381,Male,Hispanic,2012-05-09T00:00:00.000,...,CENTRAL,D14,,"{""latitude"": ""32.778107"", ""longitude"": ""-96.80...",12.0,3.0,14.0,20153.0,3162.0,2016
9,19,UF2016-1499,"38840, 38841",18600 DALLAS NORTH TOLLWAY,2016-12-24T00:00:00.000,8:55:00 AM,9705,Male,White,2009-01-07T00:00:00.000,...,NORTH CENTRAL,D12,,"{""human_address"": ""{\""address\"": \""18600 DALLA...",,,,,,2016


In [56]:
# load into table
df_2016[[item for item in table_cols if item in df_2016.columns]].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

## Load 2015 Data

In [57]:
df_2015 = pd.DataFrame(client.get(url_dict['data_2015'], limit=limit))

In [58]:
df_2015.columns

Index(['id', 'filenum', 'occurred_dt', 'occurred_tm', 'current_badge_no',
       'offsex', 'offrace', 'hire_dt', 'off_injured', 'offcondtype',
       'off_hospital', 'service_type', 'uofnum', 'forcetype', 'uof_reason',
       'cycles_num', 'forceeffective', 'citnum', 'citrace', 'citsex',
       'cit_injured', 'citcondtype', 'cit_arrested', 'cit_infl_assmt',
       'citchargetype', 'arc_street', 'ra', 'beat', 'sector', 'division',
       'dist_name', 'geolocation', ':@computed_region_at43_7y52',
       ':@computed_region_3qur_xvie', ':@computed_region_28rh_izyk'],
      dtype='object')

In [59]:
# rename columns
df_2015.rename(columns={'id':'objectid','occurred_dt':'occurred_d', 'occurred_tm':'occurred_t', 'current_badge_no':'current_ba','off_injured':'off_injure','offcondtype':'offcondtyp','off_hospital':'off_hospit','service_type':'service_ty','forceeffective':'forceeffec','cit_injured':'cit_injure','citcondtype':'citcondtyp','cit_arrested':'cit_arrest','cit_infl_assmt':'cit_infl_a','citchargetype':'citcharget','arc_street':'address','dist_name':'council_district','geolocation':'geocoded_column'},inplace=True)

In [60]:
df_2015.head(30)

Unnamed: 0,objectid,filenum,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,off_injure,offcondtyp,...,address,ra,beat,sector,division,council_district,geocoded_column,:@computed_region_at43_7y52,:@computed_region_3qur_xvie,:@computed_region_28rh_izyk
0,62,UF2015-0108,2015-02-10T00:00:00.000,16:00,10757,Male,White,2014-01-29T00:00:00.000,No,No injuries noted or visible,...,2100 Arden Rd/ Lancaster Rd. Rd.,4196,714,710,SOUTH CENTRAL,D4,"{'human_address': '{""address"": ""2100 Arden Rd/...",,,
1,63,UF2015-0108,2015-02-10T00:00:00.000,16:00,8285,Male,White,2002-07-19T00:00:00.000,No,No injuries noted or visible,...,2100 Arden Rd/ Lancaster Rd. Rd.,4196,714,710,SOUTH CENTRAL,D4,"{'human_address': '{""address"": ""2100 Arden Rd/...",,,
2,68,UF2015-0111,2015-02-11T00:00:00.000,15:45,6175,Male,White,1989-11-02T00:00:00.000,No,No injuries noted or visible,...,8700 Walnut Hill Ln.,6061,213,210,NORTHEAST,D10,"{'human_address': '{""address"": ""8700 Walnut Hi...",,,
3,76,UF2015-0117,2015-02-01T00:00:00.000,18:30,10313,Male,White,2012-03-28T00:00:00.000,No,No injuries noted or visible,...,4407 ELSIE FAYE HEGGINS ST,2121,312,310,SOUTHEAST,D7,"{'human_address': '{""address"": ""4407 ELSIE FAY...",,,
4,77,UF2015-0117,2015-02-01T00:00:00.000,18:30,10506,Male,White,2013-04-10T00:00:00.000,No,No injuries noted or visible,...,4407 ELSIE FAYE HEGGINS ST,2121,312,310,SOUTHEAST,D7,"{'human_address': '{""address"": ""4407 ELSIE FAY...",,,
5,125,UF2015-0152,2015-02-24T00:00:00.000,23:40,10282,Male,Black,2010-09-29T00:00:00.000,No,No injuries noted or visible,...,4500 Larue St.,4202,434,430,SOUTHWEST,D3,"{'human_address': '{""address"": ""4500 Larue St....",,,
6,126,UF2015-0152,2015-02-24T00:00:00.000,23:40,8933,Male,White,2006-09-27T00:00:00.000,No,No injuries noted or visible,...,4500 Larue St.,4202,434,430,SOUTHWEST,D3,"{'human_address': '{""address"": ""4500 Larue St....",,,
7,127,UF2015-0152,2015-02-23T00:00:00.000,23:30,10282,Male,Black,2010-09-29T00:00:00.000,No,No injuries noted or visible,...,4500 Larue St.,4202,434,430,SOUTHWEST,D3,"{'human_address': '{""address"": ""4500 Larue St....",,,
8,130,UF2015-0154,2015-02-28T00:00:00.000,18:00,10327,Male,Black,2012-03-28T00:00:00.000,No,No injuries noted or visible,...,9386 L B J FWY,1030,252,250,NORTHEAST,D10,"{'human_address': '{""address"": ""9386 L B J FWY...",,,
9,174,UF2015-0186,2015-02-17T00:00:00.000,14:30,6736,Male,White,1991-08-26T00:00:00.000,No,No injuries noted or visible,...,9229 John Carpenter Frwy.,3098,512,510,NORTHWEST,D6,"{'human_address': '{""address"": ""9229 John Carp...",,,


In [61]:
# replace NaN string values with None type for proper load into database
df_2015.replace(to_replace='NaN',value=None,inplace=True)

objectid                       None
filenum                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
uofnum                         None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                     None
cit_infl_a                     None
citcharget                     None
address                        None
ra                             None
beat                        

In [62]:
# cast all empty values into None type
df_2015.where(pd.notnull(df_2015),None,inplace=True)

In [63]:
# convert the geocode column from dict to JSON string for load into database
df_2015['geocoded_column'] = df_2015['geocoded_column'].apply(lambda x:json.dumps(x))

In [64]:
# replace NULL string values with None type for proper load into database
df_2015.replace(to_replace='NULL',inplace=True)

objectid                       None
filenum                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
uofnum                         None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                     None
cit_infl_a                     None
citcharget                     None
address                        None
ra                             None
beat                        

In [65]:
# adding the year reported values
df_2015['year_reported'] = '2015'

In [66]:
df_2015.head(30)

Unnamed: 0,objectid,filenum,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,off_injure,offcondtyp,...,ra,beat,sector,division,council_district,geocoded_column,:@computed_region_at43_7y52,:@computed_region_3qur_xvie,:@computed_region_28rh_izyk,year_reported
0,62,UF2015-0108,2015-02-10T00:00:00.000,16:00,10757,Male,White,2014-01-29T00:00:00.000,No,No injuries noted or visible,...,4196,714,710,SOUTH CENTRAL,D4,"{""human_address"": ""{\""address\"": \""2100 Arden ...",,,,2015
1,63,UF2015-0108,2015-02-10T00:00:00.000,16:00,8285,Male,White,2002-07-19T00:00:00.000,No,No injuries noted or visible,...,4196,714,710,SOUTH CENTRAL,D4,"{""human_address"": ""{\""address\"": \""2100 Arden ...",,,,2015
2,68,UF2015-0111,2015-02-11T00:00:00.000,15:45,6175,Male,White,1989-11-02T00:00:00.000,No,No injuries noted or visible,...,6061,213,210,NORTHEAST,D10,"{""human_address"": ""{\""address\"": \""8700 Walnut...",,,,2015
3,76,UF2015-0117,2015-02-01T00:00:00.000,18:30,10313,Male,White,2012-03-28T00:00:00.000,No,No injuries noted or visible,...,2121,312,310,SOUTHEAST,D7,"{""human_address"": ""{\""address\"": \""4407 ELSIE ...",,,,2015
4,77,UF2015-0117,2015-02-01T00:00:00.000,18:30,10506,Male,White,2013-04-10T00:00:00.000,No,No injuries noted or visible,...,2121,312,310,SOUTHEAST,D7,"{""human_address"": ""{\""address\"": \""4407 ELSIE ...",,,,2015
5,125,UF2015-0152,2015-02-24T00:00:00.000,23:40,10282,Male,Black,2010-09-29T00:00:00.000,No,No injuries noted or visible,...,4202,434,430,SOUTHWEST,D3,"{""human_address"": ""{\""address\"": \""4500 Larue ...",,,,2015
6,126,UF2015-0152,2015-02-24T00:00:00.000,23:40,8933,Male,White,2006-09-27T00:00:00.000,No,No injuries noted or visible,...,4202,434,430,SOUTHWEST,D3,"{""human_address"": ""{\""address\"": \""4500 Larue ...",,,,2015
7,127,UF2015-0152,2015-02-23T00:00:00.000,23:30,10282,Male,Black,2010-09-29T00:00:00.000,No,No injuries noted or visible,...,4202,434,430,SOUTHWEST,D3,"{""human_address"": ""{\""address\"": \""4500 Larue ...",,,,2015
8,130,UF2015-0154,2015-02-28T00:00:00.000,18:00,10327,Male,Black,2012-03-28T00:00:00.000,No,No injuries noted or visible,...,1030,252,250,NORTHEAST,D10,"{""human_address"": ""{\""address\"": \""9386 L B J ...",,,,2015
9,174,UF2015-0186,2015-02-17T00:00:00.000,14:30,6736,Male,White,1991-08-26T00:00:00.000,No,No injuries noted or visible,...,3098,512,510,NORTHWEST,D6,"{""human_address"": ""{\""address\"": \""9229 John C...",,,,2015


In [67]:
# load into table
df_2015[[item for item in table_cols if item in df_2015.columns]].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

## Load 2014 Data

In [68]:
df_2014 = pd.DataFrame(client.get(url_dict['data_2014'], limit=limit))

In [69]:
df_2014.columns

Index(['objectid', 'filenum', 'occurred_d', 'occurred_tm', 'current_ba',
       'offsex', 'offrace', 'hire_dt', 'off_injure', 'offcondtyp',
       'off_hospit', 'service_ty', 'uofnum', 'forcetype', 'uof_reason',
       'cycles_num', 'forceeffec', 'citnum', 'citrace', 'citsex', 'cit_injure',
       'citcondtyp', 'cit_arrest', 'cit_infl_a', 'citcharget', 'arc_street',
       'ra', 'beat', 'sector', 'division', 'dist_name'],
      dtype='object')

In [70]:
# rename columns
df_2014.rename(columns={'occurred_tm':'occurred_t','arc_street':'address','dist_name':'council_district'},inplace=True)

In [71]:
df_2014.head(30)

Unnamed: 0,objectid,filenum,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,off_injure,offcondtyp,...,citcondtyp,cit_arrest,cit_infl_a,citcharget,address,ra,beat,sector,division,council_district
0,1,UF2014-1864,2014-12-31T00:00:00.000,22:56,8912,Male,White,2006-09-13T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Unknown,"Assault/FV, Warrant/Hold",1238 N MASTERS DR,2206,335,330,SOUTHEAST,D5
1,2,UF2014-1864,2014-12-31T00:00:00.000,0:45,10759,Male,White,2014-03-12T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Unknown,"Assault/FV, Warrant/Hold",1238 N MASTERS DR,2206,335,330,SOUTHEAST,D5
2,3,UF2014-1864,2014-12-31T00:00:00.000,0:45,7523,Female,White,2003-03-20T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Unknown,"Assault/FV, Warrant/Hold",1238 N MASTERS DR,2206,335,330,SOUTHEAST,D5
3,4,UF2015-0002,2014-12-31T00:00:00.000,0:52,10694,Male,Black,2013-11-06T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Mentally unstable,APOWW,3287 S POLK ST,4262,454,450,SOUTHWEST,D4
4,5,UF2014-1865,2014-12-31T00:00:00.000,0:45,10144,Male,White,2010-03-03T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,None detected,Warrant/Hold,5200 BEXAR ST,2189,348,340,SOUTHEAST,D7
5,6,UF2014-1898,2014-12-31T00:00:00.000,23:00,9592,Female,White,2008-09-03T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Alchohol and unknown drugs,Public Intoxication,7720 MCCALLUM BLVD,6009,623,620,NORTH CENTRAL,D12
6,7,UF2015-0078,2014-12-31T00:00:00.000,16:20,10640,Male,White,2013-09-25T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Alchohol and unknown drugs,Public Intoxication,7720 MCCALLUM BLVD,6009,623,620,NORTH CENTRAL,D12
7,8,UF2015-0078,2014-12-31T00:00:00.000,23:20,10640,Male,White,2013-09-25T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Alchohol and unknown drugs,Public Intoxication,7720 MCCALLUM BLVD,6009,623,620,NORTH CENTRAL,D12
8,9,UF2015-0078,2014-12-31T00:00:00.000,23:20,10156,Male,Asian,2010-03-03T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,Alchohol and unknown drugs,Public Intoxication,7720 MCCALLUM BLVD,6009,623,620,NORTH CENTRAL,D12
9,10,UF2014-1893,2014-12-30T00:00:00.000,2:30,10578,Male,Hispanic,2013-06-19T00:00:00.000,No,No injuries noted or visible,...,No injuries noted or visible,Yes,,Criminal Mischief,500 HAVERHILL LN,2204,334,330,SOUTHEAST,D5


In [72]:
# replace NaN string values with None type for proper load into database
df_2014.replace(to_replace='NaN',inplace=True)

objectid            None
filenum             None
occurred_d          None
occurred_t          None
current_ba          None
offsex              None
offrace             None
hire_dt             None
off_injure          None
offcondtyp          None
off_hospit          None
service_ty          None
uofnum              None
forcetype           None
uof_reason          None
cycles_num          None
forceeffec          None
citnum              None
citrace             None
citsex              None
cit_injure          None
citcondtyp          None
cit_arrest          None
cit_infl_a          None
citcharget          None
address             None
ra                  None
beat                None
sector              None
division            None
council_district    None
dtype: object

In [73]:
# cast all empty values into None type
df_2014.where(pd.notnull(df_2014),None,inplace=True)

In [74]:
# replace NULL string values with None type for proper load into database
df_2014.replace(to_replace='NULL',inplace=True)

objectid            None
filenum             None
occurred_d          None
occurred_t          None
current_ba          None
offsex              None
offrace             None
hire_dt             None
off_injure          None
offcondtyp          None
off_hospit          None
service_ty          None
uofnum              None
forcetype           None
uof_reason          None
cycles_num          None
forceeffec          None
citnum              None
citrace             None
citsex              None
cit_injure          None
citcondtyp          None
cit_arrest          None
cit_infl_a          None
citcharget          None
address             None
ra                  None
beat                None
sector              None
division            None
council_district    None
dtype: object

In [75]:
# adding the year reported values
df_2014['year_reported'] = '2014'

In [76]:
# load into table
df_2014[[item for item in table_cols if item in df_2014.columns]].dropna(thresh=30).to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

## Load 2013 Data

In [77]:
df_2013 = pd.DataFrame(client.get(url_dict['data_2013'], limit=limit))

In [78]:
df_2013.columns

Index(['objectid', 'filenum', 'uofnum', 'match_addr', 'occurred_d',
       'occurred_t', 'current_ba', 'offsex', 'offrace', 'hire_dt',
       'off_injure', 'offcondtyp', 'off_hospit', 'service_ty', 'forcetype',
       'uof_reason', 'cycles_num', 'forceeffec', 'street_n', 'street',
       'street_g', 'street_t', 'address', 'citnum', 'citrace', 'citsex',
       'cit_injure', 'citcondtyp', 'cit_arrest', 'cit_infl_a', 'citcharget',
       'ra', 'beat', 'sector', 'division', 'dist_name', 'taag_name', 'x', 'y',
       'geolocation', ':@computed_region_at43_7y52',
       ':@computed_region_3qur_xvie', ':@computed_region_28rh_izyk'],
      dtype='object')

In [79]:
df_2013.head(30)

Unnamed: 0,objectid,filenum,uofnum,match_addr,occurred_d,occurred_t,current_ba,offsex,offrace,hire_dt,...,sector,division,dist_name,taag_name,x,y,geolocation,:@computed_region_at43_7y52,:@computed_region_3qur_xvie,:@computed_region_28rh_izyk
0,51,UF2013-00121,"2030, 2034",4600 S MALCOLM X BLVD,2013-01-09T00:00:00.000,21:00,10018,Male,White,2009-09-30T00:00:00.000,...,340,SOUTHEAST,D7,Central CFHawn,2505343.37073,6962661.13331,"{'human_address': '{""address"": ""4600 S MALCOLM...",,,
1,52,UF2013-00121,"2041, 2042",4600 S MALCOLM X BLVD,2013-01-09T00:00:00.000,21:00,10116,Male,White,2010-01-20T00:00:00.000,...,340,SOUTHEAST,D7,Central CFHawn,2505343.37073,6962661.13331,"{'human_address': '{""address"": ""4600 S MALCOLM...",,,
2,3061,UF2013-02233,12045,3839 S FITZHUGH AVE,2013-10-27T00:00:00.000,23:30,5118,Male,White,1984-11-28T00:00:00.000,...,110,CENTRAL,D7,,2504798.67034,6969124.88107,"{'latitude': '32.77433', 'longitude': '-96.754...",7.0,18524.0,3162.0
3,120,UF2013-00212,"3195, 3196",1700 S CESAR CHAVEZ BLVD,2013-02-04T00:00:00.000,21:45,8747,Female,White,2005-12-16T00:00:00.000,...,150,CENTRAL,D2,,2495720.74048,6969057.26971,"{'human_address': '{""address"": ""1700 S CESAR C...",,,
4,131,UF2013-00227,13590,8300 L B J SERV S,2013-02-06T00:00:00.000,15:00,9222,Male,White,2007-09-05T00:00:00.000,...,250,NORTHEAST,D10,,2501991.75,7023278.88488,"{'human_address': '{""address"": ""8300 L B J SER...",,,
5,137,UF2013-00231,"2837, 2838, 2839, 2840",6500 S CENTRAL EXPY,2013-01-28T00:00:00.000,19:45,9997,Male,Other,2009-09-30T00:00:00.000,...,340,SOUTHEAST,D7,,2504175.3574,6952684.54824,"{'human_address': '{""address"": ""6500 S CENTRAL...",,,
6,151,UF2013-00243,"3446, 3905",3600 OAK GROVE AVE,2013-02-09T00:00:00.000,20:05,9266,Male,Hispanic,2007-09-26T00:00:00.000,...,120,CENTRAL,D14,Ross Bennett,2492013.90082,6980260.20744,"{'human_address': '{""address"": ""3600 OAK GROVE...",,,
7,248,UF2013-00355,7876,6500 JULIUS SCHEPPS FWY,2013-02-24T00:00:00.000,19:50,9000,Male,Hispanic,2006-11-15T00:00:00.000,...,710,SOUTH CENTRAL,D7,,2502439.97052,6952770.76314,"{'human_address': '{""address"": ""6500 JULIUS SC...",,,
8,424,UF2013-00522,5227,1419 BIG TOWN BLVD,2013-02-27T00:00:00.000,18:30,8687,Male,White,2005-08-05T00:00:00.000,...,320,SOUTHEAST,D7,,2531429.64322,6972870.2393,"{'human_address': '{""address"": ""1419 BIG TOWN ...",,,
9,514,UF2013-00623,"3099, 3101",9959 ADLETA BLVD,2013-02-03T00:00:00.000,4:55,10210,Male,White,2010-06-09T00:00:00.000,...,250,NORTHEAST,D10,Forest Audelia,2517112.38673,7015120.47541,"{'latitude': '32.900662', 'longitude': '-96.71...",10.0,19761.0,3162.0


In [80]:
df_2013[['match_addr','address']]

Unnamed: 0,match_addr,address
0,4600 S MALCOLM X BLVD,4600 S MALCOLM X Blvd.
1,4600 S MALCOLM X BLVD,4600 S Malcolm X Blvd.
2,3839 S FITZHUGH AVE,3839 S Fitzhugh Ave.
3,1700 S CESAR CHAVEZ BLVD,1700 CESAR CHAVEZ Blvd.
4,8300 L B J SERV S,8300 W LBJ service road Frwy.
...,...,...
4216,3700 GUARANTY ST,3700 Guaranty St.
4217,5900 HARRY HINES BLVD,5900 Harry Hines Blvd.
4218,9500 WEBB CHAPEL RD,9500 WEBB CHAPEL Rd.
4219,9100 N CENTRAL EXPY,9100 N CENTRAL Expwy.


In [81]:
df_2013.drop(['address'],axis=1,inplace=True)

In [82]:
# rename columns
df_2013.rename(columns={'match_addr':'address','dist_name':'council_district','geolocation':'geocoded_column'},inplace=True)

In [83]:
# replace NaN string values with None type for proper load into database
df_2013.replace(to_replace='NaN',inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [84]:
# cast all empty values into None type
df_2013.where(pd.notnull(df_2013),None,inplace=True)

In [85]:
# convert the geocode column from dict to JSON string for load into database
df_2013['geocoded_column'] = df_2013['geocoded_column'].apply(lambda x:json.dumps(x))

In [86]:
# replace NULL string values with None type for proper load into database
df_2013.replace(to_replace='NULL',inplace=True)

objectid                       None
filenum                        None
uofnum                         None
address                        None
occurred_d                     None
occurred_t                     None
current_ba                     None
offsex                         None
offrace                        None
hire_dt                        None
off_injure                     None
offcondtyp                     None
off_hospit                     None
service_ty                     None
forcetype                      None
uof_reason                     None
cycles_num                     None
forceeffec                     None
street_n                       None
street                         None
street_g                       None
street_t                       None
citnum                         None
citrace                        None
citsex                         None
cit_injure                     None
citcondtyp                     None
cit_arrest                  

In [87]:
# adding the year reported values
df_2013['year_reported'] = '2013'

In [88]:
# load into table
df_2013[[item for item in table_cols if item in df_2013.columns]].to_sql('uof_filenum',engine,schema='cdep',if_exists='append',index=False,method='multi')

In [89]:
con.commit()
con.close()

- - -

## Experimental code for above

In [None]:
df_2014.dropna(thresh=30)

In [None]:
con.rollback() # for rolling back a failed transaction

In [None]:
type(df_2019.geocoded_column[0])

In [None]:
df_2019[df_2019['geocoded_column']=='NaN']

In [None]:
json.dumps(df_2019['geocoded_column'][0])

In [None]:
df_2019[['off_injure',
       'offcondtyp', 'off_hospit', 'service_ty', 'forcetype', 'uof_reason',
       'cycles_num', 'forceeffec', 'street_n', 'street', 'street_g',
       'street_t', 'address', 'citnum', 'citrace', 'citsex', 'cit_injure']]