In [1]:
import pandas as pd
import psycopg2
from pgconnect import pgconnect
from sqlalchemy import create_engine

In [6]:
import requests

In [2]:
# Pull the database name, host and user out of the pgconnect settings object
db, host, user = (pgconnect[key] for key in ('db', 'host', 'user'))

In [4]:
# Identifier of the police incidents dataset, used in the resource URL
pi_id = "qv6i-rri7"

In [7]:
# Value sent as $limit with each request
limit = 6_000

In [13]:
# Source field names for reporting officer 1 and 2: badge number and name.
# The two lists are index-aligned (entry i of each belongs to the same officer).
officer_slots = (1, 2)
badge_num_fields = [f'ro{slot}badge' for slot in officer_slots]
badge_name_fields = [f'ro{slot}name' for slot in officer_slots]

In [75]:
# Start from an empty frame that already carries the target column names
column_names = ['badge_num', 'off_name']
df = pd.DataFrame(data=None, columns=column_names)

In [76]:
# Query reporting officer 1 and reporting officer 2 for their distinct
# (badge number, name) pairs, aliased to the shared column names, and combine
# the results into df.
#  - The query goes through `params` so requests handles the URL encoding of
#    the spaces and commas in the $select clause.
#  - raise_for_status() stops on an HTTP error instead of loading the error
#    payload into the frame; the timeout avoids hanging on a stalled connection.
#  - DataFrame.append was removed in pandas 2.0, so the per-field frames are
#    collected in a list and concatenated once. df is rebuilt from the fetched
#    frames, so re-running this cell does not stack a second copy of the rows.
endpoint = f'https://www.dallasopendata.com/resource/{pi_id}.json'
officer_frames = []
for num, name in zip(badge_num_fields, badge_name_fields):
    query = {
        '$select': f'DISTINCT {num} AS {column_names[0]},{name} AS {column_names[1]}',
        '$order': num,
        '$limit': limit,
    }
    r = requests.get(endpoint, params=query, timeout=60)
    r.raise_for_status()
    j = r.json()
    officer_frames.append(pd.DataFrame.from_dict(j))
# reindex keeps the expected column set and order even if a response omits a column
df = pd.concat(officer_frames, ignore_index=True).reindex(columns=column_names)

In [77]:
# Remove exact repeats of a (badge number, name) pair; a badge number that
# appears with different names keeps one row per name
df = df.drop_duplicates()

In [81]:
# Discard rows in which both the badge number and the name are missing
df = df.dropna(how='all')

In [83]:
# Replace every remaining missing value with Python None
df = df.where(df.notna(), other=None)

## Write the result to the database

In [47]:
# Open a psycopg2 connection from the loaded settings, plus a cursor for raw SQL
connection_settings = {'database': db, 'host': host, 'user': user}
con = psycopg2.connect(**connection_settings)
cur = con.cursor()

In [48]:
## Create the table
# Read the SQL script through a context manager so the file handle is closed
# right away instead of being left open until garbage collection.
with open('create_officer.sql') as sql_file:
    create_officer_sql = sql_file.read()
cur.execute(create_officer_sql)
con.commit()

In [49]:
# sqlalchemy engine for using .to_sql
# The dialect name must be "postgresql": the old "postgres" alias was deprecated
# and SQLAlchemy 1.4+ no longer loads it, so create_engine() fails with that spelling.
# The password part of the URL (after "user:") is left empty.
url = f"postgresql+psycopg2://{user}:@{host}:5432/{db}"
engine = create_engine(url)

In [85]:
# Append the rows to the cdep.officer table; method='multi' passes multiple
# rows in a single INSERT statement
df.to_sql(
    name='officer',
    con=engine,
    schema='cdep',
    if_exists='append',
    index=False,
    method='multi',
)

In [86]:
# Close the psycopg2 connection that was used for the table-creation step
con.close()

In [73]:
# Roll back any open transaction on the psycopg2 connection. When the cells run
# top to bottom, the connection has already been closed by the cell above, and
# rollback() on a closed connection raises, so only roll back while it is open.
if not con.closed:
    con.rollback()

### Work out the for loop

In [84]:
# Rows whose officer name is missing
df.loc[df['off_name'].isna()]

Unnamed: 0,badge_num,off_name
1652,4838,
4035,UP10,
4036,UP11,
4037,UP12,
4038,UP13,
4039,UP14,
4040,UP15,
4042,UP17,
4043,UP18,
4044,UP19,


In [18]:
# Use the first (reporting officer 1) field pair for a single trial request
num, name = badge_num_fields[0], badge_name_fields[0]

In [32]:
# Trial query URL: distinct badge/name pairs aliased to the target column names,
# ordered by badge field and capped at `limit` rows
search = (
    f'https://www.dallasopendata.com/resource/{pi_id}.json'
    f'?$select=DISTINCT%20{num} AS {column_names[0]},{name} AS {column_names[1]}'
    f'&$order={num}&$limit={limit}'
)

In [33]:
# Fetch the trial query. The timeout keeps the cell from hanging indefinitely on
# a stalled connection, and raise_for_status() surfaces an HTTP error here rather
# than letting an error payload be parsed as if it were data.
r = requests.get(search, timeout=60)
r.raise_for_status()

In [34]:
# Decode the response body as JSON
j = r.json()

In [36]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is
# the supported way to stack the fetched rows under the existing frame. Like
# append, this returns a new frame (df itself is not modified) and keeps each
# frame's own index labels.
pd.concat([df, pd.DataFrame.from_dict(j)])

Unnamed: 0,badge_num,name
0,057074,"MITCHELL,TERIANN"
1,077590,"SHELBY,DAVID"
2,10002,"MOLINA,EDGAR"
3,10003,"GRAY,ROY,CALVERT"
4,10004,"CLARK,TOMI,DANIELLE"
...,...,...
4077,UP8,
4078,UP9,
4079,WF 7310,"ROBERTS,JOHN"
4080,WF 8005,"SWINDELL,MARK"


In [27]:
# Show the current contents of df
df

Unnamed: 0,badge_num,name


In [35]:
# Show the decoded JSON payload
j

[{'badge_num': '057074', 'name': 'MITCHELL,TERIANN'},
 {'badge_num': '077590', 'name': 'SHELBY,DAVID'},
 {'badge_num': '10002', 'name': 'MOLINA,EDGAR'},
 {'badge_num': '10003', 'name': 'GRAY,ROY,CALVERT'},
 {'badge_num': '10004', 'name': 'CLARK,TOMI,DANIELLE'},
 {'badge_num': '10005', 'name': 'RUIZ,RICHARD,ALAN'},
 {'badge_num': '10006', 'name': 'HERNANDEZ,RACIEL'},
 {'badge_num': '10007', 'name': 'ROBINSON,KATHERINE'},
 {'badge_num': '10009', 'name': 'ROMERO,JOSHUA,MICHAEL'},
 {'badge_num': '10010', 'name': 'ESCARCEGA-ARGUMEDO,MARGARITA'},
 {'badge_num': '10012', 'name': 'ROLLISON,TONY,ANTWAN'},
 {'badge_num': '10013', 'name': 'DELASBOUR,LANCE,LA MAR DENNIS'},
 {'badge_num': '10014', 'name': 'PHENIX,KEYSHALA,MONIQUE'},
 {'badge_num': '10015', 'name': 'TURANO,JENNIFER,LENA'},
 {'badge_num': '10017', 'name': 'MANUEL,MICHAEL,CHADRICK'},
 {'badge_num': '10018', 'name': 'WINCHESTER,ANDREW,SCOTT'},
 {'badge_num': '10020', 'name': 'WILLIS,RYAN,DESHAWN'},
 {'badge_num': '10021', 'name': 'CHAT

In [43]:
# (row count, column count) of df
df.shape

(8111, 2)