In [1]:
import numpy as np
import pandas as pd
import psycopg2

In [2]:
import json

# Read the database connection settings from the local JSON config file.
with open('config.json') as f:
    conf = json.load(f)

# Pull out the individual settings used to build the connection.
host = conf['host']
database = conf['database']
user = conf['user']
passw = conf['passw']

In [3]:
# Open the PostgreSQL connection used by the queries below.
# The settings are passed as keyword arguments rather than formatted into a
# DSN string: a space, quote or backslash in any value (most likely the
# password) would otherwise corrupt the DSN, and no password-bearing string
# is left in the notebook namespace.
conn = psycopg2.connect(host=host, dbname=database, user=user, password=passw)

In [4]:
# Victim-level extract for incidents dated within 2016, restricted to victims
# with victim_type_id = 4 (presumably "individual" victims -- TODO confirm
# against the victim-type lookup table).
# Each row carries the incident id, incident year, victim age and sex, the
# offense category name and the county name of the reporting agency.
# DISTINCT applies to the whole select list, so one incident can still appear
# on several rows (e.g. several victims or offense categories).
# date_part(...) has no alias, so the column arrives in pandas as `date_part`.
query = '''
SELECT      DISTINCT inc.incident_id,
            date_part('year',inc.incident_date),
            vic.age_num,
            vic.sex_code AS VICTIM_SEX,
            oft.offense_category_name AS OFFENSE_CATEGORY,
            ori.countyname AS COUNTY
            
FROM        nibrs_victim as vic
JOIN        nibrs_offense as off
ON          off.incident_id = vic.incident_id
JOIN        nibrs_offense_type as oft
ON          oft.offense_type_id = off.offense_type_id
JOIN        nibrs_incident as inc
ON          inc.incident_id = vic.incident_id
JOIN        cde_agencies as ags
ON          ags.agency_id = inc.agency_id
JOIN        ori_to_fips as ori
ON          ori.ori9 = ags.ori

WHERE       vic.victim_type_id = 4
AND         inc.incident_date BETWEEN '2016-01-01' AND '2016-12-31';
'''

In [5]:
# Run the extract query over the open connection and load the result set.
df = pd.read_sql(sql=query, con=conn)

# Preview the first three rows, transposed so each field reads as a row.
df.head(3).transpose()

Unnamed: 0,0,1,2
incident_id,87264241,87264244,87264248
date_part,2016,2016,2016
age_num,59,50,26
victim_sex,F,M,F
offense_category,Larceny/Theft Offenses,Fraud Offenses,Larceny/Theft Offenses
county,BURNET,BURNET,BURNET


In [6]:
# Count the extract rows per county and expose the result as a two-column
# frame: `county`, `crime_cnt`.
crime_df = (
    df.groupby('county')['incident_id']
    .count()
    .rename('crime_cnt')
    .reset_index()
)
crime_df

Unnamed: 0,county,crime_cnt
0,BELL,3348
1,BEXAR,392
2,BRAZORIA,2994
3,BREWSTER,70
4,BURNET,342
5,CALHOUN,756
6,CAMERON,2
7,COLLIN,16800
8,CORYELL,111
9,DALLAS,4395


In [7]:
# Show every extract row with its county's row count attached.
# The result is only displayed; `df` itself is not modified.
county_totals = crime_df.set_index('county')
df.join(county_totals, on='county')

Unnamed: 0,incident_id,date_part,age_num,victim_sex,offense_category,county,crime_cnt
0,87264241,2016.0,59.0,F,Larceny/Theft Offenses,BURNET,342
1,87264244,2016.0,50.0,M,Fraud Offenses,BURNET,342
2,87264248,2016.0,26.0,F,Larceny/Theft Offenses,BURNET,342
3,87264256,2016.0,25.0,F,Assault Offenses,CAMERON,2
4,87264264,2016.0,26.0,M,Larceny/Theft Offenses,CAMERON,2
5,87264268,2016.0,41.0,M,Assault Offenses,BURNET,342
6,87264307,2016.0,33.0,M,Destruction/Damage/Vandalism of Property,BURNET,342
7,87264319,2016.0,23.0,F,Fraud Offenses,BURNET,342
8,87264341,2016.0,48.0,F,Fraud Offenses,BURNET,342
9,87264368,2016.0,26.0,M,Fraud Offenses,BURNET,342


In [8]:
# Confirm that `df` still has its original columns (first five rows).
df.head(5)

Unnamed: 0,incident_id,date_part,age_num,victim_sex,offense_category,county
0,87264241,2016.0,59.0,F,Larceny/Theft Offenses,BURNET
1,87264244,2016.0,50.0,M,Fraud Offenses,BURNET
2,87264248,2016.0,26.0,F,Larceny/Theft Offenses,BURNET
3,87264256,2016.0,25.0,F,Assault Offenses,CAMERON
4,87264264,2016.0,26.0,M,Larceny/Theft Offenses,CAMERON


In [9]:
# Keep only the two columns needed for the age-by-sex histograms.
age_columns = ['victim_sex', 'age_num']
age_df = df[age_columns]

In [10]:
# One age histogram per `victim_sex` value, drawn side by side on shared axes
# so the panels are directly comparable.
# `age_df` already carries exactly these two column names, so the former
# `age_df.columns = [...]` relabel was a no-op and has been removed.
axarr = age_df.hist(
    column='age_num',
    by='victim_sex',
    bins=99,
    rwidth=0.9,
    sharex=True,
    sharey=True,
    layout=(1, 3),  # one row, up to three panels (one per victim_sex value)
    figsize=(15, 5),
)

# Label every panel so each one stands on its own.
for ax in axarr.flatten():
    ax.set_xlabel("Age")
    ax.set_ylabel("Victim Count")

In [None]:
# Keep only the two columns needed for the offense-by-sex chart.
offense_columns = ['victim_sex', 'offense_category']
off_df = df[offense_columns]

In [None]:
# `offense_category` is a string label, not a number, so a numeric histogram
# with a hard-coded bin count is the wrong tool: count victims per
# (category, sex) explicitly and draw one bar panel per `victim_sex` value.
# `off_df` already carries exactly these two column names, so the former
# `off_df.columns = [...]` relabel was a no-op and has been removed.
offense_counts = pd.crosstab(off_df['offense_category'], off_df['victim_sex'])

axarr = offense_counts.plot.bar(
    subplots=True,
    layout=(1, offense_counts.shape[1]),  # one panel per victim_sex value
    sharex=True,
    sharey=True,
    legend=False,  # each panel is already titled with its victim_sex value
    figsize=(15, 5),
)

# Label every panel so each one stands on its own.
for ax in axarr.flatten():
    ax.set_xlabel("Offense Category")
    ax.set_ylabel("Victim Count")

In [None]:
# Number of extract rows for each victim sex code.
df.victim_sex.value_counts()

In [None]:
# Distinct offense category names present in the extract.
df.offense_category.unique()

In [None]:
# Row counts for every (victim sex, offense category) pair.
df.groupby(['victim_sex', 'offense_category'])['incident_id'].count()

In [None]:
# Load the hospital export from the working directory.
hospitals = pd.read_csv('export.csv')

# Preview the first three rows, transposed so each field reads as a row.
hospitals.head(3).transpose()

In [None]:
# Floor bed counts at zero (presumably negative values are missing-data
# sentinels -- TODO confirm against the export's documentation).
# `Series.clip_lower` was deprecated in pandas 0.24 and removed in 1.0;
# `clip(lower=...)` is the equivalent, supported spelling.
hospitals['beds'] = hospitals['beds'].clip(lower=0)

In [None]:
# Number of non-null hospital ids per county (a Series indexed by county).
hosp_cnt = hospitals.groupby('county')['id'].count()

In [None]:
# Turn the county-indexed Series into a two-column frame (`county`, `id`).
# This rebinds `hosp_cnt`, so running the cell a second time fails: a
# DataFrame has no `to_frame()`.
hosp_cnt = hosp_cnt.to_frame().reset_index()

In [None]:
# Display the per-county hospital counts (column `id` holds the count).
hosp_cnt

In [None]:
# Attach the per-county hospital count to every hospital row. Both frames
# have an `id` column, so the result carries `id_x` (hospital id) and
# `id_y` (county count).
test_merge = pd.merge(hospitals, hosp_cnt, on='county')

In [None]:
# Preview the merged frame, transposed so each field reads as a row.
test_merge.head(3).transpose()

In [None]:
# Total beds per county (a Series indexed by county).
bed_cnt = hospitals.groupby('county')['beds'].sum()

In [None]:
# Turn the county-indexed Series into a two-column frame (`county`, `beds`).
# This rebinds `bed_cnt`, so running the cell a second time fails: a
# DataFrame has no `to_frame()`.
bed_cnt = bed_cnt.to_frame().reset_index()

In [None]:
# Display the per-county bed totals (column `beds` holds the sum).
bed_cnt

In [None]:
# Attach the per-county bed total to every hospital row. Both frames have a
# `beds` column, so the result carries `beds_x` (hospital) and `beds_y`
# (county total).
hosp = pd.merge(test_merge, bed_cnt, on='county')

In [None]:
# Preview the doubly merged frame before its suffixed columns are renamed.
hosp.head(3).transpose()

In [None]:
# Replace the merge suffixes with descriptive names: per-hospital values keep
# their original names, per-county aggregates become `count` / `bed_count`.
suffix_to_name = {
    'id_x': 'id',
    'id_y': 'count',
    'beds_x': 'beds',
    'beds_y': 'bed_count',
}
hosp = hosp.rename(columns=suffix_to_name)

In [None]:
# Preview the frame again to check the renamed columns.
hosp.head(3).transpose()

In [None]:
# Keep the county key plus the two per-county aggregates. `hosp` has one row
# per hospital row, so a county can appear here more than once.
county_columns = ['county', 'count', 'bed_count']
hosp_merge = hosp[county_columns]

In [None]:
# `hosp_merge` repeats each county once per hospital row; joining on that
# non-unique key would duplicate every incident row once per hospital in its
# county. Keep a single row per county before joining.
county_hosp = hosp_merge.drop_duplicates(subset='county').set_index('county')

# Drop any `count` / `bed_count` columns left by an earlier run of this cell
# so it can be re-executed without a column-overlap error, then attach the
# per-county hospital aggregates to every incident row (left join on county).
df = (
    df.drop(columns=list(county_hosp.columns), errors='ignore')
    .join(county_hosp, on='county')
)

In [None]:
# Preview the incident rows with the hospital aggregates attached.
df.head(3).transpose()