# Intelligent Model: Random Forest Classifier

In [1]:
import pandas as pd

### Read in query outputs to dataframes

In [2]:
crime_df = pd.read_csv('crime_query.csv')
h_df = pd.read_csv('hosp_query.csv')
f_df = pd.read_csv('fire_query.csv')
pop_df = pd.read_csv('census_populations.csv')

In [3]:
crime_df.head().T

Unnamed: 0,0,1,2,3,4
incident_id,67693343,67693344,67693345,67693346,67693348
year,2013,2013,2013,2013,2013
age_num,46,57,51,24,49
victim_sex,F,M,F,M,M
crime_against,Property,Property,Property,Property,Property
offense,All Other Larceny,Theft From Motor Vehicle,Theft From Motor Vehicle,All Other Larceny,Burglary/Breaking & Entering
offense_category,Larceny/Theft Offenses,Larceny/Theft Offenses,Larceny/Theft Offenses,Larceny/Theft Offenses,Burglary/Breaking & Entering
location_id,20,18,18,5,20
location_name,Residence/Home,Parking/Drop Lot/Garage,Parking/Drop Lot/Garage,Commercial/Office Building,Residence/Home
county,POTTER,POTTER,POTTER,POTTER,POTTER


### Get crime count by county and join to crime_df

In [4]:
tmp_df4 = crime_df.groupby(['county']).incident_id.agg('count')
tmp_df4 = tmp_df4.to_frame().reset_index()
tmp_df4 = tmp_df4.rename({'incident_id': 'crime_cnt'}, axis='columns')
tmp_df4

Unnamed: 0,county,crime_cnt
0,BELL,10240
1,BEXAR,35
2,BRAZORIA,9487
3,BREWSTER,61
4,BURNET,1303
5,CALHOUN,1634
6,CAMERON,3
7,CHEROKEE,396
8,COKE,2
9,COLLIN,55442


In [5]:
crime_df = crime_df.join(tmp_df4.set_index('county'), on='county')

In [6]:
crime_df.head().T

Unnamed: 0,0,1,2,3,4
incident_id,67693343,67693344,67693345,67693346,67693348
year,2013,2013,2013,2013,2013
age_num,46,57,51,24,49
victim_sex,F,M,F,M,M
crime_against,Property,Property,Property,Property,Property
offense,All Other Larceny,Theft From Motor Vehicle,Theft From Motor Vehicle,All Other Larceny,Burglary/Breaking & Entering
offense_category,Larceny/Theft Offenses,Larceny/Theft Offenses,Larceny/Theft Offenses,Larceny/Theft Offenses,Burglary/Breaking & Entering
location_id,20,18,18,5,20
location_name,Residence/Home,Parking/Drop Lot/Garage,Parking/Drop Lot/Garage,Commercial/Office Building,Residence/Home
county,POTTER,POTTER,POTTER,POTTER,POTTER


### Get hospital and bed counts by county and join to h_df

In [7]:
h_df.head()

Unnamed: 0,county,hosp_id,beds
0,HARRIS,25577030,1082.0
1,JONES,3379501,45.0
2,COLLIN,475013,73.0
3,POTTER,679106,451.0
4,BRAZORIA,1177515,64.0


In [8]:
h_df['beds'] = h_df['beds'].clip_lower(1)

In [9]:
tmp_df = h_df.groupby(['county']).hosp_id.agg('count')
tmp_df = tmp_df.to_frame().reset_index()
tmp_df = tmp_df.rename({'hosp_id': 'hosp_cnt'}, axis='columns')
tmp_df

Unnamed: 0,county,hosp_cnt
0,ANDERSON,1
1,ANDREWS,1
2,ANGELINA,2
3,ATASCOSA,1
4,AUSTIN,1
5,BAILEY,1
6,BASTROP,1
7,BAYLOR,1
8,BEE,1
9,BELL,7


In [10]:
tmp_df2 = h_df.groupby(['county']).beds.agg('sum')
tmp_df2 = tmp_df2.to_frame().reset_index()
tmp_df2 = tmp_df2.rename({'beds': 'bed_cnt'}, axis='columns')
tmp_df2

Unnamed: 0,county,bed_cnt
0,ANDERSON,86.0
1,ANDREWS,34.0
2,ANGELINA,420.0
3,ATASCOSA,67.0
4,AUSTIN,32.0
5,BAILEY,25.0
6,BASTROP,8.0
7,BAYLOR,49.0
8,BEE,69.0
9,BELL,896.0


In [11]:
h_df = tmp_df.join(tmp_df2.set_index('county'), on='county')
h_df

Unnamed: 0,county,hosp_cnt,bed_cnt
0,ANDERSON,1,86.0
1,ANDREWS,1,34.0
2,ANGELINA,2,420.0
3,ATASCOSA,1,67.0
4,AUSTIN,1,32.0
5,BAILEY,1,25.0
6,BASTROP,1,8.0
7,BAYLOR,1,49.0
8,BEE,1,69.0
9,BELL,7,896.0


### Get fire station count by county and join to f_df

In [12]:
f_df.head()

Unnamed: 0,county,fire_id
0,EL PASO,10410064
1,EL PASO,10410225
2,EL PASO,10139838
3,EL PASO,10139423
4,EL PASO,10410065


In [13]:
tmp_df3 = f_df.groupby(['county']).fire_id.agg('count')
tmp_df3 = tmp_df3.to_frame().reset_index()
tmp_df3 = tmp_df3.rename({'fire_id': 'fire_cnt'}, axis='columns')
tmp_df3

Unnamed: 0,county,fire_cnt
0,ANDERSON,21
1,ANDREWS,1
2,ANGELINA,18
3,ARANSAS,5
4,ARCHER,7
5,ARMSTRONG,2
6,ATASCOSA,2
7,AUSTIN,5
8,BAILEY,1
9,BANDERA,6


### Merge hospital dataframe and fire station dataframe

In [14]:
hf_df = tmp_df3.join(h_df.set_index('county'), on='county')
hf_df

Unnamed: 0,county,fire_cnt,hosp_cnt,bed_cnt
0,ANDERSON,21,1.0,86.0
1,ANDREWS,1,1.0,34.0
2,ANGELINA,18,2.0,420.0
3,ARANSAS,5,,
4,ARCHER,7,,
5,ARMSTRONG,2,,
6,ATASCOSA,2,1.0,67.0
7,AUSTIN,5,1.0,32.0
8,BAILEY,1,1.0,25.0
9,BANDERA,6,,


### Join to training dataframe

In [15]:
train_df = crime_df.join(hf_df.set_index('county'), on='county')

In [16]:
train_df.head(3).T

Unnamed: 0,0,1,2
incident_id,67693343,67693344,67693345
year,2013,2013,2013
age_num,46,57,51
victim_sex,F,M,F
crime_against,Property,Property,Property
offense,All Other Larceny,Theft From Motor Vehicle,Theft From Motor Vehicle
offense_category,Larceny/Theft Offenses,Larceny/Theft Offenses,Larceny/Theft Offenses
location_id,20,18,18
location_name,Residence/Home,Parking/Drop Lot/Garage,Parking/Drop Lot/Garage
county,POTTER,POTTER,POTTER


In [17]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463095 entries, 0 to 463094
Data columns (total 16 columns):
incident_id         463095 non-null int64
year                463095 non-null int64
age_num             456081 non-null float64
victim_sex          463095 non-null object
crime_against       463095 non-null object
offense             463095 non-null object
offense_category    463095 non-null object
location_id         463095 non-null int64
location_name       463095 non-null object
county              463095 non-null object
officers            463095 non-null int64
civilians           463095 non-null int64
crime_cnt           463095 non-null int64
fire_cnt            463095 non-null int64
hosp_cnt            461467 non-null float64
bed_cnt             461467 non-null float64
dtypes: float64(3), int64(7), object(6)
memory usage: 56.5+ MB


### Bring in population feature

In [18]:
pop_df.head()

Unnamed: 0.1,Unnamed: 0,county,year,population,population_description
0,0,ANDERSON,2010,58477,"25,000 - 99,999"
1,4,ANDERSON,2011,58379,"25,000 - 99,999"
2,6,ANDERSON,2012,58036,"25,000 - 99,999"
3,8,ANDERSON,2013,57960,"25,000 - 99,999"
4,10,ANDERSON,2014,57837,"25,000 - 99,999"


In [19]:
pop_df['county'] = pop_df['county'].str.upper()
pop_df.drop_duplicates(['county','year'], inplace=True)

In [20]:
pop_df.head()

Unnamed: 0.1,Unnamed: 0,county,year,population,population_description
0,0,ANDERSON,2010,58477,"25,000 - 99,999"
1,4,ANDERSON,2011,58379,"25,000 - 99,999"
2,6,ANDERSON,2012,58036,"25,000 - 99,999"
3,8,ANDERSON,2013,57960,"25,000 - 99,999"
4,10,ANDERSON,2014,57837,"25,000 - 99,999"


In [21]:
pop_df['population_description'] = pd.cut(pop_df['population'], [0, 10000, 25000, 100000, 500000], 
                                   labels=['Under 10,000', '10,000 - 24,999', '25,000 - 99,999','Over 100,000'])


In [22]:
pop_df.head(10)

Unnamed: 0.1,Unnamed: 0,county,year,population,population_description
0,0,ANDERSON,2010,58477,"25,000 - 99,999"
1,4,ANDERSON,2011,58379,"25,000 - 99,999"
2,6,ANDERSON,2012,58036,"25,000 - 99,999"
3,8,ANDERSON,2013,57960,"25,000 - 99,999"
4,10,ANDERSON,2014,57837,"25,000 - 99,999"
5,12,ANDERSON,2015,57641,"25,000 - 99,999"
6,14,ANDERSON,2016,57558,"25,000 - 99,999"
7,16,ANDERSON,2017,57741,"25,000 - 99,999"
8,18,ANDREWS,2010,14817,"10,000 - 24,999"
9,22,ANDREWS,2011,15386,"10,000 - 24,999"


In [23]:
pop_df['population_description'].value_counts()

Under 10,000       677
10,000 - 24,999    511
25,000 - 99,999    500
Over 100,000       225
Name: population_description, dtype: int64

In [24]:
train_df =pd.merge(train_df, pop_df, how = 'outer', on=['county','year'])
train_df.head()

Unnamed: 0.1,incident_id,year,age_num,victim_sex,crime_against,offense,offense_category,location_id,location_name,county,officers,civilians,crime_cnt,fire_cnt,hosp_cnt,bed_cnt,Unnamed: 0,population,population_description
0,67693343.0,2013,46.0,F,Property,All Other Larceny,Larceny/Theft Offenses,20.0,Residence/Home,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
1,67693344.0,2013,57.0,M,Property,Theft From Motor Vehicle,Larceny/Theft Offenses,18.0,Parking/Drop Lot/Garage,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
2,67693345.0,2013,51.0,F,Property,Theft From Motor Vehicle,Larceny/Theft Offenses,18.0,Parking/Drop Lot/Garage,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
3,67693346.0,2013,24.0,M,Property,All Other Larceny,Larceny/Theft Offenses,5.0,Commercial/Office Building,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
4,67693348.0,2013,49.0,M,Property,Burglary/Breaking & Entering,Burglary/Breaking & Entering,20.0,Residence/Home,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"


In [25]:
train_df.dropna(axis=0, subset=['incident_id'], inplace=True)
train_df.head()

Unnamed: 0.1,incident_id,year,age_num,victim_sex,crime_against,offense,offense_category,location_id,location_name,county,officers,civilians,crime_cnt,fire_cnt,hosp_cnt,bed_cnt,Unnamed: 0,population,population_description
0,67693343.0,2013,46.0,F,Property,All Other Larceny,Larceny/Theft Offenses,20.0,Residence/Home,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
1,67693344.0,2013,57.0,M,Property,Theft From Motor Vehicle,Larceny/Theft Offenses,18.0,Parking/Drop Lot/Garage,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
2,67693345.0,2013,51.0,F,Property,Theft From Motor Vehicle,Larceny/Theft Offenses,18.0,Parking/Drop Lot/Garage,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
3,67693346.0,2013,24.0,M,Property,All Other Larceny,Larceny/Theft Offenses,5.0,Commercial/Office Building,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"
4,67693348.0,2013,49.0,M,Property,Burglary/Breaking & Entering,Burglary/Breaking & Entering,20.0,Residence/Home,POTTER,343.0,68.0,47721.0,12.0,5.0,878.0,3374.0,122088.0,"Over 100,000"


In [26]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 463095 entries, 0 to 463094
Data columns (total 19 columns):
incident_id               463095 non-null float64
year                      463095 non-null int64
age_num                   456081 non-null float64
victim_sex                463095 non-null object
crime_against             463095 non-null object
offense                   463095 non-null object
offense_category          463095 non-null object
location_id               463095 non-null float64
location_name             463095 non-null object
county                    463095 non-null object
officers                  463095 non-null float64
civilians                 463095 non-null float64
crime_cnt                 463095 non-null float64
fire_cnt                  463095 non-null float64
hosp_cnt                  461467 non-null float64
bed_cnt                   461467 non-null float64
Unnamed: 0                462307 non-null float64
population                462307 non-null float

In [27]:
# Drop NA in victim age column (<2%)
train_df.dropna(axis=0, subset=['age_num'], inplace=True)

# Drop NA in hospital_cnt column (<2%)
train_df.dropna(axis=0, subset=['hosp_cnt'], inplace=True)

# Drop NA in population column (<2%)
train_df.dropna(axis=0, subset=['population'], inplace=True)

# Drop offense_category_Gambling Offenses
train_df = train_df[train_df.offense_category != 'Gambling Offenses']

# Create Ratio Columns
train_df['crime_pop_ratio'] = train_df['crime_cnt'] / train_df['population']
train_df['beds_pop_ratio'] = train_df['bed_cnt'] / train_df['population']
train_df['beds_crime_ratio'] = train_df['bed_cnt'] / train_df['crime_cnt']
train_df['fire_pop_ratio'] = train_df['fire_cnt'] / train_df['population']
train_df['fire_crime_ratio'] = train_df['fire_cnt'] / train_df['crime_cnt']

# Reduce df to only desired features to train/test model
train_df = train_df[['age_num', 'victim_sex', 'offense_category', 'population_description',
                     'officers', 'civilians', 'crime_pop_ratio', 'beds_pop_ratio', 'beds_crime_ratio',
                     'fire_pop_ratio', 'fire_crime_ratio', 'county']]

In [28]:
train_df['population_description'].value_counts()

Over 100,000       153415
25,000 - 99,999     38620
10,000 - 24,999      3931
Under 10,000          101
Name: population_description, dtype: int64

In [29]:
train_df['crime_pop_ratio'].value_counts()

0.079568    51005
0.078250    50632
0.076740    48436
0.064609    18707
0.060620    18160
0.062602    17255
0.393968    16214
0.391367    16113
0.390874    14973
0.038820    10768
0.040163     9747
0.041512     9133
0.194799     8179
0.196802     7445
0.199683     6990
0.087082     6740
0.089490     6157
0.088230     6140
0.136818     6033
0.137734     5434
0.167770     5289
0.137572     5111
0.166055     5000
0.005343     4914
0.164082     4771
0.005495     4743
0.005421     3946
0.030468     3695
0.031007     3362
0.028734     3203
            ...  
0.022286      366
0.062601      350
0.029097      343
0.062903      302
0.063386      240
0.007738      205
0.004485      201
0.004442      191
0.004428      187
0.007678      175
0.022746      156
0.000682      152
0.022903      150
0.000700      131
0.023077      120
0.003587      110
0.003546      104
0.011949       94
0.003635       88
0.011617       74
0.011864       63
0.006680       60
0.010992       41
0.000029       24
0.000984  

In [30]:
train_df['beds_pop_ratio'].value_counts()

0.002845    51005
0.002797    50632
0.002743    48436
0.002433    18707
0.002283    18160
0.002358    17255
0.007248    16214
0.007201    16113
0.007192    14973
0.001376    10768
0.001424     9747
0.001472     9133
0.004541     8179
0.004588     7445
0.004655     6990
0.004730     6740
0.004861     6157
0.004792     6140
0.005539     6033
0.005576     5434
0.007195     5289
0.005569     5111
0.007122     5000
0.002868     4914
0.007037     4771
0.002950     4743
0.002910     3946
0.002666     3695
0.002713     3362
0.001081     3203
            ...  
0.000018      366
0.001690      350
0.001585      343
0.001698      302
0.001711      240
0.001935      205
0.002988      201
0.002959      191
0.002949      187
0.001920      175
0.002511      156
0.002355      152
0.002528      150
0.002417      131
0.002547      120
0.002448      110
0.002420      104
0.001539       94
0.002481       88
0.001496       74
0.001528       63
0.002738       60
0.011503       41
0.002393       24
0.001436  

In [31]:
train_df['beds_crime_ratio'].value_counts()

0.035750      150073
0.037661       54122
0.018399       47300
0.035456       29648
0.023311       22614
0.054316       19037
0.040484       16578
0.042888       15060
0.536782       13603
0.087500       10110
0.037630        9477
0.011490        7828
0.145761        7746
0.016222        5869
0.031770        5277
0.013942        4662
0.028589        4589
0.333968        4097
0.186739        3685
3.947205        3502
0.236919        3409
0.139561        2640
0.108303        1902
0.056995        1735
0.015300        1566
0.339971        1327
0.054490        1287
0.000808        1225
0.026998         892
0.097814         794
3.454777         749
0.666099         579
0.110375         426
0.250000         396
0.682390         302
0.128755         231
0.409836          60
1.046512          41
196.142857        35
83.750000         24
1.458333          22
416.333333         3
Name: beds_crime_ratio, dtype: int64

In [32]:
train_df['fire_pop_ratio'].value_counts()

0.000059    51005
0.000058    50632
0.000057    48436
0.000058    18707
0.000055    18160
0.000056    17255
0.000099    16214
0.000098    16113
0.000098    14973
0.000053    10768
0.000054     9747
0.000056     9133
0.000153     8179
0.000155     7445
0.000157     6990
0.000112     6740
0.000116     6157
0.000114     6140
0.000137     6033
0.000138     5434
0.000200     5289
0.000138     5111
0.000198     5000
0.000051     4914
0.000195     4771
0.000053     4743
0.000052     3946
0.000098     3695
0.000100     3362
0.000097     3203
            ...  
0.000234      366
0.000406      350
0.000223      343
0.000408      302
0.000411      240
0.000293      205
0.000153      201
0.000151      191
0.000151      187
0.000291      175
0.000251      156
0.000066      152
0.000253      150
0.000068      131
0.000255      120
0.000180      110
0.000178      104
0.000462       94
0.000183       88
0.000449       74
0.000458       63
0.000329       60
0.000511       41
0.000039       24
0.000328  

In [33]:
train_df['fire_crime_ratio'].value_counts()

0.000742    150073
0.000902     54122
0.000251     47300
0.001355     29648
0.000786     22614
0.001291     19037
0.001003     16578
0.001191     15060
0.009598     13603
0.003223     10110
0.003373      9477
0.001873      7828
0.006777      7746
0.002704      5869
0.001121      5277
0.002535      4662
0.003756      4589
0.005471      4097
0.008390      3685
0.060982      3502
0.009884      3409
0.008321      2640
0.002063      1902
0.014393      1735
0.004284      1566
0.019174      1327
0.007675      1287
0.010509      1225
0.006479       892
0.006904       794
0.096815       749
0.034072       579
0.011038       426
0.037879       396
0.050314       302
0.038627       231
0.049180        60
0.046512        41
2.514286        35
1.375000        24
0.333333        22
6.666667         3
Name: fire_crime_ratio, dtype: int64

In [34]:
pop_df['population_description'] = pd.cut(pop_df['population'], [0, 10000, 25000, 100000, 500000], labels=['Under 10,000', '10,000 - 24,999', '25,000 - 99,999','Over 100,000'])

In [35]:
train_df['crime_pop_ratio'].describe()

count    454522.000000
mean          0.108643
std           0.105649
min           0.000007
25%           0.060620
50%           0.078250
75%           0.089490
max           0.393968
Name: crime_pop_ratio, dtype: float64

In [36]:
officers_avg = train_df.groupby(['population_description']).officers.agg('mean')
officers_avg

population_description
Under 10,000        11.158416
10,000 - 24,999     16.167896
25,000 - 99,999     69.365510
Over 100,000       207.098791
Name: officers, dtype: float64

In [37]:
civilians_avg = train_df.groupby(['population_description']).civilians.agg('mean')
civilians_avg

population_description
Under 10,000        2.217822
10,000 - 24,999     7.837446
25,000 - 99,999    36.503884
Over 100,000       59.745162
Name: civilians, dtype: float64

In [38]:
rural = train_df[train_df.population_description == 'Under 10,000']
suburban = train_df[train_df.population_description == '10,000 - 24,999']
urban = train_df[train_df.population_description == '25,000 - 99,999']
metro = train_df[train_df.population_description == 'Over 100,000']

In [39]:
rural_cp_rat_high = rural['crime_pop_ratio'].quantile(.2)
rural_bp_rat_low = rural['beds_pop_ratio'].quantile(.8)
rural_bp_rat_high = rural['beds_pop_ratio'].quantile(1)
rural_bc_rat_low = rural['beds_crime_ratio'].quantile(.8)
rural_bc_rat_high = rural['beds_crime_ratio'].quantile(1)
rural_fp_rat_low = rural['fire_pop_ratio'].quantile(.8)
rural_fp_rat_high = rural['fire_pop_ratio'].quantile(1)
rural_fc_rat_low = rural['fire_crime_ratio'].quantile(.8)
rural_fc_rat_high = rural['fire_crime_ratio'].quantile(1)

In [40]:
print((0,rural_cp_rat_high), (rural_bp_rat_low,rural_bp_rat_high), (rural_bc_rat_low,rural_bc_rat_high), 
      (rural_fp_rat_low,rural_fp_rat_high), (rural_fc_rat_low, rural_fc_rat_high))

(0, 0.006679807271134472) (0.011503067484662576, 0.011503067484662576) (1.0465116279069768, 1.0465116279069768) (0.0005112474437627812, 0.0005112474437627812) (0.04918032786885246, 0.04918032786885246)


In [41]:
suburban_cp_rat_high = suburban['crime_pop_ratio'].quantile(.2)
suburban_bp_rat_low = suburban['beds_pop_ratio'].quantile(.8)
suburban_bp_rat_high = suburban['beds_pop_ratio'].quantile(1)
suburban_bc_rat_low = suburban['beds_crime_ratio'].quantile(.8)
suburban_bc_rat_high = suburban['beds_crime_ratio'].quantile(1)
suburban_fp_rat_low = suburban['fire_pop_ratio'].quantile(.8)
suburban_fp_rat_high = suburban['fire_pop_ratio'].quantile(1)
suburban_fc_rat_low = suburban['fire_crime_ratio'].quantile(.8)
suburban_fc_rat_high = suburban['fire_crime_ratio'].quantile(1)

In [42]:
print((0,suburban_cp_rat_high), (suburban_bp_rat_low,suburban_bp_rat_high), 
      (suburban_bc_rat_low,suburban_bc_rat_high), (suburban_fp_rat_low,suburban_fp_rat_high), 
      (suburban_fc_rat_low,suburban_fc_rat_high))

(0, 0.0577216871471272) (0.005645964795748921, 0.005645964795748921) (0.09781357882623705, 1.4583333333333333) (0.00040562466197944835, 0.0004615621313913534) (0.006904487917146145, 0.3333333333333333)


In [43]:
urban_cp_rat_high = urban['crime_pop_ratio'].quantile(.2)
urban_bp_rat_low = urban['beds_pop_ratio'].quantile(.8)
urban_bp_rat_high = urban['beds_pop_ratio'].quantile(1)
urban_bc_rat_low = urban['beds_crime_ratio'].quantile(.8)
urban_bc_rat_high = urban['beds_crime_ratio'].quantile(1)
urban_fp_rat_low = urban['fire_pop_ratio'].quantile(.8)
urban_fp_rat_high = urban['fire_pop_ratio'].quantile(1)
urban_fc_rat_low = urban['fire_crime_ratio'].quantile(.8)
urban_fc_rat_high = urban['fire_crime_ratio'].quantile(1)

In [44]:
print((0,urban_cp_rat_high), (urban_bp_rat_low,urban_bp_rat_high), (urban_bc_rat_low,urban_bc_rat_high), 
      (urban_fp_rat_low,urban_fp_rat_high), (urban_fc_rat_low, urban_fc_rat_high))

(0, 0.047208628539429795) (0.007121818261748802, 0.007195362988296431) (0.04288834469521477, 0.6823899371069182) (0.0002999850007499625, 0.00033606256874007086) (0.0027036160865157146, 0.050314465408805034)


In [45]:
metro_cp_rat_high = metro['crime_pop_ratio'].quantile(.2)
metro_bp_rat_low = metro['beds_pop_ratio'].quantile(.8)
metro_bp_rat_high = metro['beds_pop_ratio'].quantile(1)
metro_bc_rat_low = metro['beds_crime_ratio'].quantile(.8)
metro_bc_rat_high = metro['beds_crime_ratio'].quantile(1)
metro_fp_rat_low = metro['fire_pop_ratio'].quantile(.8)
metro_fp_rat_high = metro['fire_pop_ratio'].quantile(1)
metro_fc_rat_low = metro['fire_crime_ratio'].quantile(.8)
metro_fc_rat_high = metro['fire_crime_ratio'].quantile(1)

In [46]:
print((0,metro_cp_rat_high), (metro_bp_rat_low,metro_bp_rat_high), (metro_bc_rat_low,metro_bc_rat_high), 
      (metro_fp_rat_low,metro_fp_rat_high), (metro_fc_rat_low, metro_fc_rat_high))

(0, 0.03057116791813664) (0.007200616727081864, 0.007248470638740516) (0.054316398182569184, 416.3333333333333) (0.00013856044930535028, 0.00021260067868678195) (0.00322265625, 6.666666666666667)


In [47]:
rural_dict = {'crime_pop': rural_cp_rat_high/2, 'beds_pop': (rural_bp_rat_low+rural_bp_rat_high)/2, 'beds_crime':
             (rural_bc_rat_low+rural_bc_rat_high)/2, 'fire_pop': (rural_fp_rat_low+rural_fp_rat_high)/2,
             'fire_crime': (rural_fc_rat_low+rural_fc_rat_high)/2}
rural_dict

{'beds_crime': 1.0465116279069768,
 'beds_pop': 0.011503067484662576,
 'crime_pop': 0.003339903635567236,
 'fire_crime': 0.04918032786885246,
 'fire_pop': 0.0005112474437627812}

In [48]:
suburban_dict = {'crime_pop': suburban_cp_rat_high/2, 'beds_pop': (suburban_bp_rat_low+suburban_bp_rat_high)/2, 
                 'beds_crime': (suburban_bc_rat_low+suburban_bc_rat_high)/2, 'fire_pop': 
                 (suburban_fp_rat_low+suburban_fp_rat_high)/2, 'fire_crime': 
                 (suburban_fc_rat_low+suburban_fc_rat_high)/2}
suburban_dict

{'beds_crime': 0.7780734560797852,
 'beds_pop': 0.005645964795748921,
 'crime_pop': 0.0288608435735636,
 'fire_crime': 0.17011891062523973,
 'fire_pop': 0.0004335933966854009}

In [49]:
urban_dict = {'crime_pop': urban_cp_rat_high/2, 'beds_pop': (urban_bp_rat_low+urban_bp_rat_high)/2, 'beds_crime':
             (urban_bc_rat_low+urban_bc_rat_high)/2, 'fire_pop': (urban_fp_rat_low+urban_fp_rat_high)/2,
             'fire_crime': (urban_fc_rat_low+urban_fc_rat_high)/2}
urban_dict

{'beds_crime': 0.36263914090106647,
 'beds_pop': 0.007158590625022616,
 'crime_pop': 0.023604314269714897,
 'fire_crime': 0.026509040747660376,
 'fire_pop': 0.00031802378474501665}

In [50]:
metro_dict = {'crime_pop': metro_cp_rat_high/2, 'beds_pop': (metro_bp_rat_low+metro_bp_rat_high)/2, 'beds_crime':
             (metro_bc_rat_low+metro_bc_rat_high)/2, 'fire_pop': (metro_fp_rat_low+metro_fp_rat_high)/2,
             'fire_crime': (metro_fc_rat_low+metro_fc_rat_high)/2}
metro_dict

{'beds_crime': 208.19382486575793,
 'beds_pop': 0.00722454368291119,
 'crime_pop': 0.01528558395906832,
 'fire_crime': 3.3349446614583336,
 'fire_pop': 0.0001755805639960661}