# Exploring the relationship between gender and policing


You'll be working with a dataset of traffic stops by police officers that was collected by the Stanford Open Policing Project. 

You'll be focusing on data from the state of Rhode Island. 

In [None]:
import pandas as pd
# Load the Rhode Island traffic-stop records and preview the first rows.
# NOTE(review): the path is absolute and one directory name begins with a space
# ("/ 23. ...") — confirm it matches the environment before running elsewhere.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the CSV
# was written with its index — consider index_col=0 if that column is unwanted.
DATA_PATH = '/work/data_science_notes/ 23. Analyzing police activity with pandas/data/police_edited.csv'
ri = pd.read_csv(DATA_PATH)
ri.head()

Unnamed: 0,stop_datetime,stop_date,stop_time,driver_gender,driver_race,violation_raw,violation,search_conducted,search_type,stop_outcome,is_arrested,stop_duration,drugs_related_stop,district
0,2005-01-04 12:55:00,2005-01-04,12:55,M,White,Equipment/Inspection Violation,Equipment,False,,Citation,False,0-15 Min,False,Zone X4
1,2005-01-23 23:15:00,2005-01-23,23:15,M,White,Speeding,Speeding,False,,Citation,False,0-15 Min,False,Zone K3
2,2005-02-17 04:15:00,2005-02-17,04:15,M,White,Speeding,Speeding,False,,Citation,False,0-15 Min,False,Zone X4
3,2005-02-20 17:15:00,2005-02-20,17:15,M,White,Call for Service,Other,False,,Arrest Driver,True,16-30 Min,False,Zone X1
4,2005-02-24 01:20:00,2005-02-24,01:20,F,White,Speeding,Speeding,False,,Citation,False,0-15 Min,False,Zone X3


## Do the genders commit different violations?

### Examining traffic violations

In [None]:
# Tally how many stops fall under each violation category
ri['violation'].value_counts()

Speeding               48423
Moving violation       16224
Equipment              10921
Other                   4409
Registration/plates     3703
Seat belt               2856
Name: violation, dtype: int64

In [None]:
# Same tally, normalized so each category is shown as a fraction of the total
ri['violation'].value_counts(normalize=True)

Speeding               0.559571
Moving violation       0.187483
Equipment              0.126202
Other                  0.050950
Registration/plates    0.042791
Seat belt              0.033004
Name: violation, dtype: float64

### Comparing violations by gender

In [None]:
# Keep only the stops where driver_gender is 'F'
female = ri.loc[ri['driver_gender'].eq('F')]

In [None]:
# Fraction of each violation category among the female drivers' stops
female['violation'].value_counts(normalize=True)

Speeding               0.658114
Moving violation       0.138218
Equipment              0.105199
Registration/plates    0.044418
Other                  0.029738
Seat belt              0.024312
Name: violation, dtype: float64

In [None]:
# Keep only the stops where driver_gender is 'M'
male = ri.loc[ri['driver_gender'].eq('M')]

In [None]:
# Fraction of each violation category among the male drivers' stops
male['violation'].value_counts(normalize=True)

Speeding               0.522243
Moving violation       0.206144
Equipment              0.134158
Other                  0.058985
Registration/plates    0.042175
Seat belt              0.036296
Name: violation, dtype: float64

## Does gender affect who gets a ticket for speeding?

### Comparing speeding outcomes by gender

In [None]:
# Female drivers stopped for speeding: both conditions must hold on the same row
female_and_speeding = ri.loc[
    ri['driver_gender'].eq('F') & ri['violation'].eq('Speeding')
]

In [None]:
# Fraction of each stop outcome among female drivers stopped for speeding
female_and_speeding['stop_outcome'].value_counts(normalize=True)

Citation            0.952192
Arrest Driver       0.005752
N/D                 0.000959
Arrest Passenger    0.000639
No Action           0.000383
Name: stop_outcome, dtype: float64

In [None]:
# Male drivers stopped for speeding: both conditions must hold on the same row
male_and_speeding = ri.loc[
    ri['driver_gender'].eq('M') & ri['violation'].eq('Speeding')
]

In [None]:
# Fraction of each stop outcome among male drivers stopped for speeding
male_and_speeding['stop_outcome'].value_counts(normalize=True)

Citation            0.944595
Arrest Driver       0.015895
Arrest Passenger    0.001281
No Action           0.001068
N/D                 0.000976
Name: stop_outcome, dtype: float64

## Does gender affect whose vehicle is searched?

### Calculating the search rate

In [None]:
# search_conducted should be Boolean so that its mean can be read as a rate
ri['search_conducted'].dtype

dtype('bool')

In [None]:
# Search rate, first approach: the fraction of True values in the column
ri['search_conducted'].value_counts(normalize=True)

False    0.961785
True     0.038215
Name: search_conducted, dtype: float64

In [None]:
# Search rate, second approach: the mean of a Boolean column is its fraction of True values
ri['search_conducted'].mean()

0.0382153092354627

### Comparing search rates by gender

In [None]:
# Search rate among female drivers: select their search_conducted values, then average
ri.loc[ri['driver_gender'] == 'F', 'search_conducted'].mean()

0.019180617481282074

In [None]:
# Search rate among male drivers: select their search_conducted values, then average
ri.loc[ri['driver_gender'] == 'M', 'search_conducted'].mean()

0.04542557598546892

In [None]:
# A single groupby yields the search rate for every driver_gender value at once
ri.groupby('driver_gender')['search_conducted'].mean()

driver_gender
F    0.019181
M    0.045426
Name: search_conducted, dtype: float64

### Adding a second factor to the analysis

In [None]:
# Search rate for every (driver_gender, violation) pair.
# Question to answer from the output: is the rate about the same for males and
# females within each violation?
ri.groupby(['driver_gender', 'violation'])['search_conducted'].mean()

driver_gender  violation          
F              Equipment              0.039984
               Moving violation       0.039257
               Other                  0.041018
               Registration/plates    0.054924
               Seat belt              0.017301
               Speeding               0.008309
M              Equipment              0.071496
               Moving violation       0.061524
               Other                  0.046191
               Registration/plates    0.108802
               Seat belt              0.035119
               Speeding               0.027885
Name: search_conducted, dtype: float64

In [None]:
# Same rates with violation as the outer grouping key, which places the F and M
# figures for each violation on adjacent rows for easier comparison
ri.groupby(['violation', 'driver_gender'])['search_conducted'].mean()

violation            driver_gender
Equipment            F                0.039984
                     M                0.071496
Moving violation     F                0.039257
                     M                0.061524
Other                F                0.041018
                     M                0.046191
Registration/plates  F                0.054924
                     M                0.108802
Seat belt            F                0.017301
                     M                0.035119
Speeding             F                0.008309
                     M                0.027885
Name: search_conducted, dtype: float64

## Does gender affect who is frisked during a search?

During a vehicle search, the police officer may pat down the driver to check if they have a weapon. This is known as a "protective frisk".

In [None]:
# Frequency of each search_type value; the row labelled exactly
# "Protective Frisk" counts the searches where that was the only type recorded
ri['search_type'].value_counts()

Incident to Arrest                                          1290
Probable Cause                                               924
Inventory                                                    219
Reasonable Suspicion                                         214
Protective Frisk                                             164
Incident to Arrest,Inventory                                 123
Incident to Arrest,Probable Cause                            100
Probable Cause,Reasonable Suspicion                           54
Incident to Arrest,Inventory,Probable Cause                   35
Probable Cause,Protective Frisk                               35
Incident to Arrest,Protective Frisk                           33
Inventory,Probable Cause                                      25
Protective Frisk,Reasonable Suspicion                         19
Incident to Arrest,Inventory,Protective Frisk                 18
Incident to Arrest,Probable Cause,Protective Frisk            13
Inventory,Protective Fris

In [None]:
# Flag every stop whose search_type mentions "Protective Frisk", whether alone or
# inside a comma-separated combination; na=False maps a missing search_type to False
ri['frisk'] = ri['search_type'].str.contains('Protective Frisk', na=False)

In [None]:
# The frisk column should be Boolean so it can be summed and averaged
ri['frisk'].dtype

dtype('bool')

In [None]:
# Take the sum of frisk to count the total number of frisks:
# each True in the Boolean column contributes 1 to the sum
ri['frisk'].sum()

303

### Comparing frisk rates by gender

In [None]:
# Restrict to the stops where a search took place, using search_conducted as the row mask
searched = ri.loc[ri['search_conducted']]

In [None]:
# Proportion of searches that included a frisk (the mean of the Boolean frisk column)
searched['frisk'].mean()

0.09162382824312065

In [None]:
# Frisk rate among searched drivers, split by driver_gender
searched.groupby('driver_gender')['frisk'].mean()

driver_gender
F    0.074561
M    0.094353
Name: frisk, dtype: float64

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=dc001eea-22fe-4a27-852d-7fbece520334' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>