In [27]:
# Import dependencies
import os
import pandas as pd
import numpy as np

# File to load.
# Build the path with os.path.join so the directory separator matches the host
# OS; a literal backslash only resolves on Windows.
csvpath = os.path.join("Resources", "fatal_police_shootings.csv")

# Read CSV and store into dataframe
Brynn_project_df = pd.read_csv(csvpath)

In [28]:
# Display dataframe: a bare expression on the last line of the cell is what the
# notebook renders as the cell output (the table that follows).
Brynn_project_df

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,1/2/2015,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,1/2/2015,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,1/3/2015,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,1/4/2015,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,1/4/2015,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5437,5954,James Tober,6/28/2020,shot,gun,68.0,M,,Kettering,OH,True,attack,Not fleeing,False
5438,5953,John Parks,6/29/2020,shot,undetermined,57.0,M,W,Paducah,KY,False,undetermined,Car,False
5439,5958,Wade Russell Meisberger,6/29/2020,shot,gun,48.0,M,W,Hazleton,PA,False,attack,Car,False
5440,5962,Brittany S. Teichroeb,6/29/2020,shot,gun,26.0,F,W,Midland,TX,False,other,Other,False


# Total Fatal Shooting Deaths

In [29]:
# Number of distinct values in the id column (missing ids are not counted).
# This figure is the denominator for every percentage computed further down.
total_deaths = Brynn_project_df['id'].nunique()

# Wrap the scalar in a one-row frame so it renders as a table
display_total_deaths = pd.DataFrame(
    {"Total Fatal Shooting Deaths": [total_deaths]},
    index=[0],
)
display_total_deaths

Unnamed: 0,Total Fatal Shooting Deaths
0,5442


# Gender Demographics

Unnamed: 0,Total Fatal Shooting Deaths,Percentage of Deaths
M,5199,95.53%
F,242,4.45%


# Age Demographics

In [47]:
# Establish bin edges for ages. pd.cut builds right-closed intervals by default,
# so for whole-number ages the edges (9, 15] hold 10-15, (15, 20] hold 16-20, etc.
bins = [0, 9, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 89, 999]

# Categorize age groups: one label per interval (one fewer label than edges)
age_groups = ["<10", "10-15", "16-20", "21-25", "26-30",
              "31-35", "36-40", "41-45", "46-50", "51-55", "56-60",
              "61-65", "66-70", "71-75", "76-80", "81-85", "86-89", "90+"]

# Tag every record with its age group (this adds a column to the main frame).
# include_lowest=True closes the first interval on the left, so an age of
# exactly 0 lands in "<10" instead of silently becoming NaN.
# Records whose age is missing stay NaN and are excluded from the counts below,
# which is why the percentages need not sum to 100%.
Brynn_project_df["Age Group"] = pd.cut(
    Brynn_project_df["age"],
    bins,
    labels=age_groups,
    include_lowest=True,
)

# Calculate totals and percentages: one row per (id, age group) pair, then the
# share of all deaths (total_deaths) that each group represents
age_group = Brynn_project_df[["id", "Age Group"]].drop_duplicates()
age_count = age_group["Age Group"].value_counts()
age_percent = (age_count / total_deaths) * 100

# Summary
age_demographics = pd.DataFrame({
    "Total Fatal Shooting Deaths per Age Group": age_count,
    "Percentage of Deaths per Age Group": age_percent})

# Clean/Format: render the percentage as text with two decimals and a % sign
age_demographics["Percentage of Deaths per Age Group"] = (
    age_demographics["Percentage of Deaths per Age Group"].map('{:,.2f}%'.format)
)

# Display, ordered by age group; the index is categorical, so sort_index
# follows the order of age_groups rather than alphabetical order
age_demographics.sort_index()

Unnamed: 0,Total Fatal Shooting Deaths per Age Group,Percentage of Deaths per Age Group
<10,2,0.04%
10-15,18,0.33%
16-20,355,6.52%
21-25,689,12.66%
26-30,804,14.77%
31-35,848,15.58%
36-40,677,12.44%
41-45,497,9.13%
46-50,441,8.10%
51-55,326,5.99%


# Clean DataFrame for Readability

In [52]:
# Expand the single-letter codes into readable labels.
gender_labels = {"M": "Male", "F": "Female"}
race_labels = {"A": "Asian", "W": "White", "B": "Black", "O": "Other",
               "H": "Hispanic", "N": "Native American"}

# Assign the result back to the column rather than calling
# .replace(..., inplace=True) on a column selection: the chained in-place form
# mutates a temporary object, triggers a warning on recent pandas, and leaves
# the frame untouched once Copy-on-Write is enabled.
#
# Missing values are labelled "Unknown" explicitly. The previous
# `Brynn_project_df.replace(np.nan, '', regex=True)` discarded its result, so it
# changed nothing, while the tables rendered below rely on an "Unknown" category
# (value_counts would otherwise drop the missing entries from the summaries).
Brynn_project_df["gender"] = (
    Brynn_project_df["gender"].replace(gender_labels).fillna("Unknown")
)
Brynn_project_df["race"] = (
    Brynn_project_df["race"].replace(race_labels).fillna("Unknown")
)

Brynn_project_df

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,Age Group
0,3,Tim Elliot,1/2/2015,shot,gun,53.0,Male,Asian,Shelton,WA,True,attack,Not fleeing,False,51-55
1,4,Lewis Lee Lembke,1/2/2015,shot,gun,47.0,Male,White,Aloha,OR,False,attack,Not fleeing,False,46-50
2,5,John Paul Quintero,1/3/2015,shot and Tasered,unarmed,23.0,Male,Hispanic,Wichita,KS,False,other,Not fleeing,False,21-25
3,8,Matthew Hoffman,1/4/2015,shot,toy weapon,32.0,Male,White,San Francisco,CA,True,attack,Not fleeing,False,31-35
4,9,Michael Rodriguez,1/4/2015,shot,nail gun,39.0,Male,Hispanic,Evans,CO,False,attack,Not fleeing,False,36-40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5437,5954,James Tober,6/28/2020,shot,gun,68.0,Male,Unknown,Kettering,OH,True,attack,Not fleeing,False,66-70
5438,5953,John Parks,6/29/2020,shot,undetermined,57.0,Male,White,Paducah,KY,False,undetermined,Car,False,56-60
5439,5958,Wade Russell Meisberger,6/29/2020,shot,gun,48.0,Male,White,Hazleton,PA,False,attack,Car,False,46-50
5440,5962,Brittany S. Teichroeb,6/29/2020,shot,gun,26.0,Female,White,Midland,TX,False,other,Other,False,26-30


In [53]:
# Narrow the frame to the identifying and demographic columns only,
# dropping the incident-detail columns for a more compact view
Brynn_clean_df = Brynn_project_df.loc[:, [
    'id',
    'name',
    'date',
    'age',
    'gender',
    'race',
    'city',
    'state',
]]
Brynn_clean_df

Unnamed: 0,id,name,date,age,gender,race,city,state
0,3,Tim Elliot,1/2/2015,53.0,Male,Asian,Shelton,WA
1,4,Lewis Lee Lembke,1/2/2015,47.0,Male,White,Aloha,OR
2,5,John Paul Quintero,1/3/2015,23.0,Male,Hispanic,Wichita,KS
3,8,Matthew Hoffman,1/4/2015,32.0,Male,White,San Francisco,CA
4,9,Michael Rodriguez,1/4/2015,39.0,Male,Hispanic,Evans,CO
...,...,...,...,...,...,...,...,...
5437,5954,James Tober,6/28/2020,68.0,Male,Unknown,Kettering,OH
5438,5953,John Parks,6/29/2020,57.0,Male,White,Paducah,KY
5439,5958,Wade Russell Meisberger,6/29/2020,48.0,Male,White,Hazleton,PA
5440,5962,Brittany S. Teichroeb,6/29/2020,26.0,Female,White,Midland,TX


# Racial Demographics

In [54]:
# One row per (id, race) pair, so a repeated record is not counted twice
racial_group = Brynn_project_df[["id", "race"]].drop_duplicates()

# Deaths per race, and each race's share of all deaths (total_deaths)
race_count = racial_group["race"].value_counts()
race_percent = race_count.div(total_deaths).mul(100)

# Summary table; the percentage column is rendered as text with two decimals
# and a trailing % sign, while race_percent itself stays numeric
racial_demographics = pd.DataFrame({
    "Total Fatal Shooting Deaths": race_count,
    "Percentage of Deaths": race_percent.map(lambda pct: '{0:,.2f}%'.format(pct)),
})

# Display
racial_demographics

Unnamed: 0,Total Fatal Shooting Deaths,Percentage of Deaths
White,2494,45.83%
Black,1298,23.85%
Hispanic,906,16.65%
Unknown,524,9.63%
Asian,94,1.73%
Native American,78,1.43%
Other,48,0.88%


# Gender Demographics

In [55]:
# One row per (id, gender) pair, so a repeated record is not counted twice
gender_group = Brynn_project_df[["id", "gender"]].drop_duplicates()

# Deaths per gender, and each gender's share of all deaths (total_deaths)
gender_count = gender_group["gender"].value_counts()
gender_percent = gender_count.div(total_deaths).mul(100)

# Summary table; the percentage column is rendered as text with two decimals
# and a trailing % sign, while gender_percent itself stays numeric
gender_demographics = pd.DataFrame({
    "Total Fatal Shooting Deaths": gender_count,
    "Percentage of Deaths": gender_percent.map(lambda pct: '{0:,.2f}%'.format(pct)),
})

# Display
gender_demographics

Unnamed: 0,Total Fatal Shooting Deaths,Percentage of Deaths
Male,5199,95.53%
Female,242,4.45%
Unknown,1,0.02%


# City and State Statistics

In [60]:
# Calculate totals and percentage per state.
# Count one row per (id, state) pair, the same way the race and gender tallies
# deduplicate by id. total_deaths is a count of unique ids, so tallying raw rows
# here would overstate the shares if a record ever appeared more than once.
state_group = Brynn_project_df[["id", "state"]].drop_duplicates()

# dropna=False keeps records with a missing state visible as their own row
total_states = state_group["state"].value_counts(dropna=False)
state_percent = (total_states / total_deaths) * 100

# Summary
state_demo = pd.DataFrame({
    "Total Fatal Shooting Deaths": total_states,
    "Percentage of Deaths": state_percent})

# Clean/format: render the percentage as text with two decimals and a % sign
state_demo["Percentage of Deaths"] = state_demo["Percentage of Deaths"].map('{0:,.2f}%'.format)

# Display
state_demo

Unnamed: 0,Total Fatal Shooting Deaths,Percentage of Deaths
CA,804,14.77%
TX,485,8.91%
FL,353,6.49%
AZ,254,4.67%
CO,199,3.66%
GA,183,3.36%
OK,165,3.03%
OH,156,2.87%
NC,154,2.83%
WA,152,2.79%
