In [1]:

# GROUP 'E' (Tony, Tatiana, Lorenzo)
# Main dataset: https://github.com/washingtonpost/data-police-shootings/blob/master/v2/fatal-police-shootings-data.csv
# Additional datasets: see the slides.
# No preliminary operations
# import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd

# Load the fatal-police-shootings table once. `df` and `dataframe` are two
# independent copies of the same raw data, so that cleaning steps applied to
# one of them never leak into the other. (Parsing the CSV a second time, as
# was done before, yields the same result at twice the I/O cost.)
df = pd.read_csv("data/fatal-police-shootings-data.csv")
dataframe = df.copy()

# Auxiliary population tables used to normalise shooting counts.
dataframe_us_population = pd.read_csv("data/historical_state_population_by_year.csv")
dataframe_pop_by_race = pd.read_csv("data/population-by-race.csv")


In [2]:

# Histogram of the number of people shot, per race.

# Exclude rows coded with the combined "B;H" value. The explicit .copy() gives
# this cell its own frame, so the column assignments below never write into a
# slice of the previously loaded data.
df = df[df["race"] != "B;H"].copy()

# Make `race` an ordered categorical whose category order is the order returned
# by value_counts(), then sort the rows by it.
category_order = df['race'].value_counts().index
df['race'] = pd.Categorical(df['race'], categories=category_order, ordered=True)
df = df.sort_values('race')

# Swap the one-letter codes for readable labels. rename_categories relabels the
# categories while keeping their order; it is used instead of Series.replace,
# whose use on categorical data is deprecated in recent pandas releases.
# Codes missing from the mapping are passed through unchanged, so re-running
# the cell is harmless.
race_labels = {
    'W': 'White',
    'B': 'Black',
    'H': 'Hispanic',
    'N': 'Native American',
    'A': 'Asian Heritage',
    'O': 'Other',
}
df['race'] = df['race'].cat.rename_categories(race_labels)
race_percentage = df['race'].value_counts(normalize=True) * 100

raceAndArms = px.histogram(df, x="race", barmode="relative", color="race", title="Race analysis of shot people")

raceAndArms.update_layout(
    yaxis_title="number of people shot",
    width=800,
    height=400
)

raceAndArms.show()


  grouped = df.groupby(required_grouper, sort=False)  # skip one_group groupers


In [3]:

# Histogram of the `age` column of df, with the x axis fixed to 0-100.
fig = px.histogram(
    data_frame=df,
    x="age",
    title='Distribution of age',
    labels={"age": "Age [years]"},
    height=350,
    width=950,
).update_layout(yaxis_title="Number of people")
# Kept as the cell's final expression: its return value is what gets rendered,
# since this cell has no explicit fig.show().
fig.update_xaxes(range=[0, 100])

In [4]:


# Age histogram coloured by gender; rows whose gender is 'non-binary' are left out.
keep_rows = df['gender'].ne('non-binary')
df_filtered = df[keep_rows]

fig = px.histogram(
    df_filtered,
    x='age',
    color='gender',
    title='Distribution of age and gender',
    labels={'age': 'Age [years]', 'gender': 'Gender'},
    height=350,
    width=950,
)

fig.show()

In [5]:


# Count of records per gender, again leaving out the 'non-binary' rows.
df_filtered = df[df['gender'].ne('non-binary')]

fig = px.histogram(
    df_filtered,
    x='gender',
    title='Distribution of Gender',
    labels={'gender': 'Gender'},
    height=430,
    width=400,
)
fig.update_layout(yaxis_title="Number of people")

fig.show()

In [6]:

# Pie chart of gender, restricted to the rows recorded as 'male' or 'female'.
is_male_or_female = df['gender'].isin(['male', 'female'])
df_filtered = df[is_male_or_female]
fig = px.pie(
    df_filtered,
    names='gender',
    title='Distribution of Gender',
    labels={'gender': 'Gender'},
    height=300,
    width=600,
)
fig.show()

In [7]:

# Pie chart of the `was_mental_illness_related` flag over every row of df.
fig = px.pie(
    df,
    names='was_mental_illness_related',
    title='Distribution based on Mental Illness',
    labels={'was_mental_illness_related': 'Mental Illness Related'},
    height=300,
    width=600,
)
fig.update_layout(legend_title='Has mental illness:')

fig.show()

In [8]:

# Pie chart of `race_source`. Missing values are first labelled 'unspecified'
# (this is written back into df), then the 'other' and 'undetermined' rows are
# dropped from the plotted subset.
df['race_source'] = df['race_source'].fillna('unspecified')
excluded_sources = ['other', 'undetermined']
df_filtered = df[~df['race_source'].isin(excluded_sources)]
fig = px.pie(
    df_filtered,
    names='race_source',
    title='Distribution based on Race Source',
    labels={'race_source': 'Race Source'},
    height=300,
    width=600,
)
fig.show()

In [9]:

# Line chart: number of fatal police shootings per calendar year, up to and
# including 2022.

# The year is the first four characters of the `date` string, cast to int.
incident_year = dataframe["date"].str[:4].astype(int)

# assign() returns a new frame carrying the `year` column, so the column is
# never written into a filtered slice of `dataframe` (the previous
# slice-then-assign form raised pandas' SettingWithCopyWarning). `dataframe`
# itself is left unmodified, exactly as before.
dataframe_until_2022 = dataframe.assign(year=incident_year)
dataframe_until_2022 = dataframe_until_2022[dataframe_until_2022["year"] <= 2022]

# One row per year with the number of records in that year.
count_by_year = dataframe_until_2022.groupby('year').size().reset_index(name='number_of_deaths')
fig = px.line(count_by_year, x='year', y='number_of_deaths',
              labels={'number_of_deaths': 'Number of Deaths by Shooting', 'year': 'Year'},
              title="Number of deaths by police shooting per year (US)")
fig.update_layout(yaxis_title="Count", xaxis_title="Year", width=800, height=350)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:


# Horizontal bar chart of fatal police shootings per million inhabitants in
# 2019, showing the five states with the highest and the five with the lowest
# rate.

# Re-read the raw data so that changes made to `dataframe` elsewhere do not
# carry over into this cell.
dataframe = pd.read_csv("data/fatal-police-shootings-data.csv")

# Work on a renamed copy so the loaded population table keeps its own headers.
dataframe_us_population_copy = dataframe_us_population.copy()
dataframe_us_population_copy.columns = ["state", "year", "population"]

dataframe["year"] = dataframe["date"].str[:4]
# .copy() makes the 2019 subset an independent frame; casting `year` on a bare
# boolean-mask slice is what raised pandas' SettingWithCopyWarning before.
dataframe_2019 = dataframe[dataframe["year"] == "2019"].copy()
# `year` must be an int here to be usable as a merge key below.
# NOTE(review): assumes the population table stores the year as an integer - confirm.
dataframe_2019["year"] = dataframe_2019["year"].astype(int)

# Attach the state population of the same year to every 2019 record.
complete_merged_df_2019 = pd.merge(dataframe_2019, dataframe_us_population_copy, on=['state', 'year'])

# Number of 2019 records per state.
total_fatal_shootings_by_state = dataframe_2019.groupby('state').size().reset_index(name='total_fatal_shootings')

# Collapse to one row per state (count and population are constant within a
# state, so 'first' simply picks that value).
merged_df_2019 = pd.merge(complete_merged_df_2019, total_fatal_shootings_by_state, on='state')
merged_df_2019 = merged_df_2019[['state', 'total_fatal_shootings', 'population']]
grouped_df_2019 = merged_df_2019.groupby('state').agg({
    'total_fatal_shootings': 'first',
    'population': 'first'
}).reset_index()

# Alias (not a copy) of the all-states table: it therefore also receives the
# `shootings_per_million` column added on the next line, and keeps every state
# once `grouped_df_2019` is rebound to the top/bottom selection.
grouped_df_2019_original = grouped_df_2019

grouped_df_2019['shootings_per_million'] = (grouped_df_2019['total_fatal_shootings'] / grouped_df_2019['population']) * 1e6
grouped_df_2019 = grouped_df_2019.sort_values(by='shootings_per_million', ascending=False)
grouped_df_2019 = pd.concat([grouped_df_2019.head(5), grouped_df_2019.tail(5)])

# One colour per state: the reversed Blues palette for the five lowest-rate
# states, the reversed Reds palette for the five highest-rate states. The
# reversed palettes are built once instead of once per state.
blues_reversed = px.colors.sequential.Blues[::-1]
reds_reversed = px.colors.sequential.Reds[::-1]
color_dict = {
    state: blues_reversed[i] for i, state in enumerate(grouped_df_2019.tail(5)['state'])
}
color_dict.update({
    state: reds_reversed[i] for i, state in enumerate(grouped_df_2019.head(5)['state'])
})

fig = px.histogram(grouped_df_2019, x='shootings_per_million', y='state', color='state',
                   color_discrete_map=color_dict,
                   labels={'state': 'State', 'shootings_per_million': 'Number of Deaths'},
                   category_orders={'state': grouped_df_2019['state'].tolist()},
                   height=400, width=950)

fig.update_layout(yaxis_title="State", xaxis_title="Number of Victims",
                  title="Fatal Police Shootings per Million Inhabitants (2019, US)", legend_title="Legend",
                  barmode='overlay', showlegend=True)

# Hide the ten per-state legend entries ...
for trace in fig.data:
    trace.showlegend = False

# ... and add two data-less traces that exist only to provide the two legend
# entries (most / least police kills).
fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name="States with Most Police Kills",
                         marker=dict(color="Crimson", size=10), legendgroup='grouped_traces'))
fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name="States with Least Police Kills",
                         marker=dict(color="blue", size=10), legendgroup='grouped_traces'))

# for the spacing => new "empty" category
# NOTE(review): translated leftover note; no such category is added in this cell.
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [11]:

# Box plot of age for each threat type, using the 2019 subset `dataframe_2019`.
# The list below fixes the order in which the threat types are laid out.
threat_type_order = ['shoot', 'point', 'attack', 'threat', 'move', 'flee', 'undetermined']

fig_box = px.box(
    dataframe_2019,
    y='threat_type',
    x='age',
    labels={'age': 'Age', 'threat_type': 'Threat Type'},
    title='Age distribution by threat type (2019, US)',
    category_orders={'threat_type': threat_type_order},
)

fig_box.update_layout(yaxis_title="Threat type", xaxis_title="Age", height=450, width=900)

fig_box.show()


In [12]:


# Grouped horizontal bars: fatal police shootings per million people of the
# same race (Black vs. White) for each year from 2015 to 2019.

dataframe["year"] = dataframe["date"].str[:4].astype(int)

# Sum the population columns over all rows of the same year.
dataframe_pop_by_race = dataframe_pop_by_race.groupby(['Year']).agg({
    'Total Population': 'sum',
    'White Alone': 'sum',
    'Black Alone': 'sum',
}).reset_index()

# Lower-case duplicate of `Year`, so it can serve as the merge key below.
dataframe_pop_by_race["year"] = dataframe_pop_by_race["Year"]
dataframe_pop_by_race_copy = dataframe_pop_by_race[(dataframe_pop_by_race["year"] >= 2015) & (dataframe_pop_by_race["year"] <= 2019)]

# Records per (year, race), reshaped to one row per year with the 'B' and 'W'
# counts as columns.
tot_fatal_shootings_by_race = dataframe.groupby(['year', 'race']).size().reset_index(name='count')

tot_fatal_shootings_by_race_pivot = tot_fatal_shootings_by_race.pivot(index='year', columns='race', values='count')
tot_fatal_shootings_by_race_pivot = tot_fatal_shootings_by_race_pivot[['B', 'W']].reset_index()
tot_fatal_shootings_by_race_pivot.columns = ['year', 'count_black', 'count_white']

# Inner merge: only years present in both tables (within 2015-2019) survive.
merged_df = pd.merge(tot_fatal_shootings_by_race_pivot, dataframe_pop_by_race_copy, on=['year'])

# assign() builds the derived columns on a new frame instead of writing into a
# column-subset selection of merged_df (chained assignment). `year` becomes a
# string so the bar axis treats it as a category.
filtered_df = merged_df[['count_black', 'count_white', 'year', 'Total Population', 'White Alone', 'Black Alone']].assign(
    shootings_per_million_black=lambda t: (t['count_black'] / t['Black Alone']) * 1e6,
    shootings_per_million_white=lambda t: (t['count_white'] / t['White Alone']) * 1e6,
    year=lambda t: t['year'].astype(str),
)

fig = go.Figure()

fig.add_trace(go.Bar(
    y=filtered_df['year'],
    x=filtered_df['shootings_per_million_black'],
    name='Black',
    orientation='h',
    marker=dict(color="black"),
))
fig.add_trace(go.Bar(
    y=filtered_df['year'],
    x=filtered_df['shootings_per_million_white'],
    name='White',
    orientation='h',
    marker=dict(color="#FFBBB9"),
))


fig.update_layout(
    barmode='group',
    width=780,
    height=380,
    legend_title="Race",
    yaxis_title="Year",
    xaxis_title="Count per million people of the given race",
    title='Fatal Police Shootings per Million People (2015-2019), divided by race',
    bargap=0.4,
    plot_bgcolor='lightgray'
)

fig.show()


In [13]:

# Grouped horizontal bars: for each year from 2015 to 2019, the percentage of
# Black and of White victims who were armed with a gun, knife or blunt object.

dataframe["year"] = dataframe["date"].str[:4].astype(int)

# Yearly population totals, restricted to 2015-2019. Only the set of years is
# used in this cell (to select which years are plotted).
dataframe_pop_by_race_copy = dataframe_pop_by_race.copy()
dataframe_pop_by_race_copy = dataframe_pop_by_race_copy.groupby(['Year']).agg({
    'Total Population': 'sum',
    'White Alone': 'sum',
    'Black Alone': 'sum',
}).reset_index()

dataframe_pop_by_race_copy["year"] = dataframe_pop_by_race_copy["Year"]
dataframe_pop_by_race = dataframe_pop_by_race_copy[(dataframe_pop_by_race_copy["year"] >= 2015) & (dataframe_pop_by_race_copy["year"] <= 2019)]

# Records per (year, race), reshaped to one row per year with the 'B' and 'W'
# counts as columns.
tot_fatal_shootings_by_race = dataframe.groupby(['year', 'race']).size().reset_index(name='count')

tot_fatal_shootings_by_race_pivot = tot_fatal_shootings_by_race.pivot(index='year', columns='race', values='count')
tot_fatal_shootings_by_race_pivot = tot_fatal_shootings_by_race_pivot[['B', 'W']].reset_index()
tot_fatal_shootings_by_race_pivot.columns = ['year', 'count_black', 'count_white']

# Build the per-year totals from the tables computed in THIS cell. Previously
# this line read `merged_df`, a variable left behind by the preceding cell, so
# the cell only worked if that cell had been run first, and the tables above
# were computed without being used. The inner merge on the 2015-2019 population
# years keeps the same rows as before.
filtered_df = pd.merge(
    tot_fatal_shootings_by_race_pivot,
    dataframe_pop_by_race[['year']],
    on=['year'],
)[['count_black', 'count_white', 'year']]

# Records where the victim carried one of the three listed weapon types.
armed_df = dataframe[dataframe['armed_with'].isin(['gun', 'knife', 'blunt_object'])]

armed_by_race = armed_df.groupby(['year', 'race']).size().reset_index(name='count_armed')

armed_by_race_pivot = armed_by_race.pivot(index='year', columns='race', values='count_armed')
armed_by_race_pivot = armed_by_race_pivot[['B', 'W']].reset_index()
armed_by_race_pivot.columns = ['year', 'count_armed_black', 'count_armed_white']

total_armed_incidents_by_year = armed_df.groupby('year').size().reset_index(name='total_armed_incidents')
total_armed_incidents_by_year = total_armed_incidents_by_year[(total_armed_incidents_by_year["year"] >= 2015) & (total_armed_incidents_by_year["year"] <= 2019)]

filtered_df = pd.merge(filtered_df, armed_by_race_pivot, on=['year'], how='left')
filtered_df = pd.merge(filtered_df, total_armed_incidents_by_year, on=['year'])

# Share of armed victims within each race, in percent; `year` becomes a string
# so the bar axis treats it as a category.
filtered_df = filtered_df.assign(
    white_percentage=lambda t: t["count_armed_white"] / t["count_white"] * 100,
    black_percentage=lambda t: t["count_armed_black"] / t["count_black"] * 100,
    year=lambda t: t['year'].astype(str),
)

# Rename only so that the legend entries read 'Black' / 'White'. A single
# non-mutating rename replaces the two inplace=True calls.
filtered_df = filtered_df.rename(columns={'black_percentage': 'Black', 'white_percentage': 'White'})

fig = px.bar(filtered_df, y='year',
             x=['Black', 'White'],
             color_discrete_sequence=['black', '#FFBBB9'],
             labels={'value': 'Percentage of Armed Incidents (armed accidents of a race / total accidents of that race)'},
             title='Percentage of Armed Incidents (Black and White) in Fatal Police Shootings (2015-2019)',
             width=800, height=350,
             category_orders={'year': list(reversed(filtered_df['year'].unique()))},
             )

fig.update_layout(barmode='group', legend_title="Race", yaxis_title="Year", bargap=0.4, plot_bgcolor='lightgray')
fig.show()


In [14]:

# Bar chart: for every age, the percentage of records flagged as mental-illness
# related.

# Two-column view of df under the names used by the rest of this cell.
age_df = df[["age", "was_mental_illness_related"]].rename(
    columns={"age": "Age", "was_mental_illness_related": "Mental_Illness_Related"}
)

# Rows without an age cannot be assigned to a bar.
filtered_age_df = age_df.dropna(subset=['Age'])
#filtered_age_df = filtered_age_df[(filtered_age_df['Age'] >= 15) & (filtered_age_df['Age'] <= 89)]

# Per age: share of each flag value, one column per value, scaled to percent.
share_by_age = filtered_age_df.groupby('Age')['Mental_Illness_Related'].value_counts(normalize=True)
count_by_age = share_by_age.unstack(fill_value=0) * 100

# y=True selects the column holding the share of flagged records.
fig = px.bar(count_by_age, x=count_by_age.index, y=True, labels={'True': 'Percentage of True'})
fig.update_layout(
    xaxis_title="Age",
    yaxis_title="Percentage of mental illness related cases",
    title="Percentage of Mental Illness Related by Age",
    width=900,
    height=400,
)

fig.show()