In [35]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import dash
from dash import dcc, html, Dash, State
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.express as px
from dash.exceptions import PreventUpdate

import os

# Mapbox access token passed to the map figure's layout. Prefer supplying it via
# the MAPBOX_TOKEN environment variable so the secret does not live in the
# notebook; the literal is kept only as a fallback for existing setups and
# should be rotated and removed once the environment variable is in place.
mapbox_token = os.environ.get(
    'MAPBOX_TOKEN',
    'pk.eyJ1IjoiYW5kcmVhc29zdGVkIiwiYSI6ImNsbmJxMjFndDA4dm8ybXJrMzhia2NqdnoifQ.fXDNIJ1LelhA1ypNiaJE9w',
)

Data wrangling

In [40]:
# Load the raw records, add a display name, order rows by city, then parse the
# report date (stored as YYYYMMDD) and derive month / quarter labels from it.
df = (
    pd.read_csv('Data/homicide_data.csv')
    .assign(victim_full_name=lambda d: d['victim_first'] + ' ' + d['victim_last'])
    .sort_values('city')
    .assign(
        reported_date=lambda d: pd.to_datetime(d['reported_date'], format='%Y%m%d'),
        month=lambda d: d['reported_date'].dt.strftime('%Y-%m'),
        quarter=lambda d: d['reported_date'].dt.to_period('Q').dt.strftime('%Y-Q%q'),
    )
)

# Mean latitude / longitude per city, used to place one marker per city.
avg_city_coords = (
    df.groupby('city')[['lat', 'lon']]
    .mean()
    .reset_index()
    .rename(columns={"lat": "avg_lat", "lon": "avg_lon"})
)

# Attach the per-city averages to every record.
df = df.merge(avg_city_coords, on='city', how='left')

In [65]:
# Number of cases for every (disposition, city) pair, in long form.
obs = df.groupby(['disposition', 'city']).size().rename('n_obs').reset_index()

# Wide form: one row per city, one column per disposition, plus a ratio column.
# NOTE: despite its name, `closed_prop` is the share of cases that are
# 'Open/No arrest' (open cases divided by the row total).
obs_pivot = (
    obs.pivot(index='city', columns='disposition', values='n_obs')
    .assign(closed_prop=lambda wide: wide['Open/No arrest'] / wide.sum(axis=1))
    .reset_index()  # expose 'city' as a regular column
)

In [66]:
obs_pivot

disposition,city,Closed by arrest,Closed without arrest,Open/No arrest,closed_prop
0,Albuquerque,232.0,52.0,94.0,0.248677
1,Atlanta,600.0,58.0,315.0,0.323741
2,Baltimore,1002.0,152.0,1673.0,0.591793
3,Baton Rouge,228.0,16.0,180.0,0.424528
4,Birmingham,453.0,64.0,283.0,0.35375
5,Boston,304.0,,310.0,0.504886
6,Buffalo,202.0,8.0,311.0,0.596929
7,Charlotte,481.0,44.0,162.0,0.235808
8,Chicago,1462.0,387.0,3686.0,0.665944
9,Cincinnati,385.0,49.0,260.0,0.37464


Dash app

In [49]:
app = dash.Dash(__name__)


def _dropdown_options(column, all_value):
    """Return dropdown options for `column`: a 'Select all' entry, then each sorted unique value."""
    options = [{'label': 'Select all', 'value': all_value}]
    for item in sorted(df[column].unique()):
        options.append({'label': item, 'value': item})
    return options


cityoptions = _dropdown_options('city', 'all cities')
raceoptions = _dropdown_options('victim_race', 'all')
sexoptions = _dropdown_options('victim_sex', 'all')

def _filter_dropdown(dropdown_id, options, default):
    """Return one fixed-width, non-clearable filter dropdown.

    All three filters (city, race, sex) share the same styling, so they are
    built here instead of being written out three times.
    """
    return dcc.Dropdown(
        id=dropdown_id,
        options=options,
        value=default,
        clearable=False,
        # Dash style dictionaries use camelCase keys (marginTop, not margin-top).
        style={"width": "200px", "display": "inline-block", "marginTop": "0"},
    )


# Define layout: title, the three filters, the three figures, and a store that
# holds the city picked by clicking on the map.
app.layout = html.Div([
    html.H4(
        id='title-output',
        style={"marginBottom": "5px", "marginTop": "1px", "fontSize": "20px", "textAlign": "center"},
    ),
    _filter_dropdown('city_dropdown', cityoptions, 'all cities'),
    _filter_dropdown('race_dropdown', raceoptions, 'all'),
    _filter_dropdown('sex_dropdown', sexoptions, 'all'),
    dcc.Graph(id='murder-map'),
    dcc.Graph(id='disp-colchart'),
    dcc.Graph(id='arrested-linechart'),
    dcc.Store(id='selected-city', data='all cities'),
])


def make_colchart(df):
    """Build a bar chart of case counts per disposition.

    Parameters
    ----------
    df : pandas.DataFrame
        Homicide records; only the ``disposition`` column is read.

    Returns
    -------
    plotly.graph_objects.Figure
        One gold bar per disposition, always in the same left-to-right order.
    """
    dispositions = ['Closed by arrest', 'Open/No arrest', 'Closed without arrest']

    # value_counts() is ordered by frequency, not by label, so the counts must
    # be aligned to the fixed label order explicitly. Dispositions that do not
    # occur in `df` get a zero-height bar instead of shifting the others.
    counts = df["disposition"].value_counts().reindex(dispositions, fill_value=0)

    fig = go.Figure(data=go.Bar(x=dispositions, y=counts.tolist(), marker_color="Gold"))
    fig.update_layout(
        dragmode='zoom',
        width=500,
        height=290,
        margin={"r": 5, "t": 5, "l": 5, "b": 5},
    )
    return fig


def make_linechart(df):
    """Plot the quarterly share of cases closed by arrest, with a smoothed line.

    Parameters
    ----------
    df : pandas.DataFrame
        Homicide records; the ``quarter`` and ``disposition`` columns are read.

    Returns
    -------
    plotly figure
        A placeholder figure titled 'No data available' when `df` has no rows;
        otherwise a line chart of the per-quarter proportion plus a red
        4-quarter rolling mean.
    """
    if df.empty:
        return px.scatter(title='No data available')

    # Rows: quarter; columns: disposition; values: number of cases.
    grouped = df.groupby(['quarter', 'disposition']).size().unstack(fill_value=0)

    # A filtered subset may contain no 'Closed by arrest' cases at all, in which
    # case the column is missing; treat that as zero arrests instead of failing.
    arrests = grouped.get('Closed by arrest', 0)
    grouped['closed_arrested'] = arrests / grouped.sum(axis=1)

    # Rolling mean over `window` quarters; the first window-1 quarters have no
    # smoothed value because the window is not yet full.
    window = 4  # You can adjust the window size
    grouped['smoothed'] = grouped['closed_arrested'].rolling(window=window).mean()

    fig = px.line(
        grouped,
        x=grouped.index,
        y='closed_arrested',
        title='Proportion of Closed Cases by Arrest Over Time',
    )
    fig.add_scatter(
        x=grouped.index,
        y=grouped['smoothed'],
        mode='lines',
        name='Smoothed',
        line=dict(color='red'),
    )
    return fig

def make_map2(df, city):
    """Build the Mapbox scatter map for the selected city or the overview.

    When `city` is 'all cities' the map shows one marker per city taken from
    the module-level `avg_city_coords`, and `df` is not used. For any other
    value every row of `df` is plotted, coloured by disposition.
    """
    overview = city == 'all cities'

    if overview:
        fig = px.scatter_mapbox(
            avg_city_coords,
            lat='avg_lat',
            lon='avg_lon',
            hover_name='city',
            hover_data=None,
            color=None,
            height=700,
        )
        fig.update_traces(marker={'size': 10})
    else:
        fig = px.scatter_mapbox(
            df,
            lat="lat",
            lon="lon",
            hover_name="victim_full_name",
            hover_data=["victim_race", "victim_age", "victim_sex", "reported_date"],
            color="disposition",
            height=700,
        )

    fig.update_traces(uirevision='persist')

    # Wide zoom for the overview, closer zoom for a single city.
    fig.update_layout(
        mapbox_style="light",
        mapbox_accesstoken=mapbox_token,
        mapbox_zoom=3.9 if overview else 9,
        autosize=True,
        margin={"r": 0, "l": 0, "b": 0},
        clickmode='event+select',
        hovermode='closest',
    )
    return fig

# The earlier make_map(city) implementation that was kept here inside a bare
# string literal has been removed; make_map2 is the map builder in use.

def filter_df(selected_city, selected_race, selected_sex, df=None):
    """Filter homicide records by city, victim race and victim sex.

    Parameters
    ----------
    selected_city : str
        City to keep, or 'all cities' to skip the city filter.
    selected_race : str
        Victim race to keep, or 'all' to skip the race filter.
    selected_sex : str
        Victim sex to keep, or 'all' to skip the sex filter.
    df : pandas.DataFrame, optional
        Frame to filter. Defaults to the module-level ``df``, looked up at call
        time so that re-running the data-loading cell is picked up without
        having to redefine this function.

    Returns
    -------
    pandas.DataFrame
        `df` itself when no filter is active, otherwise the matching rows.
    """
    if df is None:
        df = globals()['df']

    criteria = (
        ('city', selected_city, 'all cities'),
        ('victim_race', selected_race, 'all'),
        ('victim_sex', selected_sex, 'all'),
    )

    # AND together one boolean mask per active filter.
    mask = None
    for column, wanted, select_all in criteria:
        if wanted == select_all:
            continue
        matches = df[column] == wanted
        mask = matches if mask is None else mask & matches

    return df if mask is None else df[mask]



@app.callback(
    [Output('murder-map', 'figure'),
     Output('disp-colchart', 'figure'),
     Output('arrested-linechart', 'figure')],
    [Input('city_dropdown', 'value'),
     Input('race_dropdown', 'value'),
     Input('sex_dropdown', 'value')],
)
def update_figs_on_dropdowns(selected_city, selected_race, selected_sex):
    """Rebuild the map, bar chart and line chart whenever a dropdown changes."""
    subset = filter_df(selected_city, selected_race, selected_sex)
    return (
        make_map2(subset, selected_city),
        make_colchart(subset),
        make_linechart(subset),
    )


@app.callback(
    [Output('disp-colchart', 'figure', allow_duplicate=True),
     Output('arrested-linechart', 'figure', allow_duplicate = True)], 
    [Input('murder-map', 'selectedData')],
    [State('city_dropdown', 'value'), State('race_dropdown', 'value'), State('sex_dropdown', 'value')],
    prevent_initial_call = True
)
def update_figures_on_map_selection(selected_data, selected_city, selected_race, selected_sex):
    """Redraw the bar and line charts for the points selected on the map.

    The dropdown filters always apply. When a selection is present, the rows
    are further restricted to the selected markers; with no selection the
    charts show everything the dropdowns allow.

    Returns
    -------
    tuple
        ``(bar chart figure, line chart figure)``.
    """
    # Apply the dropdown filters once and reuse the result for both charts.
    filtered_df = filter_df(selected_city, selected_race, selected_sex)

    if selected_data:
        # Markers are identified by their hover text; skip any point without one.
        labels = [
            point['hovertext']
            for point in selected_data.get('points', [])
            if point.get('hovertext') is not None
        ]
        # make_map2 labels markers with the city name on the 'all cities'
        # overview and with the victim's name otherwise, so match on the
        # corresponding column.
        match_column = 'city' if selected_city == 'all cities' else 'victim_full_name'
        filtered_df = filtered_df[filtered_df[match_column].isin(labels)]

    return make_colchart(filtered_df), make_linechart(filtered_df)
    



@app.callback(
    Output('city_dropdown', 'value'),
    Input('selected-city', 'data'),
    prevent_initial_call = True
)
def update_city_dropdown(selected_city):
    """Mirror the ``selected-city`` store into the city dropdown.

    Runs when the store's ``data`` changes (not on initial load) and returns
    the stored value unchanged as the dropdown's new ``value``.
    """
    return selected_city

@app.callback(
    Output('selected-city', 'data'),
    [Input('murder-map', 'clickData')],
    [State('city_dropdown', 'value')]
)
def update_on_click(click_data, selected_city):
    """Store the clicked marker's hover text as the selected city.

    Only acts while the dropdown is on 'all cities' and a click is present;
    in every other case the store is left untouched via PreventUpdate.
    """
    if selected_city != 'all cities' or not click_data:
        raise PreventUpdate
    return click_data['points'][0]['hovertext']

@app.callback(
    Output('title-output', 'children'),
    [Input('city_dropdown', 'value')]
)
def update_title(city):
    """Return the page title: city-specific for a known city, nationwide otherwise."""
    known_cities = df['city'].unique().tolist()
    return f'Murders in {city}' if city in known_cities else 'Murders in the US'


# Start the Dash development server and open the app in a new browser tab.
# `app.run` is the current entry point; `run_server` is its legacy alias.
if __name__ == '__main__':
    app.run(debug=True, jupyter_mode="tab", port=8050)


Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>

Testing things

In [None]:
# Ad-hoc check: draw the line chart for a narrow subset
# (one city, one victim race, any victim sex).
filtered_df = filter_df('Birmingham', 'Hispanic', 'all')
make_linechart(filtered_df)

In [56]:
obs = df.groupby(['victim_race', 'victim_age', 'victim_sex', 'disposition']).size().reset_index(name='n_obs')

total_obs = obs['n_obs'].sum()