In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import dash
from dash import dcc, html, Dash, State, ctx
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.express as px
from dash.exceptions import PreventUpdate
import dash_bootstrap_components as dbc

from dash.exceptions import PreventUpdate


mapbox_token = 'pk.eyJ1IjoiYW5kcmVhc29zdGVkIiwiYSI6ImNsbmJxMjFndDA4dm8ybXJrMzhia2NqdnoifQ.fXDNIJ1LelhA1ypNiaJE9w'

Data wrangling

In [2]:
df = pd.read_csv('Data/homicide_data.csv')
citydata = pd.read_csv('Data/us-cities-top-1k.csv')
statedata = pd.read_csv('Data/states.csv')

In [3]:
citydata=pd.merge(citydata, statedata, on = "State", how = "left")
df['victim_full_name'] = df['victim_first'] +  ' ' + df['victim_last']

df = df.sort_values('city')

df['reported_date'] = pd.to_datetime(df['reported_date'], format='%Y%m%d')

# Extract month from date and convert to string
df['month'] = df['reported_date'].dt.strftime('%Y-%m')
df['quarter'] = df['reported_date'].dt.to_period('Q').dt.strftime('%Y-Q%q')

df['year'] = df['reported_date'].dt.strftime('%y')
df['month2'] = df['reported_date'].dt.strftime('%m')

#join on data about cities
df = pd.merge(df, citydata.rename(columns={"lat": "avg_lat", "lon": "avg_lon","Abbreviation":"state","State":"Long_state"}), on = ['city',"state"], how = 'left')


df['years_range'] = df.groupby('city')['reported_date'].transform(lambda x: (x.max() - x.min()).days / 365)
df['No_homocides_norm_pr1000'] = (df.groupby('city')['city'].transform('count'))/(df["Population"]*df['years_range'])*1000




avg_city_coords = df[['city','avg_lat','avg_lon','No_homocides_norm_pr1000','Population','state']].drop_duplicates().reset_index()
avg_city_coords = avg_city_coords.drop(49)

In [4]:
obs = df.groupby(['disposition', 'city']).size().reset_index(name='n_obs')

# Pivot the DataFrame to have 'disposition' as columns and 'city' as index
obs_pivot = obs.pivot(index='city', columns='disposition', values='n_obs')

# Calculate the proportion of 'Open/No arrest' over the sum of all dispositions
obs_pivot['open_prop'] = obs_pivot['Open/No arrest'] / obs_pivot.sum(axis=1)

# Reset index if you want to have 'city' as a regular column
obs_pivot = obs_pivot.reset_index()


city_df =pd.merge(avg_city_coords,obs_pivot[["city","open_prop"]],on="city",how="left")

Dash app

In [163]:
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

cityoptions = [{'label': 'Select all', 'value': 'all cities'}]+[{'label': city, 'value': city} for city in sorted(df['city'].unique())]
raceoptions = [{'label': 'Select all', 'value': 'all'}]+[{'label': r, 'value': r} for r in sorted(df['victim_race'].unique())]
sexoptions = [{'label': 'Select all', 'value': 'all'}]+[{'label': s, 'value': s} for s in sorted(df['victim_sex'].unique())]

# Define layout
# Define layout
app.layout = html.Div([
    html.H4(id='title-output',style={"margin-bottom":"5px","margin-top":"1px","font-size":"24px","text-align":"center"}),
    html.Hr(style={"margin":"0px","marginBottom":"4px"}),
    dcc.Dropdown(
        id='city_dropdown',
        options = cityoptions,
        value = 'all cities', #default
        #value = None,
        #placeholder = "Select city",
        clearable=False,
        style={"width": "200px", "display": "inline-block","margin-top":"0"}
    ),
        dcc.Dropdown(
        id='race_dropdown',
        options = raceoptions,
        value = 'all', #default
        #value = None,
        #placeholder = "Select race",
        clearable=False,
        style={"width": "200px", "display": "inline-block","margin-top":"0"}
    ),
        dcc.Dropdown(
        id='sex_dropdown',
        options = sexoptions,
        value = 'all', #default
        #value = None,
        #placeholder = "Select gender",
        clearable=False,
        style={"width": "200px", "display": "inline-block","margin-top":"0"}
    ),
    dbc.Card([dcc.Graph(id = 'murder-map',style={"height":"48vh"})]
             ,style={"width": "60%","height":"48vh"}),
    dbc.Card([dcc.Graph(id = 'disp-colchart',style={"height":"30vh"})],style={"width": "30%","height":"30vh","marginTop":"5px"}),
    dcc.Graph(id = 'time-spiral'),
dcc.Store(id='selected-city', data = 'all cities')
],
style={"marginLeft":"50px"})


def make_colchart(df):
       
    # Get value counts and reset index
    value_counts = df["disposition"].value_counts().reset_index().sort_values('disposition')
    
    fig = go.Figure(data=go.Bar(x=value_counts['disposition'], 
                                y=value_counts['count'],
                                marker_color="Gold"))
    #fig.update_layout(dragmode='zoom',width=500, height=290)
    fig.update_layout(margin={"r": 5, "t": 5, "l": 5, "b": 5}) 
    fig.update_layout(autosize = True)
    return fig


def make_linechart(df):
    if len(df) == 0:
        fig = px.scatter(title='No data available')
        return fig
    else:
        grouped = df.groupby(['quarter', 'disposition']).size().unstack(fill_value=0)

        # Calculate the proportion of closed cases by arrest
        grouped['open_no_arrest'] = grouped['Open/No arrest'] / grouped.sum(axis=1)

        # Calculate the rolling average
        window = 4  # You can adjust the window size
        grouped['smoothed'] = grouped['open_no_arrest'].rolling(window=window).mean()

        # Create a line chart
        fig = px.line(grouped, 
                    x=grouped.index, 
                    y='open_no_arrest', 
                    title='Proportion of open cases over Time')

        # Add smoothed line
        fig.add_scatter(x=grouped.index, y=grouped['smoothed'], mode='lines', name='Smoothed', line=dict(color='red'))

        return fig


def make_time_map(df):
    df_time = df.sort_values('reported_date')
    df_time['days_since_last_murder'] = df_time['reported_date'].diff().dt.days
    df_time['days_before_next_murder'] = df_time['reported_date'].diff(-1).dt.days*-1

    
    fig = px.scatter(df_time,
                 x = 'days_since_last_murder',
                 y = 'days_before_next_murder',
                 hover_data= ['victim_full_name'],
                 color = 'disposition',
                 opacity=0.5,
                 )
    fig.update_layout(height = 600, width = 600)


    ### all below is for making a title
    race_sel = df_time['victim_race'].unique()
    gender_sel =  df_time['victim_sex'].unique()
    city_sel = df_time['city'].unique()

    race_print = race_sel[0] if len(race_sel) == 1 else 'all races'
    gender_print = gender_sel[0] if len(gender_sel) == 1 else 'all genders'
    city_print = city_sel[0] if len(city_sel) == 1 else 'all cities'

    fig.update_layout(
        title = f'Time between murders in {city_print} for {gender_print} {race_print} victims'
    )

    
    return fig#, df_time

def make_spiral_plot(df):
    df['year'] = df['reported_date'].dt.strftime('%y')
    df['month2'] = df['reported_date'].dt.strftime('%m')
    df_summary = df.groupby(['year', 'month2']).size().reset_index(name = 'num_murders_year_month')#.sort_values(['year', 'month2'])
    
    ## håndterer fejl som ellers opstår ved valg af city med klik på stort kort
    if df_summary.empty:
        # Return a default or empty figure
        fig = px.bar_polar()
        return fig

    #quantiles = df_summary['num_murders_year_month'].agg('quantile', q = [0, 0.25, 0.50, 0.75, 1])
    q0, q0_25, q0_5, q0_75, q1 = np.percentile(df_summary['num_murders_year_month'], [0, 25, 50, 75, 100])


    all_years = df['year'].unique()
    all_months = df['month2'].unique()
    all_combinations = pd.DataFrame([(year, month) for year in all_years for month in all_months], columns=['year', 'month2']).sort_values(['year', 'month2'])

    # Merge with df_summary to include zeros for missing combinations
    df_summary = pd.merge(all_combinations, df_summary, on=['year', 'month2'], how='left').fillna(0)#replace(0, np.nan)
    df_summary['constant_value'] = 1


    df_summary['hovertemplate'] = df_summary.apply(lambda row: 'Year: %s<br>Month: %s<br>Murders: %d' % (row['year'], row['month2'], row['num_murders_year_month'])
                                                  if row['num_murders_year_month'] > 0 else '',
                                                  axis=1)

    fig = px.bar_polar(df_summary, 
                       r = 'constant_value', 
                       #r = 'year',
                       #hover_data=['year'],
                       #custom_data=['hovertext'],
                       custom_data=['year', 'month2', 'num_murders_year_month'], #inkluder evt year og month formateret rigtigt i df_summary 
                       theta='month2', 
                       color = 'num_murders_year_month',  
                       color_continuous_scale=[(0, 'rgba(0,0,0,0)'), #hide zero counts
                                               (0, '#ffffb2'),
                                               (q0/q1, '#fed976'), 
                                               (q0_25/q1, '#feb24c'),
                                               (q0_5/q1, '#fd8d3c'),
                                               (q0_75/q1, '#f03b20'),
                                               (1, '#bd0026')],
                       range_color = [q0, q1],
                       direction = 'counterclockwise',
                       category_orders={'month2': np.sort(df_summary['month2'].unique()),
                                        'year': np.sort(df_summary['year'].unique())}  # Specify the order of months
                   )
    fig.update_polars(hole = 0.25, radialaxis=dict(showticklabels=False, ticks='', linewidth=0)) 
    #fig.update_traces(hovertemplate='Year: %{customdata[0]}<br>Month: %{customdata[1]}<br>Murders: %{customdata[2]}') 
    fig.update_traces(hovertemplate=df_summary['hovertemplate'])
    fig.update_traces(hoverinfo = 'none')
    fig.update_layout(autosize = True)

    return fig




def make_map(df, city):
    fig = px.scatter_mapbox(
        df if city != 'all cities' else city_df,
        lat="lat" if city != 'all cities' else 'avg_lat',
        lon="lon" if city != 'all cities' else 'avg_lon',
        hover_name="victim_full_name" if city != 'all cities' else 'city',
        hover_data=["victim_race", "victim_age", "victim_sex", "reported_date"] if city != 'all cities' else None,
        color = "disposition"  if city != 'all cities' else "open_prop",
        color_continuous_scale="YlOrRd" if city=='all cities' else None, #"Rdbu_r" 
        labels={"open_prop": "% Open cases"},
        size='No_homocides_norm_pr1000' if city=='all cities' else None,
        center={"lat": 39.8283-2, "lon": -98.5795} if city=='all cities' else None,
        range_color=[min(city_df["open_prop"])-0.15,max(city_df["open_prop"])] if city=='all cities' else None,
        color_continuous_midpoint=0.5 if city=='all cities' else None,
        category_orders={"disposition": ['Closed by arrest', 'Closed without arrest', 'Open/No arrest']} if city != 'all cities' else None,
        color_discrete_sequence=['#636EFA', '#00CC96', '#EF553B']
    )

    
    fig.update_layout(mapbox_style="light", 
                      mapbox_accesstoken=mapbox_token, 
                      #title = title,
                      #mapbox_center={"lat": center_lat, "lon": center_lon},  
                      #mapbox_zoom = 10,
                      autosize = True
                      )
    
    
    
    fig.update_layout(mapbox_zoom = 9.5 if city != 'all cities' else 3.2)

    fig.update_traces(uirevision='persist')
    
    #fig.update_layout(margin={"r": 0, "l": 0, "b": 0})
    fig.update_layout(margin={"r": 0, "l": 15, "b": 5,"t":5})
    fig.update_layout(clickmode='event+select',
                      hovermode='closest')
    return fig



#helper function to filter data based on city, race, sex
def filter_df(selected_city, selected_race, selected_sex, df = df):
    all_cities_filter = selected_city == 'all cities'
    all_race_filter = selected_race == 'all'
    all_sex_filter = selected_sex == 'all'

    if all_cities_filter and all_race_filter and all_sex_filter:
        # Handle the case when all filters are 'all'
        filtered_df = df  # Include all data
    else:
        city_filter = df['city'] == selected_city if not all_cities_filter else True
        race_filter = df['victim_race'] == selected_race if not all_race_filter else True
        sex_filter = df['victim_sex'] == selected_sex if not all_sex_filter else True

        filtered_df = df[city_filter & race_filter & sex_filter]
    return filtered_df






@app.callback(
    [Output('murder-map', 'figure'),
    Output('disp-colchart', 'figure'),
    #Output('arrested-linechart', 'figure'),
    #Output('time-map', 'figure'),
    Output('time-spiral', 'figure')
    ],
    
    [Input('city_dropdown', 'value'),
     Input('race_dropdown', 'value'),
     Input('sex_dropdown', 'value')
    ]
)
def update_figs_on_dropdowns(selected_city, selected_race, selected_sex):
    filtered_df = filter_df(selected_city, selected_race, selected_sex)

    fig_map = make_map(filtered_df, selected_city)
    fig_col = make_colchart(filtered_df)
    #fig_line = make_linechart(filtered_df)
    #fig_time = make_time_map(filtered_df)
    fig_spiral = make_spiral_plot(filtered_df)
    #fig_map.update_layout(transition={'duration': 500, 'easing': 'cubic-in-out'})
    return fig_map, fig_col, fig_spiral #fig_time #fig_line


@app.callback(
    [Output('disp-colchart', 'figure', allow_duplicate=True),
     #Output('arrested-linechart', 'figure', allow_duplicate = True),
     #Output('time-map', 'figure', allow_duplicate = True),
     Output('time-spiral', 'figure', allow_duplicate = True)
     ], 
    [Input('murder-map', 'selectedData')],
    [State('city_dropdown', 'value'), State('race_dropdown', 'value'), State('sex_dropdown', 'value')],
    prevent_initial_call = True
)
def update_figures_on_map_selection(selected_data, selected_city, selected_race, selected_sex):
    if selected_data:
        # Extract the selected points from the map
        selected_points = selected_data['points']

        # Get the victim_full_name(s) from the selected points
        selected_names = [point['hovertext'] for point in selected_points]

        # Filter the DataFrame based on selected names, city, race, and sex
        filtered_df = df[df['victim_full_name'].isin(selected_names)]
        filtered_df = filter_df(selected_city, selected_race, selected_sex, filtered_df)

        # Generate the column chart using the filtered DataFrame
        fig_col = make_colchart(filtered_df)
        #fig_line = make_linechart(filtered_df)
        #fig_time = make_time_map(filtered_df)
        fig_spiral = make_spiral_plot(filtered_df)
        return fig_col, fig_spiral #fig_time #,fig_line
    else:
        fig_col = make_colchart(filter_df(selected_city, selected_race, selected_sex))
        #fig_line = make_linechart(filter_df(selected_city, selected_race, selected_sex))
        #fig_time = make_time_map(filter_df(selected_city, selected_race, selected_sex))
        fig_spiral = make_spiral_plot(filter_df(selected_city, selected_race, selected_sex))
        return fig_col, fig_spiral #fig_time#fig_line
    



@app.callback(
    Output('city_dropdown', 'value'),
    Input('selected-city', 'data'),
    prevent_initial_call = True
)
def update_city_dropdown(selected_city):
    return selected_city

@app.callback(
    Output('selected-city', 'data'),
    [Input('murder-map', 'clickData')],
    [State('city_dropdown', 'value')]
)
def update_on_click(click_data, selected_city):
    if selected_city == 'all cities':
        if click_data:
            selected_city = click_data['points'][0]['hovertext']
            return selected_city
    raise PreventUpdate


##### work in progress
@app.callback(
    [Output('murder-map', 'figure', allow_duplicate=True),
     Output('disp-colchart', 'figure', allow_duplicate = True)
     ], 
    [Input('time-spiral', 'selectedData')],
    [State('city_dropdown', 'value'), State('race_dropdown', 'value'), State('sex_dropdown', 'value')],
    prevent_initial_call = True
)
def update_figs_on_spiral_select(selected_data, selected_city, selected_race, selected_sex):
    if selected_data: 
        d = selected_data['points']

        years = np.unique([y for y,m,n in [c['customdata'] for c in d]]).tolist()
        months = np.unique([m for y,m,n in [c['customdata'] for c in d]]).tolist()
        

        filtered_df = df[df['month2'].isin(months) & df['year'].isin(years)]
        filtered_df = filter_df(selected_city, selected_race, selected_sex, filtered_df)

        fig_map = make_map(filtered_df, selected_city)
        fig_col = make_colchart(filtered_df)
        return fig_map, fig_col
    else:
        filtered_df = filter_df(selected_city, selected_race, selected_sex)

        fig_map = make_map(filtered_df, selected_city)
        fig_col = make_colchart(filtered_df)
        return fig_map, fig_col

#####


@app.callback(
    Output('title-output', 'children'),
    [Input('city_dropdown', 'value')]
)
def update_title(city):
    city_list = [city for city in df['city'].unique()]
    if city in city_list:
        return f'Murders in {city}' 
    else:
        return 'Murders in 50 large cities in the US'


if __name__ == '__main__':
    app.run_server(debug=True, jupyter_mode = "tab", port = 8050)


Dash app running on http://127.0.0.1:8050/


<IPython.core.display.Javascript object>

Testing things

Problem: På stort kort opdaterer mord per 1000 indbygger og prop open ikke ved filtrering på race og køn. 
 - Mord per 1000 indbygger er svært at håndtere, da vi ikke har population per befolkningsgruppe
 - Kan man nøjes med blot at opdatere prop closed baseret på filtrering?

In [9]:
def update_prop_open_df(selected_race, selected_sex, df = df):
    df_filtered = df.iloc[:, 0:13].query('victim_race == @selected_race and victim_sex == @selected_sex')
    citydata_new = citydata.drop('State', axis = 1).rename(columns={"lat": "avg_lat", "lon": "avg_lon", "Abbreviation":"state"})
    joined_df = pd.merge(df_filtered, citydata_new, on = ['city', 'state'])

    obs_disp = joined_df.groupby(['disposition', 'city']).size().reset_index(name='n_obs')

    # Pivot the DataFrame to have 'disposition' as columns and 'city' as index
    obs_disp_pivot = obs_disp.pivot(index='city', columns='disposition', values='n_obs')

    # Calculate the proportion of 'Open/No arrest' over the sum of all dispositions
    obs_disp_pivot['open_prop'] = obs_disp_pivot['Open/No arrest'] / obs_disp_pivot.sum(axis=1)

    # Reset index if you want to have 'city' as a regular column
    obs_disp_pivot = obs_disp_pivot.reset_index()

    return(obs_disp_pivot)

    

#### Num murders per year barchart

In [10]:
#df['year'] = df['reported_date'].dt.strftime('%y')

#df.groupby('year').size().reset_index(name = 'num_murders_year')

def make_colchart_counts(df):
    df['year'] = df['reported_date'].dt.strftime('%Y')
    year_counts = df.groupby('year').size().reset_index(name = 'num_murders_year')
    
    fig = go.Figure(data=go.Bar(x=year_counts['year'], 
                                y=year_counts['num_murders_year'],
                                marker_color="Gold"))
   
    #fig = px.line(year_counts, 
    #                x='year', 
    #                y='num_murders_year', 
    #                title='Murders per year')


    fig.update_layout(dragmode='zoom',width=500, height=290)
    fig.update_layout(margin={"r": 5, "t": 5, "l": 5, "b": 5}) 
    return fig




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [160]:
def make_spiral_plot2(df):
    df['year'] = df['reported_date'].dt.strftime('%y')
    df['month2'] = df['reported_date'].dt.strftime('%m')
    df_summary = df.groupby(['year', 'month2']).size().reset_index(name = 'num_murders_year_month')#.sort_values(['year', 'month2'])
    
    ## håndterer fejl som ellers opstår ved valg af city med klik på stort kort
    if df_summary.empty:
        # Return a default or empty figure
        fig = px.bar_polar()
        return fig

    #quantiles = df_summary['num_murders_year_month'].agg('quantile', q = [0, 0.25, 0.50, 0.75, 1])
    q0, q0_25, q0_5, q0_75, q1 = np.percentile(df_summary['num_murders_year_month'], [0, 25, 50, 75, 100])


    all_years = df['year'].unique()
    all_months = df['month2'].unique()
    all_combinations = pd.DataFrame([(year, month) for year in all_years for month in all_months], columns=['year', 'month2']).sort_values(['year', 'month2'])

    # Merge with df_summary to include zeros for missing combinations
    df_summary = pd.merge(all_combinations, df_summary, on=['year', 'month2'], how='left').fillna(0)#replace(0, np.nan)
    df_summary['constant_value'] = 1


    df_summary['hovertemplate'] = df_summary.apply(lambda row: 'Year: %s<br>Month: %s<br>Murders: %d' % (row['year'], row['month2'], row['num_murders_year_month'])
                                                  if row['num_murders_year_month'] > 0 else '',
                                                  axis=1)

    fig = px.bar_polar(df_summary, 
                       r = 'constant_value', 
                       #r = 'year',
                       #hover_data=['year'],
                       #custom_data=['hovertext'],
                       custom_data=['year', 'month2', 'num_murders_year_month'], #inkluder evt year og month formateret rigtigt i df_summary 
                       theta='month2', 
                       color = 'num_murders_year_month',  
                       color_continuous_scale=[(0, 'rgba(0,0,0,0)'), #hide zero counts
                                               (0, '#f0f921'),
                                               (q0/q1, '#fca636'), 
                                               (q0_25/q1, '#e16462'),
                                               (q0_5/q1, '#b12a90'),
                                               (q0_75/q1, '#6a00a8'),
                                               (1, '#0d0887')],
                       range_color = [q0, q1],
                       direction = 'counterclockwise',
                       category_orders={'month2': np.sort(df_summary['month2'].unique()),
                                        'year': np.sort(df_summary['year'].unique())}  # Specify the order of months
                   )
    fig.update_polars(hole = 0.25, radialaxis=dict(showticklabels=False, ticks='', linewidth=0)) 
    #fig.update_traces(hovertemplate='Year: %{customdata[0]}<br>Month: %{customdata[1]}<br>Murders: %{customdata[2]}') 
    fig.update_traces(hovertemplate=df_summary['hovertemplate'])
    fig.update_traces(hoverinfo = 'none')
    fig.update_layout(autosize = True)

    return fig, df_summary


In [161]:
fig, df_out = make_spiral_plot2(filter_df('Denver', 'all', 'all'))
fig



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

