Q1: How has the number of mass shootings evolved in the big US regions between two concrete years? For this, we need you to aggregate the data in the 5 regions (Southeast, Northeast, Midwest, Northwest, and Southwest), and let the user select the first and last year of the comparison. Same for states, both views coordinated.

Ideas: Slider for the beginning and end year (so double slider). Use a map to select the state and the subsequent region (use that map to show other things for it not to be a poor data to ink ratio). Use a line chart to show evolution as it is time series data. Two plots next to each other, as it does not make sense to compare region and state level data. Both lineplots should be coordinated (selecting a year should highlight the corresponding point in the other plot, with its corresponding tooltip).

LINEPLOT xxx STATEMAP

LINEPLOT xxx &LEGEND

Need to determine what is the default view, i.e. what to show when the user opens the page and has not selected a state/region.


In [1]:
import altair as alt
import pandas as pd

# Load the data
data_shootings = pd.read_csv('data/gun_violence_processed.csv')

state_data = pd.read_csv('data/state_data.csv').rename(columns={'state': 'State'})

# Add extra state information to the shootings data
data_shootings = data_shootings.merge(state_data, on='State')

data_shootings['Incident Date'] = pd.to_datetime(data_shootings['Incident Date'])
data_shootings['Year'] = data_shootings['Incident Date'].dt.year
data_shootings['Month'] = data_shootings['Incident Date'].dt.month

data_shootings.loc[:,'count'] = 1  # Add a count column for aggregation

# Add the missing states 


# group by state and month
shootings_df = data_shootings.groupby(['State', 'Year']).agg({
    'region': 'first',
    'Victims Killed': 'sum',
    'Victims Injured': 'sum',
    'Population_per_state_2023': 'first',
    'count': 'sum',
    'FIPS': 'first'
}).rename(columns={
    'region': 'Region',
    'Victims Killed': 'Total Victims Killed',
    'Victims Injured': 'Total Victims Injured',
    'Population_per_state_2023': 'Population',
    'count': 'Total Incidents'
}).reset_index()


region_df = shootings_df.groupby(['Region', 'Year']).agg({
    'Total Victims Killed': 'sum',
    'Total Victims Injured': 'sum',
    'Population': 'sum',
    'Total Incidents': 'sum'
}).reset_index()


state_df = shootings_df.groupby(['State']).agg({
    'Region': 'first',
    'Total Victims Killed': 'sum',
    'Total Victims Injured': 'sum',
    'Population': 'sum',
    'Total Incidents': 'sum',
    'FIPS': 'first'
}).reset_index()


state_df['StateFIPS'] = state_df['FIPS'].astype(str).str[:-3].astype(int)


state_df.head()

Unnamed: 0,State,Region,Total Victims Killed,Total Victims Injured,Population,Total Incidents,FIPS,StateFIPS
0,Alabama,Southeast,118,469,51084680,114,1039,1
1,Alaska,Northwest,6,18,2200218,5,2020,2
2,Arizona,Southwest,72,214,74313440,54,4013,4
3,Arkansas,Southeast,52,245,30677320,54,5069,5
4,California,Northwest,472,1638,389651930,420,6037,6


In [40]:
import altair as alt
from vega_datasets import data as vega_data

# ======== Dimensions ========
MAP_DIM = (600, 400)
REGION_LINE_CHART_DIM = (600, 200)
STATE_LINE_CHART_DIM = (600, 200)

# Load US states topojson
states = alt.topo_feature(vega_data.us_10m.url, feature='states')

# Calculate the number of shootings per 100,000 people
state_df['count_per_100k'] = state_df['Total Incidents'] / state_df['Population'] * 100_000

# Define a selection for the state
state_selection = alt.selection_point(fields=['State'], name='SelectState', empty='none')
region_selection = alt.selection_point(fields=['Region'], name='SelectRegion', empty='none')

# Create the choropleth map
state_chart = alt.Chart(states).mark_geoshape(
    stroke='white',
    strokeWidth=1
).encode(
    color=alt.Color(
        'count_per_100k:Q',
        scale=alt.Scale(scheme='reds'),
        title='Number of Mass Shootings per 100k',
        legend=alt.Legend(format='.2f')
    ),
    tooltip=['State:N', 'count_per_100k:Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(state_df, 'StateFIPS', ['count_per_100k', 'State', 'Region'])
).add_params(
    state_selection, region_selection
).properties(
    width=MAP_DIM[0],
    height=MAP_DIM[1],
    title='Number of Mass Shootings per 100k'
).project(
    type='albersUsa'
)

# Dynamic line chart for the selected state
state_line_chart = alt.Chart(shootings_df).mark_line(point=True).encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('Total Incidents:Q', title='Total Incidents'),
    tooltip=['Year:O', 'Total Incidents:Q', 'Region:N']
).transform_filter(
    state_selection
).properties(
    width=STATE_LINE_CHART_DIM[0],
    height=STATE_LINE_CHART_DIM[1]
)

# Title text for the line chart, dynamically linked to the selection
title_text = alt.Chart(shootings_df).mark_text(
    align='center',
    fontSize=16,
    dy=20
).encode(
    text=alt.condition(
        state_selection,
        'State:N',  # Show the selected state's name
        alt.value('')  # Default text if no state is selected
    )
)

# Combine the dynamic title and line chart
state_line_chart_with_title = alt.vconcat(
    title_text,
    state_line_chart
).resolve_legend(
    color="independent"
)

# Dynamic line chart for the selected region
region_line_chart = alt.Chart(region_df).mark_line(point=True).encode(
    x=alt.X('Year:O', title='Year'),
    y=alt.Y('Total Incidents:Q', title='Total Incidents'),
    tooltip=['Year:O', 'Total Incidents:Q', 'Region:N']
).transform_filter(
    region_selection
).properties(
    width=REGION_LINE_CHART_DIM[0],
    height=REGION_LINE_CHART_DIM[1]
)

title_text_region = alt.Chart(region_df).mark_text(
    align='center',
    fontSize=16,
    dy=20
).encode(
    text=alt.condition(
        region_selection,
        'Region:N',  # Show the selected region's name
        alt.value('')  # Default text if no region is selected
    )
)

region_line_chart_with_title = alt.vconcat(
    title_text_region,
    region_line_chart
).resolve_legend(
    color="independent"
)

# Combine the charts into a single view
final_chart = alt.hconcat(
    state_chart,
    alt.vconcat(
        region_line_chart_with_title,
        state_line_chart_with_title
    )
).configure_title(fontSize=16)

final_chart
