In [1]:
import altair as alt
import pandas as pd
import numpy as np

In [2]:
# Read one CSV per year (2014 through 2023) and stack them into a single frame.
years = range(2014, 2024)
csv_paths = [f'data/mass_shooting_{season}.csv' for season in years]
# Keep the per-year frames addressable by year, as well as the combined frame.
data_files = dict(zip(years, map(pd.read_csv, csv_paths)))
combined_data = pd.concat(list(data_files.values()), ignore_index=True)
combined_data.sample(5)

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations
156,152960,"June 28, 2014",California,Antioch,5500 Cedar Point Way,0,7,0,0,0,
4421,2611512,"May 29, 2023",Florida,Hollywood,1203 N Broadwalk,0,9,0,0,3,
429,376203,"July 17, 2015",Ohio,Cincinnati,3100 block of Sunshine Ave,0,4,0,0,0,
1162,875886,"June 25, 2017",Missouri,Saint Louis,5000 block of Thekla Ave,1,3,0,0,0,
1283,780036,"February 21, 2017",Ohio,Cleveland,16700 Lorain Ave,0,4,0,0,2,


In [3]:
# Parse the textual incident date once, then derive every calendar feature
# from that single parsed series.
incident_dates = pd.to_datetime(combined_data['Incident Date'])
calendar_features = {
    'date': incident_dates,
    'year': incident_dates.dt.year,
    'month': incident_dates.dt.month,
    'day': incident_dates.dt.day,
    'weekday': incident_dates.dt.day_name(),
    'weekday_num': incident_dates.dt.dayofweek,
    'month_year': incident_dates.dt.to_period('M'),  # monthly Period, used for grouping
}
# Add the columns to combined_data in the order listed above.
for feature_name, feature_values in calendar_features.items():
    combined_data[feature_name] = feature_values

combined_data.sample(5)


Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations,date,year,month,day,weekday,weekday_num,month_year
423,378102,"July 18, 2015",California,San Diego,200 block of South Bancroft Street,0,4,0,0,0,,2015-07-18,2015,7,18,Saturday,5,2015-07
1786,1504030,"September 14, 2019",Missouri,Saint Louis,5200 block of Tennessee Ave,1,3,0,0,0,,2019-09-14,2019,9,14,Saturday,5,2019-09
2181,1828080,"October 18, 2020",Illinois,Chicago,200 block of N Sacramento Blvd,1,4,0,0,0,,2020-10-18,2020,10,18,Sunday,6,2020-10
917,542259,"April 16, 2016",Michigan,Detroit,9500 block of Oakland,1,4,0,0,1,,2016-04-16,2016,4,16,Saturday,5,2016-04
3050,2053563,"July 4, 2021",Illinois,Rantoul,400 block of S Maplewood Dr,0,5,0,0,0,,2021-07-04,2021,7,4,Sunday,6,2021-07


In [4]:
# Aggregate incidents and victims per calendar month for plotting.
victim_columns = ['victims_killed', 'victims_injured']
monthly_data = (
    combined_data
    .groupby('month_year')
    .agg(
        incident_count=('Incident ID', 'count'),
        victims_killed=('Victims Killed', 'sum'),
        victims_injured=('Victims Injured', 'sum'),
    )
    .reset_index()
    # Period -> timestamp -> 'YYYY-MM' text, so only month and year are shown.
    .assign(
        month_year=lambda frame: frame['month_year'].dt.to_timestamp().dt.strftime('%Y-%m')
    )
)

# Flag the month(s) holding the maximum incident count and the maximum
# combined (killed + injured) victim count.
victim_totals = monthly_data[victim_columns].sum(axis=1)
monthly_data['is_max_incident'] = monthly_data['incident_count'].eq(
    monthly_data['incident_count'].max()
)
monthly_data['is_max_victim'] = victim_totals.eq(victim_totals.max())
monthly_data.sample(5)

Unnamed: 0,month_year,incident_count,victims_killed,victims_injured,is_max_incident,is_max_victim
11,2014-12,21,24,73,False,False
65,2019-06,53,35,224,False,False
28,2016-05,29,23,117,False,False
102,2022-07,89,63,390,False,False
67,2019-08,40,66,212,False,False


In [5]:
# Hex colour palette shared by the charts.
darkblue, blue = '#002664', '#7F92B1'
red, grey = '#BB133E', '#D9D9D9'

In [6]:
# Size shared by both charts and by the overlay layers.
CHART_WIDTH = 1200
CHART_HEIGHT = 350
MONTH_FORMAT = '%Y-%m'  # layout of the 'YYYY-MM' month labels

# Create a selection interval for zooming (horizontal drag on the first chart)
brush = alt.selection_interval(encodings=['x'])

# Plot real datetimes instead of bare 'YYYY-MM' strings. Altair serialises
# datetimes as full ISO timestamps, which browsers parse in local time, whereas
# date-only strings are parsed as UTC; with the strings, viewers west of UTC
# would see every point, tick and tooltip shifted into the previous month.
# monthly_data itself is left untouched.
chart_data = monthly_data.assign(
    month_year=pd.to_datetime(monthly_data['month_year'], format=MONTH_FORMAT)
)

# annotation of the Covid-19 pandemic period: two dashed rules (start and end);
# only the first one carries a text label
annotation_left = pd.DataFrame({
    'month_year': pd.to_datetime(['2020-03', '2023-04'], format=MONTH_FORMAT),
    'incident_count': [10, 10],
    'label': ['Covid-19 Pandemic Period', '']
})

# annotation of the 2017 Las Vegas shooting
annotation_right = pd.DataFrame({
    'month_year': pd.to_datetime(['2017-10'], format=MONTH_FORMAT),  # Date of the annotation
    'incident_count': [500],  # y-axis position of the label (on the victim-count axis)
    'label': ['2017 Las Vegas shooting']
})

# Custom annotation showing the mean monthly incident count; it is recomputed
# from the rows inside the brush, so it changes with the selection
average_annotation = alt.Chart(chart_data).transform_filter(
    brush
).transform_aggregate(
    avg_incident_count='mean(incident_count)',  # Calculate average
).transform_calculate(
    annotation_text='"Number of Incidents Per Month: " + format(datum.avg_incident_count, ".2f")'
).mark_text(
    align='center',
    fontSize=14,
    color='black'
).encode(
    x=alt.value(CHART_WIDTH // 2),  # Fixed x-position: horizontal centre of the chart
    y=alt.value(30),                # Fixed y-position near the top of the chart
    text=alt.Text('annotation_text:N')
).properties(
    width=CHART_WIDTH,
    height=CHART_HEIGHT
)

# first chart: Incident count with range selection and highlighted max point
incident_line = alt.Chart(chart_data).mark_line().encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('incident_count:Q', title='Incident Count'),
    color=alt.value(darkblue),
).add_params(
    brush
).properties(
    title=alt.TitleParams('Mass Shooting Incidents Count by Month-Year', fontSize=20),
    width=CHART_WIDTH,
    height=CHART_HEIGHT
)

# One point per month; the month(s) with the most incidents are drawn larger and in red
incident_points = alt.Chart(chart_data).mark_point(size=100).encode(
    x=alt.X('month_year:T'),
    y=alt.Y('incident_count:Q'),
    color=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    fill=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    size=alt.condition('datum.is_max_incident', alt.value(200), alt.value(50)),
    tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y'),
        alt.Tooltip('incident_count:Q', title='Incidents')
    ]
).add_params(
    brush
)

covid_labels = alt.Chart(annotation_left).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
)

covid_rules = alt.Chart(annotation_left).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
)

first_chart = incident_line + incident_points + covid_labels + covid_rules + average_annotation

# second chart: Victims killed and injured controlled by brush selection
victim_lines = alt.Chart(chart_data).mark_line(point=True).encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('value:Q', title='Victim Count'),
    color=alt.Color(
        'variable:N',
        legend=alt.Legend(title="Victim Type", orient='top-left'),
        scale=alt.Scale(
            domain=['victims_killed', 'victims_injured'],
            range=[red, blue]
        )
    ),
    tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y'),
        alt.Tooltip('variable:N', title='Type'),
        alt.Tooltip('value:Q', title='Count')
    ]
).transform_fold(
    ['victims_killed', 'victims_injured'],
    as_=['variable', 'value']
).transform_filter(
    brush
).properties(
    title=alt.TitleParams('Victims Injured and Killed by Month-Year', fontSize=20),
    width=CHART_WIDTH,
    height=CHART_HEIGHT,
)

# The annotation layers share the x/y scales with the victim lines. They are
# filtered by the same brush so that an annotation outside the selected range
# does not stretch the axes and defeat the zoom; with no selection they show.
vegas_label = alt.Chart(annotation_right).transform_filter(
    brush
).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
)

vegas_rule = alt.Chart(annotation_right).transform_filter(
    brush
).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
)

second_chart = victim_lines + vegas_label + vegas_rule


# Combine the charts
time_chart = alt.vconcat(first_chart, second_chart).resolve_legend(color="independent", size="independent").configure_point(size=60)

# export the chart to a standalone HTML file
time_chart.save('mass_shooting_time_chart.html')