In [1]:
import altair as alt
import pandas as pd
import numpy as np

In [2]:
# Load the ten yearly CSV exports (2014 through 2023), keyed by year.
years = range(2014, 2024)
data_files = dict(
    (year, pd.read_csv(f'data/mass_shooting_{year}.csv'))
    for year in years
)
# Stack the yearly frames; ignore_index=True renumbers the rows across files.
combined_data = pd.concat(list(data_files.values()), ignore_index=True)
# Peek at five randomly chosen rows of the combined frame.
combined_data.sample(5)

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations
4552,2555326,"March 27, 2023",Wisconsin,Milwaukee,7225 N 76th St,0,5,0,0,0,
2159,1839462,"October 31, 2020",California,Anaheim,1210 S State College Blvd,0,4,0,0,0,
3243,1974943,"April 11, 2021",Kansas,Wichita,200 block of N Battin St,1,3,0,0,0,
821,591294,"June 28, 2016",Illinois,Chicago,1600 block of South Christiana,0,4,0,0,0,
2414,1725108,"July 5, 2020",South Carolina,Greenville,1701 White Horse Rd,2,8,0,0,1,


In [3]:
# Parse the textual 'Incident Date' column, then derive calendar features from it.
combined_data['date'] = pd.to_datetime(combined_data['Incident Date'])
incident_dt = combined_data['date'].dt  # datetime accessor reused for every feature

combined_data['year'] = incident_dt.year
combined_data['month'] = incident_dt.month
combined_data['day'] = incident_dt.day
combined_data['weekday'] = incident_dt.day_name()       # weekday name, e.g. 'Friday'
combined_data['weekday_num'] = incident_dt.dayofweek    # weekday number, Monday = 0
combined_data['month_year'] = incident_dt.to_period('M')  # monthly period, e.g. 2022-08

# Peek at five randomly chosen rows, now including the derived columns.
combined_data.sample(5)


Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations,date,year,month,day,weekday,weekday_num,month_year
3605,2391779,"August 19, 2022",Illinois,Chicago,5200 block of S Green St,0,4,0,0,0,,2022-08-19,2022,8,19,Friday,4,2022-08
767,620247,"August 3, 2016",Georgia,Decatur,4110 Glenwood Road,1,3,0,0,0,,2016-08-03,2016,8,3,Wednesday,2,2016-08
489,350182,"May 31, 2015",Georgia,Conyers,4669 Bell Road SE,2,2,1,0,0,,2015-05-31,2015,5,31,Sunday,6,2015-05
472,357611,"June 14, 2015",Missouri,Saint Louis,1400 block of North Market Street,0,4,0,0,0,,2015-06-14,2015,6,14,Sunday,6,2015-06
2257,1787977,"September 6, 2020",Tennessee,Memphis,6555 Lost Oak Dr,0,4,0,0,2,,2020-09-06,2020,9,6,Sunday,6,2020-09


In [4]:
# Preparing data for plotting: one row per calendar month with the number of
# incidents and the summed killed / injured victim counts.
monthly_data = (
    combined_data
    .groupby('month_year')
    .agg(
        incident_count=('Incident ID', 'count'),
        victims_killed=('Victims Killed', 'sum'),
        victims_injured=('Victims Injured', 'sum'),
    )
    .reset_index()
)

# Period -> timestamp -> 'YYYY-MM' text, so only month and year are shown.
month_start = monthly_data['month_year'].dt.to_timestamp()
monthly_data['month_year'] = month_start.dt.strftime('%Y-%m')

# Flag the month(s) holding the maximum incident count ...
peak_incident_count = monthly_data['incident_count'].max()
monthly_data['is_max_incident'] = monthly_data['incident_count'] == peak_incident_count

# ... and the month(s) holding the maximum combined (killed + injured) victim total.
victims_total = monthly_data[['victims_killed', 'victims_injured']].sum(axis=1)
monthly_data['is_max_victim'] = victims_total == victims_total.max()

monthly_data.sample(5)

Unnamed: 0,month_year,incident_count,victims_killed,victims_injured,is_max_incident,is_max_victim
16,2015-05,35,35,146,False,False
2,2014-03,15,13,57,False,False
77,2020-06,94,66,402,True,False
93,2021-10,72,70,299,False,False
7,2014-08,36,28,147,False,False


In [5]:
# Colour palette (hex codes) shared by the charts in this notebook.
red = "#BB133E"       # highlighted points, annotation text and rules, 'victims_killed' series
darkblue = "#002664"  # incident-count line and regular points
blue = "#7F92B1"      # 'victims_injured' series
grey = "#D9D9D9"

In [12]:
# Two stacked, linked charts:
#   1. monthly incident count, with an x-axis brush, the peak month highlighted,
#      Covid-19 period markers and a live average over the brushed range;
#   2. monthly victims killed / injured, zoomed to whatever range is brushed above.

# Layout constants shared by every layer so the sizes cannot drift apart.
chart_width = 1200
chart_height = 350


def _with_parsed_months(frame):
    """Return a copy of ``frame`` whose 'YYYY-MM' ``month_year`` text is parsed to datetimes."""
    return frame.assign(month_year=pd.to_datetime(frame['month_year'], format='%Y-%m'))


# Why parse here: Vega-Lite hands temporal strings to the browser's date parser, and
# ISO date-only text such as '2020-03' is interpreted as UTC midnight. Rendered in
# local time, that lands in the *previous* month for viewers west of UTC, so axis
# labels and the '%m-%Y' tooltips are off by one month. Real datetimes are serialised
# by Altair as full local date-times ('2020-03-01T00:00:00') and avoid the shift.
monthly_plot_data = _with_parsed_months(monthly_data)

# Create a selection interval for zooming (horizontal brush only)
brush = alt.selection_interval(encodings=['x'])

# annotation of the Covid-19 pandemic period: one row per boundary; only the first
# boundary carries a text label
annotation_left = _with_parsed_months(pd.DataFrame({
    'month_year': ['2020-03', '2023-04'],
    'incident_count': [10, 10],
    'label': ['Covid-19 Pandemic Period', '']
}))

# annotation of the 2017 Las Vegas shooting (drawn on the victims chart)
annotation_right = _with_parsed_months(pd.DataFrame({
    'month_year': ['2017-10'],  # Date for the annotation
    'incident_count': [500],    # y-axis position of the label on the victims chart
    'label': ['2017 Las Vegas shooting']
}))

# Custom annotation for the average incident count, which changes based on the selection
average_annotation = alt.Chart(monthly_plot_data).transform_filter(
    brush
).transform_aggregate(
    avg_incident_count='mean(incident_count)',  # Calculate average over the brushed months
).transform_calculate(
    annotation_text='"Number of Incidents Per Month: " + format(datum.avg_incident_count, ".2f")'
).mark_text(
    align='center',
    fontSize=14,
    color='black'
).encode(
    x=alt.value(chart_width // 2),  # Fixed pixel position: horizontally centred
    y=alt.value(30),                # Fixed pixel position: 30px below the top edge
    text=alt.Text('annotation_text:N')
).properties(
    width=chart_width,
    height=chart_height
)

# --- first chart: incident count with range selection and highlighted max point ---
incident_line = alt.Chart(monthly_plot_data).mark_line().encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('incident_count:Q', title='Incident Count'),
    color=alt.value(darkblue),
).add_params(
    brush
).properties(
    title=alt.TitleParams('Mass Shooting Incidents Count by Month-Year', fontSize=20),
    width=chart_width,
    height=chart_height
)

# Points on top of the line; the month(s) flagged is_max_incident are larger and red.
incident_points = alt.Chart(monthly_plot_data).mark_point(size=100).encode(
    x=alt.X('month_year:T'),
    y=alt.Y('incident_count:Q'),
    color=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    fill=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    size=alt.condition('datum.is_max_incident', alt.value(200), alt.value(50)),
    tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y'),
        alt.Tooltip('incident_count:Q', title='Incidents')
    ]
).add_params(
    brush
)

covid_labels = alt.Chart(annotation_left).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
)

# Dashed vertical rules at both Covid-19 period boundaries.
covid_rules = alt.Chart(annotation_left).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
)

first_chart = incident_line + incident_points + covid_labels + covid_rules + average_annotation

# --- second chart: victims killed and injured, controlled by the brush selection ---
victim_lines = alt.Chart(monthly_plot_data).mark_line(point=True).encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('value:Q', title='Victim Count'),
    color=alt.Color(
        'variable:N',
        legend=alt.Legend(title="Victim Type", orient='top-left'),
        scale=alt.Scale(
            domain=['victims_killed', 'victims_injured'],
            range=[red, blue]
        )
    ),
    tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y'),
        alt.Tooltip('variable:N', title='Type'),
        alt.Tooltip('value:Q', title='Count')
    ]
).transform_fold(
    # Wide -> long: one row per (month, victim type) so both series share one encoding.
    ['victims_killed', 'victims_injured'],
    as_=['variable', 'value']
).transform_filter(
    brush
).properties(
    title=alt.TitleParams('Victims Injured and Killed by Month-Year', fontSize=20),
    width=chart_width,
    height=chart_height,
)

# The annotation layers share this chart's x/y scales. Left unfiltered they would keep
# 2017-10 and y=500 inside the scale domains for every brush range, so the chart could
# never zoom to a period that excludes October 2017. Filtering them with the same
# brush shows the annotation only when its month is inside the selection.
vegas_label = alt.Chart(annotation_right).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
).transform_filter(
    brush
)

vegas_rule = alt.Chart(annotation_right).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
).transform_filter(
    brush
)

second_chart = victim_lines + vegas_label + vegas_rule

# Combine the charts vertically; legends are resolved independently per chart
time_chart = alt.vconcat(first_chart, second_chart).resolve_legend(
    color="independent",
    size="independent"
).configure_point(size=60)

# export the chart to a standalone HTML file
time_chart.save('Viz1.html')