In [8]:
import altair as alt
import pandas as pd
import numpy as np

In [9]:
years = range(2014, 2024)  # From 2014 to 2023
data_files = {year: pd.read_csv(f'data/mass_shooting_{year}.csv') for year in years}
combined_data = pd.concat(data_files.values(), ignore_index=True)
combined_data.sample(5)

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations
1349,1262814,"November 25, 2018",Alabama,Demopolis,Floyd St and Field St,2,2,0,0,0,
1487,1165299,"July 16, 2018",Pennsylvania,Philadelphia,2100 block of N 4th St,1,4,0,0,0,
3733,2344201,"June 29, 2022",Pennsylvania,Philadelphia,1100 block of E Price St,0,4,0,0,0,
4104,2742160,"October 29, 2023",Georgia,Atlanta,120 Piedmont Ave NE,1,3,0,0,0,
4565,2551678,"March 21, 2023",South Carolina,Sumter,190 Whitetail Cir,4,0,1,0,0,


In [10]:
combined_data['date'] = pd.to_datetime(combined_data['Incident Date'])
combined_data['year'] = combined_data['date'].dt.year
combined_data['month'] = combined_data['date'].dt.month
combined_data['day'] = combined_data['date'].dt.day
combined_data['weekday'] = combined_data['date'].dt.day_name()
combined_data['weekday_num'] = combined_data['date'].dt.dayofweek
combined_data['month_year'] = combined_data['date'].dt.to_period('M')

combined_data.sample(5)


Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations,date,year,month,day,weekday,weekday_num,month_year
4077,2760730,"November 20, 2023",Colorado,Westcliffe,173 Rocky Ridge Rd,3,1,0,0,1,,2023-11-20,2023,11,20,Monday,0,2023-11
1344,1272855,"December 9, 2018",Wisconsin,Madison,3554 E Washington Ave,0,4,0,0,2,,2018-12-09,2018,12,9,Sunday,6,2018-12
152,153275,"June 30, 2014",Kentucky,Lexington,Scottsdale Circle,1,3,0,0,1,,2014-06-30,2014,6,30,Monday,0,2014-06
2342,1757380,"August 6, 2020",California,Oakland,4500 block of Fairfax Ave,1,3,0,0,1,,2020-08-06,2020,8,6,Thursday,3,2020-08
2460,1717828,"June 26, 2020",Colorado,Aurora,698 Peoria St,0,5,0,0,0,,2020-06-26,2020,6,26,Friday,4,2020-06


In [11]:
# Preparing data for plotting
monthly_data = combined_data.groupby('month_year').agg(
    incident_count=('Incident ID', 'count'),
    victims_killed=('Victims Killed', 'sum'),
    victims_injured=('Victims Injured', 'sum')
).reset_index()
monthly_data['month_year'] = monthly_data['month_year'].dt.to_timestamp()
# only show month and year
monthly_data['month_year'] = monthly_data['month_year'].dt.strftime('%Y-%m')

# Calculate maximum points
monthly_data['is_max_incident'] = monthly_data['incident_count'] == monthly_data['incident_count'].max()
monthly_data['is_max_victim'] = (monthly_data[['victims_killed', 'victims_injured']].sum(axis=1) ==
                                 monthly_data[['victims_killed', 'victims_injured']].sum(axis=1).max())
monthly_data.sample(5)

Unnamed: 0,month_year,incident_count,victims_killed,victims_injured,is_max_incident,is_max_victim
6,2014-07,33,26,140,False,False
15,2015-04,19,16,71,False,False
93,2021-10,72,70,299,False,False
86,2021-03,45,65,176,False,False
105,2022-10,62,82,242,False,False


In [12]:
darkblue = '#002664'
blue = '#7F92B1'
red = '#BB133E'
grey = '#D9D9D9'

In [None]:
# Create a selection interval for zooming
brush = alt.selection_interval(encodings=['x'])

# annotation of the Covid-19 pandemic period
annotation_left = pd.DataFrame({
    'month_year': ['2020-03', '2023-06'], 
    'incident_count': [10, 10],
    'label': ['Covid-19 Pandemic Period', '']
})

# annotation of the 2017 Las Vegas shooting
annotation_right = pd.DataFrame({
    'month_year': ['2017-10'],  # Dates for the annotations
    'incident_count': [500],  # Adjust y-axis positions for the annotations
    'label': ['2017 Las Vegas shooting']
})

# Create a custom annotation for the average incident count, which changes based on the selection
average_annotation = alt.Chart(monthly_data).transform_filter(
    brush
).transform_aggregate(
    avg_incident_count='mean(incident_count)',  # Calculate average
).transform_calculate(
    annotation_text='"Number of Incidents Per Month: " + format(datum.avg_incident_count, ".2f")'
).mark_text(
    align='center',
    fontSize=14,
    color='black'
).encode(
    x=alt.value(600),  # Fixed x-position for the annotation
    y=alt.value(30),    # Fixed y-position for the annotation
    text=alt.Text('annotation_text:N')
).properties(
    width=1200,
    height=350
)

# first chart: Incident count with range selection and highlighted max point
first_chart = alt.Chart(monthly_data).mark_line().encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('incident_count:Q', title='Incident Count'),
    color=alt.value(darkblue),
).add_params(
    brush
).properties(
    title=alt.TitleParams('Mass Shooting Incidents Count by Month-Year', fontSize=20),
    width=1200,
    height=350
) + alt.Chart(monthly_data).mark_point(size=100).encode(
    x=alt.X('month_year:T'),
    y=alt.Y('incident_count:Q'),
    color=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    fill=alt.condition('datum.is_max_incident', alt.value(red), alt.value(darkblue)),
    size=alt.condition('datum.is_max_incident', alt.value(200), alt.value(50)),
    tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y', ),
        alt.Tooltip('incident_count:Q', title='Incidents')
    ]
).add_params(
    brush
)+ alt.Chart(annotation_left).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
) + alt.Chart(annotation_left).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
) + average_annotation

# second chart: Victims killed and injured controlled by brush selection
second_chart = alt.Chart(monthly_data).mark_line(point=True).encode(
    x=alt.X('month_year:T', title='Month-Year'),
    y=alt.Y('value:Q', title='Victim Count'),
    color=alt.Color(
        'variable:N',
        legend=alt.Legend(title="Victim Type", orient='top-left', labelFontSize=14),
        scale=alt.Scale(
            domain=['victims_killed', 'victims_injured'],
            range=[red, blue]
        )
    ),
     tooltip=[
        alt.Tooltip('month_year:T', title='Month-Year', format='%m-%Y'),
        alt.Tooltip('variable:N', title='Type'),
        alt.Tooltip('value:Q', title='Count')
    ]
).transform_fold(
    ['victims_killed', 'victims_injured'],
    as_=['variable', 'value']
).transform_filter(
    brush
).properties(
    title=alt.TitleParams('Victims Injured and Killed by Month-Year', fontSize=20),
    width=1200,
    height=350,
) + alt.Chart(annotation_right).mark_text(
    align='left',
    dx=5,
    dy=-10,
    fontSize=16,
    color=red
).encode(
    x='month_year:T',
    y='incident_count:Q',
    text='label'
) + alt.Chart(annotation_right).mark_rule(color=red, strokeDash=[5, 5]).encode(
    x='month_year:T'
)


# Combine the charts
time_chart = alt.vconcat(first_chart, second_chart).resolve_legend(color="independent", size="independent").configure_point(size=60).configure_axis(
    labelFontSize=14,
    titleFontSize=14
).configure_title(
    fontSize=20
).configure_legend(
    titleFontSize=14,
    labelFontSize=14
)

time_chart.show()
# export the chart to a standalone HTML file
time_chart.save('Viz1.html')
