In [11]:
import altair as alt
import pandas as pd

# Load the incident records and the per-state reference table
data_shootings = pd.read_csv('data/gun_violence_processed.csv')
state_data = pd.read_csv('data/state_data.csv').rename(columns={'state': 'State'})

# Attach the state-level columns to every incident, parse the incident date,
# then derive the calendar fields and a constant helper column.
data_shootings = (
    data_shootings
    .merge(state_data, on='State')
    .assign(**{'Incident Date': lambda df: pd.to_datetime(df['Incident Date'])})
    .assign(
        Year=lambda df: df['Incident Date'].dt.year,
        Month=lambda df: df['Incident Date'].dt.month,
        count=1,  # one per incident, so summing this column counts incidents
    )
)


# Aggregate the incidents per state and year. Each output column is declared
# as new_name=(source_column, aggregation).
state_df = data_shootings.groupby(['State', 'Year'], as_index=False).agg(**{
    'Region': ('region', 'first'),
    'Total Victims Killed': ('Victims Killed', 'sum'),
    'Total Victims Injured': ('Victims Injured', 'sum'),
    'Population': ('Population_per_state_2023', 'first'),
    'Total Incidents': ('count', 'sum'),
    'FIPS': ('FIPS', 'first'),
})

# Fill empty years with 0: every state gets a row for every year seen anywhere
# in the data, so later per-year sums (e.g. of Population) cover all states.
all_years = state_df['Year'].unique()
all_states = state_df['State'].unique()

# Region, population and FIPS of a padding row are copied from the state's
# first existing row.
state_constants = state_df.drop_duplicates(subset='State').set_index('State')
existing_pairs = set(zip(state_df['State'], state_df['Year']))

missing_rows = [
    {
        'State': state,
        'Year': year,
        'Region': state_constants.at[state, 'Region'],
        'Total Victims Killed': 0,
        'Total Victims Injured': 0,
        'Population': state_constants.at[state, 'Population'],
        'Total Incidents': 0,
        'FIPS': state_constants.at[state, 'FIPS'],
    }
    for state in all_states
    for year in all_years
    if (state, year) not in existing_pairs
]

# Append all padding rows with a single concat; concatenating one row at a
# time copies the whole frame on every iteration.
if missing_rows:
    state_df = pd.concat([state_df, pd.DataFrame(missing_rows)], ignore_index=True)
        
# Per-state totals over all years; this is the dataset used for the
# choropleth map.
state_statistics_df = state_df.groupby('State', as_index=False).agg(**{
    'Region': ('Region', 'first'),
    'Total Victims Killed': ('Total Victims Killed', 'sum'),
    'Total Victims Injured': ('Total Victims Injured', 'sum'),
    'Population': ('Population', 'first'),
    'Total Incidents': ('Total Incidents', 'sum'),
    'FIPS': ('FIPS', 'first'),
})


# Yearly totals per region: sum the numeric columns of the member states.
region_df = (
    state_df
    .groupby(['Region', 'Year'], as_index=False)[[
        'Total Victims Killed',
        'Total Victims Injured',
        'Population',
        'Total Incidents',
    ]]
    .sum()
)

# Tag each frame with its aggregation level and give both the same key column
# name so they can be stacked into one frame.
state_df = state_df.assign(Type="State").rename(columns={"State": "StateOrRegion"})
region_df = region_df.assign(Type="Region").rename(columns={"Region": "StateOrRegion"})

state_region_df = pd.concat([state_df, region_df])

# Add the country-level evolution: yearly sums over all states, labelled like
# the state/region rows so they fit into the same frame.
us_df = (
    state_df
    .groupby('Year', as_index=False)[[
        'Total Victims Killed',
        'Total Victims Injured',
        'Population',
        'Total Incidents',
    ]]
    .sum()
    .assign(StateOrRegion='United States', Type='Country')
)

state_region_df = pd.concat([state_region_df, us_df])

# Incidents per 100,000 inhabitants for every state / region / country row.
state_region_df['Count per 100k'] = (
    state_region_df['Total Incidents'] / state_region_df['Population'] * 100_000
)

# State code = FIPS without its last three digits (FIPS appears to hold county
# codes such as 1039 for Alabama). Integer division is used instead of string
# slicing because slicing the text of a float ("1039.0"[:-3] == "103") would
# silently produce a wrong code.
state_statistics_df['StateFIPS'] = (state_statistics_df['FIPS'] // 1000).astype(int)

state_statistics_df['Count per 100k'] = (
    state_statistics_df['Total Incidents'] / state_statistics_df['Population'] * 100_000
)


# TODO: drop useless columns
state_region_df

Unnamed: 0,StateOrRegion,Year,Region,Total Victims Killed,Total Victims Injured,Population,Total Incidents,FIPS,Type,Count per 100k
0,Alabama,2014,Southeast,1,9,5108468,2,1039.0,State,0.039151
1,Alabama,2015,Southeast,4,13,5108468,4,1097.0,State,0.078301
2,Alabama,2016,Southeast,15,53,5108468,15,1045.0,State,0.293630
3,Alabama,2017,Southeast,6,38,5108468,7,1109.0,State,0.137027
4,Alabama,2018,Southeast,11,53,5108468,14,1091.0,State,0.274055
...,...,...,...,...,...,...,...,...,...,...
5,United States,2019,,428,1690,334914895,414,,Country,0.123613
6,United States,2020,,495,2526,334914895,611,,Country,0.182434
7,United States,2021,,668,2784,334914895,689,,Country,0.205724
8,United States,2022,,642,2647,334914895,644,,Country,0.192288


In [None]:
import altair as alt
import pandas as pd

MAP_DIM = (500, 300)
LINE_CHART_DIM = (600, 400)

# `all_years` is in order of first appearance, not chronological order, so
# sort it before picking the base year and the initially selected year.
# Plain ints are used so the values embed cleanly in the chart specification.
sorted_years = sorted(int(year) for year in all_years)

# Define the base year (earliest year in the data)
base_year = sorted_years[0]

region_df.rename(columns={'StateOrRegion': 'Region'}, inplace=True)
region_df = region_df.sort_values(by=['Region', 'Year'])

# Calculate percentage change with respect to each region's earliest year.
# `transform` returns a result aligned to region_df's own index, so no index
# manipulation is needed before assigning it back.
region_df['Percentage Change'] = (
    region_df.groupby('Region')['Total Incidents']
    .transform(lambda x: (x - x.iloc[0]) / x.iloc[0] * 100)
)

# Create a point selection on the year, starting on the second-earliest year
# (falls back to the base year if the data only covers one year)
initial_year = sorted_years[1] if len(sorted_years) > 1 else base_year
year_select = alt.selection_point(fields=['Year'], value=[{'Year': initial_year}], name='YearSelect')

# Slope chart: one line per region, restricted to the base year and the year
# currently held by the YearSelect selection.
slope_chart = (
    alt.Chart(region_df)
    .mark_line(point=True)
    .properties(
        title='Slope Chart of Total Incidents by Region',
        width=400,
        height=300,
    )
    .encode(
        alt.X('Year:O'),
        alt.Y('Total Incidents:Q'),
        alt.Color('Region:N'),
        alt.Detail('Region:N'),
    )
    .transform_filter(
        f"datum.Year == YearSelect.Year || datum.Year == {base_year}"
    )
)

# Bar plot: percentage change per region for the selected year only.
bar_plot = (
    alt.Chart(region_df)
    .mark_bar()
    .properties(
        title='Percentage Change in Total Incidents by Region',
        width=400,
        height=300,
    )
    .encode(
        alt.X('Region:N'),
        alt.Y('Percentage Change:Q'),
        alt.Color('Region:N'),
    )
    .transform_filter(year_select)
)

# One selector cell per year that actually occurs in the data. Deriving the
# list from `all_years` keeps the selector from offering a year that has no
# rows behind it (a hard-coded range can drift from the dataset).
year_data = pd.DataFrame({
    'Year': sorted(int(year) for year in all_years),
})

# Clickable strip of rectangles; the selected year is drawn fully opaque and
# the others are dimmed.
year_selector = alt.Chart(year_data).mark_rect().encode(
    x=alt.X('Year:O', title='', axis=None),
    opacity=alt.condition(year_select, alt.value(1.0), alt.value(0.6)),
).properties(
    width=MAP_DIM[0],
    height=15
).add_params(
    year_select
)

# Year labels meant to be layered on top of the selector strip.
year_text = alt.Chart(year_data).mark_text(
    align='center',
    baseline='middle',
    dy=1,
    fontWeight='bold',
).encode(
    x=alt.X('Year:O'),
    text=alt.Text('Year:O')
)

# Final layout: slope chart on the left; on the right the bar plot stacked
# above the year selector (selector strip with its labels layered on top).
# `|` concatenates horizontally and `&` vertically.
combined_chart = (
    slope_chart | (bar_plot & (year_selector + year_text))
).resolve_scale(x='independent')

combined_chart