In [150]:
import pandas as pd
import altair as alt
from vega_datasets import data

In [155]:
# --- Local CSV inputs -------------------------------------------------------
PATH_SHOOTINGS = 'datasets/MassShootingsComplete.csv'
PATH_SCHOOLS = 'datasets/Schoolincidents.csv'
PATH_FIPS = 'datasets/county-fips.csv'
# TODO: no county population file is configured yet; presumably it should
# provide state, population and year columns — confirm once the file exists.
PATH_COUNTY = ''

# --- Remote vega_datasets resources -----------------------------------------
# County shapes (TopoJSON) and the unemployment table URL.
counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url

# Show what the two altair data specifications look like.
print(counties)
print(alt.LookupData(data=source, key='id', fields=['rate']))

UrlData({
  format: TopoDataFormat({
    feature: 'counties',
    type: 'topojson'
  }),
  url: 'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json'
})
LookupData({
  data: 'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/unemployment.tsv',
  fields: ['rate'],
  key: 'id'
})


In [143]:
# Load the raw school-incident CSV.
# NOTE(review): the "Schools" section further down reads PATH_SCHOOLS again
# before processing it, so this earlier load looks redundant — confirm before
# removing.
df_schools = pd.read_csv(PATH_SCHOOLS)

In [144]:
# Hand-written placeholder population table (five rows, all for 2020), used
# while PATH_COUNTY does not point at a real file.
# The dict deliberately has its own name: calling it `data` would overwrite the
# `data` module imported from vega_datasets, and any later `data.<dataset>`
# access (or a re-run of the configuration cell) would fail on a plain dict.
population_records = {
    'City Or County': ['Los Angeles', 'Cook', 'Harris', 'Maricopa', 'San Diego'],
    'State': ['California', 'Illinois', 'Texas', 'Arizona', 'California'],
    'Year': [2020, 2020, 2020, 2020, 2020],
    'Population': [10039107, 5150233, 4713325, 4485414, 3338330]
}
fake_population = pd.DataFrame(population_records)
print(fake_population.head())

  City Or County       State  Year  Population
0    Los Angeles  California  2020    10039107
1           Cook    Illinois  2020     5150233
2         Harris       Texas  2020     4713325
3       Maricopa     Arizona  2020     4485414
4      San Diego  California  2020     3338330


In [145]:
# Replace the 'Incident Date' column with numeric 'Year' and 'Month' columns.

def split_date(df):
    """Return a copy of ``df`` with 'Incident Date' split into 'Year' and 'Month'.

    The 'Incident Date' column is parsed with ``pd.to_datetime``; its year and
    month are appended as the last two columns and the original date column is
    removed. The input frame is left untouched, so the function can be called
    repeatedly on the same raw frame (for example when a cell is re-run).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an 'Incident Date' column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        New frame without 'Incident Date' and with 'Year' and 'Month' appended.

    Raises
    ------
    KeyError
        If ``df`` has no 'Incident Date' column.
    """
    incident_dates = pd.to_datetime(df['Incident Date'])
    # drop() and assign() both return new frames, so the caller's data is not modified.
    return df.drop(columns='Incident Date').assign(
        Year=incident_dates.dt.year,
        Month=incident_dates.dt.month,
    )



### Mass shootings and ratios

In [146]:
# Population lookup: the hand-written placeholder table stands in for the
# county file (PATH_COUNTY), which is not loaded here.
df_population = fake_population

# One pipeline: load the incidents, split the date, count incidents per
# city/county + state + year + month, attach the population by
# city/county + state + year (left join, so unmatched rows keep NaN), and
# derive the shootings-per-inhabitant ratio.
df_shootings = (
    split_date(pd.read_csv(PATH_SHOOTINGS))
    .groupby(['City Or County', 'State', 'Year', 'Month'])
    .size()
    .reset_index(name='Shootings')
    .merge(df_population, how='left', on=['City Or County', 'State', 'Year'])
    .assign(Ratio=lambda merged: merged['Shootings'] / merged['Population'])
)

print(df_shootings.tail())


     City Or County     State  Year  Month  Shootings  Population  Ratio
4309           Yuma   Arizona  2022      6          1         NaN    NaN
4310           Yuma   Arizona  2023      1          1         NaN    NaN
4311           Yuma   Arizona  2023      5          1         NaN    NaN
4312           Zion  Illinois  2020      8          1         NaN    NaN
4313           Zion  Illinois  2022     12          1         NaN    NaN


### Schools

In [147]:
# Count school incidents per city/county, state, year and month.
# Built as one chain from the raw CSV, so re-running the cell always starts
# from the file instead of modifying a previously loaded frame in place.
df_schools = (
    split_date(pd.read_csv(PATH_SCHOOLS))
    .drop(columns=['Address', 'Business/Location Name'])
    .groupby(['City Or County', 'State', 'Year', 'Month'])
    .size()
    .reset_index(name='School Incidents')
)

# Show the row(s) with the highest monthly count. Series.max() is vectorised
# and, unlike the builtin max(), does not raise on an empty frame.
print(df_schools.loc[df_schools['School Incidents'] == df_schools['School Incidents'].max()])

    City Or County    State  Year  Month  School Incidents
750   Indianapolis  Indiana  2023      8                 5


### Merge shootings and schools

In [148]:
# Column check before joining the two monthly tables.
print(df_shootings.columns)
print(df_schools.columns)

# Inner join: only city/state/year/month combinations present in both
# frames end up in df_complete.
df_complete = df_shootings.merge(
    df_schools,
    how='inner',
    on=['City Or County', 'State', 'Year', 'Month'],
)
print(df_complete.columns)

Index(['City Or County', 'State', 'Year', 'Month', 'Shootings', 'Population',
       'Ratio'],
      dtype='object')
Index(['City Or County', 'State', 'Year', 'Month', 'School Incidents'], dtype='object')
Index(['City Or County', 'State', 'Year', 'Month', 'Shootings', 'Population',
       'Ratio', 'School Incidents'],
      dtype='object')


In [160]:
# q2: load the county FIPS table and give its columns the capitalised names
# ('FIPS', 'County', 'State').
df_fips = pd.read_csv(PATH_FIPS).rename(
    columns={'fips': 'FIPS', 'county_name': 'County', 'state_name': 'State'}
)
print(df_fips.head())

   FIPS   County    State
0  1000  Alabama  Alabama
1  1001  Autauga  Alabama
2  1003  Baldwin  Alabama
3  1005  Barbour  Alabama
4  1007     Bibb  Alabama
