In [8]:
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
shooters = pd.read_csv('Data/Stanford_MSA_Database.csv')

OK, so there's lots of things we can do with the dataset given that it's a full 335 rows with 55 columns of descriptive text data each. Let's start by coming up with some basic questions we want answered:
- What kinds of weapons are most popular, and where?
- Where are guns most readily accessible in the country?
- How do warning signs change with age and other factors?
- Does lethality change with age?
- Have shootings become, on the whole, more lethal over time?
- How do the number of fatalities compare to the total number of victims?
- Has the number of guns used in a shooting changed on average over time?
- What happens to the shooter, and how does that change with age and race? Does their fate change substantially in incidences of school shootings?
- How do the motivations of mass shooters differ with geography?
- Does military experience alter the motive?

In [10]:
shooters

Unnamed: 0,CaseID,Title,Location,City,State,Latitude,Longitude,Number of Civilian Fatalities,Number of Civilian Injured,Number of Enforcement Fatalities,...,Data Source 3,Data Source 4,Data Source 5,Data Source 6,Data Source 7,Military Experience,Class,Depreciation,Notes,Edit Date
0,1,University of Texas at Austin,"Austin, Texas",Austin,Texas,30.198887,-97.844159,15,32,1,...,http://news.google.com/newspapers?id=lkk0AAAAI...,http://news.google.com/newspapers?id=PPUjAAAAI...,http://books.google.com/books?id=ClYEAAAAMBAJ&...,,,Yes,SPK,1,,6/8/2016
1,2,Rose-Mar College of Beauty,"Mesa, Arizona",Mesa,Arizona,33.422687,-111.816320,5,1,0,...,http://www.nydailynews.com/news/crime/beauty-s...,http://books.google.com/books?id=Cre7qsswRiwC&...,,,,Unknown,MS,1,,6/8/2016
2,3,New Orleans Police Shootings,"New Orleans, Louisiana",New Orleans,Louisiana,30.068724,-89.931474,4,8,5,...,http://www.trutv.com/library/crime/notorious_m...,http://books.google.com/books?id=TfEDmROcZwEC&...,,,,Unknown,SPK,1,,6/22/2016
3,4,Clara Barton Elementary School,"Chicago, Illinois",Chicago,Illinois,41.839280,-87.688181,1,3,0,...,http://www.leagle.com/xmlResult.aspx?page=1&xm...,,,,,Unknown,MS,1,,6/21/2016
4,5,Olean High School,"Olean, New York",Olean,New York,42.081854,-78.432139,3,7,0,...,http://www.newswithviews.com/Erica/Carle10.htm,http://books.google.com/books?id=ZuKoSskEWyIC&...,"""Olean High School Shooting"". Larrie Benton Za...",,,Unknown,MS,1,,6/21/2016
5,6,Los Angeles Computer Learning Center,"Los Angeles, California",Los Angeles,California,34.176221,-118.539954,1,6,1,...,http://www.newspapers.com/newspage/15323499/,,,,,Unknown,MS,1,,6/22/2016
6,7,Cal State Fullerton,"Fullerton, California",Fullerton,California,33.884042,-117.927850,7,2,0,...,http://www.dailytitan.com/2011/11/csuf-massacr...,http://criminalminds.wikia.com/wiki/Edward_All...,http://www.dailytitan.com/2011/11/csuf-massacr...,,,Unknown,MS,1,,6/21/2016
7,8,Grover Cleveland Elementary School,"San Diego, California",San Diego,California,32.863573,-117.128163,2,8,0,...,http://www.examiner.com/article/the-san-diego-...,http://signofthetimes.yuku.com/topic/1258#.Uvb...,https://www.mail-archive.com/seeknfind@ashlist...,,,Unknown,MS,1,,6/22/2016
8,9,University of South Carolina,"Columbia, South Carolina",Columbia,South Carolina,34.050988,-80.820775,2,5,0,...,http://news.google.com/newspapers?nid=1891&dat...,http://news.google.com/newspapers?nid=1338&dat...,,,,Unknown,MS,1,,6/21/2016
9,10,Valley High School,"Las Vegas, Nevada",Las Vegas,Nevada,36.189319,-115.326487,1,2,0,...,http://www.leagle.com/xmlResult.aspx?page=1&xm...,http://news.google.com/newspapers?nid=1345&dat...,http://www.lasvegassun.com/news/1999/apr/23/sl...,,,Unknown,MS,1,,6/21/2016


In [11]:
len(shooters)

335

In [12]:
list(shooters)

['CaseID',
 'Title',
 'Location',
 'City',
 'State',
 'Latitude',
 'Longitude',
 'Number of Civilian Fatalities',
 'Number of Civilian Injured',
 'Number of Enforcement Fatalities',
 'Number of Enforcement Injured',
 'Total Number of Fatalities',
 'Total Number of Victims',
 'Description',
 'Date',
 'Day of Week',
 'Date - Detailed',
 'Shooter Name',
 'Number of shooters',
 'Shooter Age(s)',
 'Average Shooter Age',
 'Shooter Sex',
 'Shooter Race',
 'Type of Gun - Detailed',
 'Type of Gun - General',
 'Number of Shotguns',
 'Number of Rifles',
 'Number of Handguns',
 'Total Number of Guns',
 'Number of Automatic Guns',
 'Number of Semi-Automatic Guns',
 'Fate of Shooter at the scene',
 'Fate of Shooter',
 "Shooter's Cause of Death",
 'School Related',
 'Place Type',
 'Relationship to Incident Location',
 'Targeted Victim/s - Detailed',
 'Targeted Victim/s - General',
 'Possible Motive - Detailed',
 'Possible Motive - General',
 'History of Mental Illness - Detailed',
 'History of Mental

I'm curious what's going to happen if I run a market basket analysis. Let's clean up the data a little more and see what happens. I'm working off of the following tutorial:
http://pbpython.com/market-basket-analysis.html

In [13]:
cleaned_shooters = shooters.copy(deep=True)

#Clean gun types
cleaned_shooters['Type of Gun - General'] = cleaned_shooters['Type of Gun - General'].str.strip()
cleaned_shooters['Type of Gun - General'] = cleaned_shooters['Type of Gun - General'].str.lower()
cleaned_shooters['Type of Gun - General'].unique()

array(['multiple guns', 'handgun', 'shotgun', 'rifle', 'unknown',
       'semi-automatic rifle', '9-mm'], dtype=object)

In [14]:
#Clean gun counts
guncount_types = ['Number of Shotguns','Number of Handguns','Number of Rifles']
for guncount_type in guncount_types:
    cleaned_shooters[guncount_type].replace('Unknown', 0, inplace=True)

In [15]:
#Clean military experience
cleaned_shooters['Military Experience'] = cleaned_shooters['Military Experience'].str.lower()
cleaned_shooters['Military Experience'].replace('yes', True, inplace=True)
cleaned_shooters['Military Experience'].replace(['no', 'unknown'], False, inplace=True)

In [16]:
#Set civilian + law enforcement casualties, injuries to bools
cleaned_shooters['Number of Civilian Fatalities'] = cleaned_shooters['Number of Civilian Fatalities'] > 0
cleaned_shooters['Number of Civilian Injured'] = cleaned_shooters['Number of Civilian Injured'] > 0
cleaned_shooters['Number of Enforcement Fatalities'] = cleaned_shooters['Number of Enforcement Fatalities'] > 0
cleaned_shooters['Number of Enforcement Injured'] = cleaned_shooters['Number of Enforcement Injured'] > 0
cleaned_shooters['High Civilian Casualties'] = (shooters['Number of Civilian Fatalities'] + shooters['Number of Civilian Injured']) > 10

In [17]:
#Corner cases (from errors)
cleaned_shooters['Number of Shotguns'].replace('Handgun', 0, inplace=True)
cleaned_shooters['Number of Rifles'].replace('0 (1)', 0, inplace=True)
cleaned_shooters['Number of Handguns'].replace('2 (1)', 0, inplace=True)

In [18]:
#Set gun types to bools
cleaned_shooters['Number of Shotguns'] = pd.to_numeric(cleaned_shooters['Number of Shotguns']) > 0
cleaned_shooters['Number of Rifles'] = pd.to_numeric(cleaned_shooters['Number of Rifles']) > 0
cleaned_shooters['Number of Handguns'] = pd.to_numeric(cleaned_shooters['Number of Handguns']) > 0

In [19]:
#Clean fate of shooter results
cleaned_shooters['Fate of Shooter'].replace('Custody / Escaped', 'Custody/Escaped', inplace=True)

In [20]:
len(cleaned_shooters.loc[
    (cleaned_shooters['Fate of Shooter'].isnull()) | 
    (cleaned_shooters['Fate of Shooter'] == 'FALSE')
])

3

In [21]:
#Drop the three where we don't know the fate of the shooter
cleaned_shooters = cleaned_shooters.loc[
    (~cleaned_shooters['Fate of Shooter'].isnull()) & 
    ~(cleaned_shooters['Fate of Shooter'] == 'FALSE')
]

In [22]:
#Change 'Unknowns' to 'No' for history of mental illness (it's not a great assumption, but a necessary one)
cleaned_shooters['History of Mental Illness - General'].replace('Unknown', 'No', inplace=True)

#Change to True/False scheme
cleaned_shooters['History of Mental Illness - General'].replace('Yes', True, inplace=True)
cleaned_shooters['History of Mental Illness - General'].replace('No', False, inplace=True)

In [23]:
shooters['Place Type'].str.lower().unique()

array(['college/university/adult education', 'government facility',
       'primary school', 'secondary school',
       'retail/wholesale/services facility', 'entertainment venue',
       'restaurant/cafe?', 'company/factory/office',
       'retail/ wholesale/services facility\nand primary school',
       'public transportation', 'residential home/neighborhood',
       'military facility', 'street/highway', 'place of worship',
       'park/wildness', 'retail/ wholesale/services facility',
       'residential home/neighborhood \nand street/highway',
       'medical/care',
       'retail/wholesale/services facility\n/residential home/neighborhood',
       'residential home/neighborhood,\nretail/ wholesale/services facility',
       'restaurant/cafeé', 'restaurant/cafe', 'park/wilderness',
       'unknown', 'residential home'], dtype=object)

In [24]:
len(cleaned_shooters.loc[cleaned_shooters['Place Type'] == 'company/factory/office'])

0

In [25]:
#Cleaning/merging certain values so our data is simplified
cleaned_shooters['Place Type'] = shooters['Place Type'].str.lower()

In [26]:
#We'll iterate over our desired features and flatten them s.t.
#each unique value has a column, and if the incident has that value
#we'll just assign the matching column a 1 and the other ones a 0
marketbasket = cleaned_shooters[[
    'Number of Civilian Fatalities',
    'Number of Civilian Injured',
    'Number of Enforcement Fatalities',
    'Number of Enforcement Injured',
    'Military Experience',
    'Number of Shotguns',
    'Number of Rifles',
    'Number of Handguns',
    'High Civilian Casualties',
    'History of Mental Illness - General'
]]


#Get the logical inverses as well
marketbasket['No Civilian Fatalities'] = ~marketbasket['Number of Civilian Fatalities']
marketbasket['No Civilian Injured'] = ~marketbasket['Number of Civilian Injured']
marketbasket['No Enforcement Fatalities'] = ~marketbasket['Number of Enforcement Fatalities']
marketbasket['No Enforcement Injured'] = ~marketbasket['Number of Enforcement Injured']
marketbasket['No History of Mental Illness - General'] = ~marketbasket['History of Mental Illness - General']


##TODO: Need to finish generating the rest of the dataframe by expanding out unique values across columns
desired_features = [
    'Fate of Shooter',
    'Fate of Shooter at the scene',
    'Possible Motive - General'
]

for feature in desired_features:
    for val in cleaned_shooters[feature].unique():
        marketbasket[feature + ' ' + str(val)] = (cleaned_shooters[feature] == val)

marketbasket

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

Unnamed: 0,Number of Civilian Fatalities,Number of Civilian Injured,Number of Enforcement Fatalities,Number of Enforcement Injured,Military Experience,Number of Shotguns,Number of Rifles,Number of Handguns,High Civilian Casualties,History of Mental Illness - General,...,Possible Motive - General Drug use,Possible Motive - General Drug use/Financial difficulties,Possible Motive - General Gender,Possible Motive - General Financial difficulties,"Possible Motive - General Drug use, Robbery",Possible Motive - General Domestic Dispute,Possible Motive - General Social Dispute,Possible Motive - General nan,Possible Motive - General social Dispute,Possible Motive - General Terminated
0,True,True,True,False,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
1,True,True,False,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False
2,True,True,True,True,False,False,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
3,True,True,False,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False
4,True,True,False,False,False,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,True,True,True,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,True,True,False,False,False,False,True,False,False,True,...,False,False,False,False,False,False,False,False,False,False
7,True,True,False,True,False,False,True,False,False,True,...,False,False,False,False,False,False,False,False,False,False
8,True,True,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
9,True,True,False,False,False,False,False,True,False,True,...,False,False,False,False,False,False,False,False,False,False


In [27]:
#Apply apriori market basket algorithm
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
apriori_res = apriori(marketbasket, min_support=0.05, use_colnames=True)

In [28]:
#Create display copy of apriori results
apriori_res_display = apriori_res.copy(deep=True)
apriori_res_display['itemsets'] = apriori_res_display['itemsets'].apply(lambda x: ''.join(e + ', ' for e in x))

In [29]:
pd.options.display.max_colwidth = 300
apriori_res_display.sort_values('support')

Unnamed: 0,support,itemsets
958,0.051205,"Number of Civilian Fatalities, No Enforcement Injured, No History of Mental Illness - General, Possible Motive - General Domestic dispute,"
941,0.051205,"Number of Civilian Fatalities, No Enforcement Fatalities, Fate of Shooter Deceased, Possible Motive - General Domestic dispute,"
908,0.051205,"Number of Civilian Fatalities, No Civilian Injured, No Enforcement Injured, Fate of Shooter at the scene Custody,"
1849,0.051205,"Number of Civilian Injured, No Enforcement Fatalities, No Enforcement Injured, Fate of Shooter at the scene Deceased, Possible Motive - General Unknown,"
1860,0.051205,"Number of Civilian Injured, No Enforcement Fatalities, Fate of Shooter Deceased, Fate of Shooter at the scene Deceased, Possible Motive - General Multiple motives,"
878,0.051205,"Number of Civilian Fatalities, High Civilian Casualties, No Enforcement Injured, No History of Mental Illness - General,"
1870,0.051205,"Number of Civilian Injured, No Enforcement Injured, Fate of Shooter Deceased, Fate of Shooter at the scene Deceased, Possible Motive - General Unknown,"
1871,0.051205,"Number of Civilian Injured, No Enforcement Injured, Fate of Shooter Custody, Fate of Shooter at the scene Custody, Possible Motive - General Social dispute,"
858,0.051205,"Number of Civilian Fatalities, Number of Handguns, No Enforcement Injured, Possible Motive - General Mental illness,"
852,0.051205,"Number of Civilian Fatalities, Number of Handguns, No Enforcement Fatalities, Possible Motive - General Domestic dispute,"


In [30]:
#Display full dataset for apriori results, limited to itemsets of <=3 elements
max_rows_orig = pd.options.display.max_rows
pd.options.display.max_rows = 1000
apriori_res.loc[[len(itemset) <= 3 for itemset in apriori_res['itemsets']]]

Unnamed: 0,support,itemsets
0,0.834337,[Number of Civilian Fatalities]
1,0.807229,[Number of Civilian Injured]
2,0.063253,[Number of Enforcement Injured]
3,0.111446,[Military Experience]
4,0.138554,[Number of Shotguns]
5,0.23494,[Number of Rifles]
6,0.575301,[Number of Handguns]
7,0.153614,[High Civilian Casualties]
8,0.289157,[History of Mental Illness - General]
9,0.165663,[No Civilian Fatalities]


In [31]:
#Reset number of rows that can be shown
pd.options.display.max_rows = max_rows_orig

Great, all of our data is wrapped up in the marketbasket and we have apriori support values for all potential combinations of different variables down to the case where 5% of incidents exhibited a given combination. 

I'm going to start by drilling down into data involving members of the military. It's an interesting twist that someone who would fight to defend the country would turn their gun on the general population. We'll see what shakes out here.

In [32]:
#How many incidents had a shooter w/ military experience?
len(marketbasket.loc[marketbasket['Military Experience'] == True])

37

In [33]:
#Select market basket results with 'Military Experience'
military_apriori = apriori_res.loc[['Military Experience' in elem for elem in apriori_res['itemsets']]]
military_apriori

Unnamed: 0,support,itemsets
3,0.111446,[Military Experience]
27,0.108434,"[Number of Civilian Fatalities, Military Experience]"
48,0.081325,"[Number of Civilian Injured, Military Experience]"
69,0.075301,"[Military Experience, Number of Handguns]"
70,0.099398,"[Military Experience, No Enforcement Fatalities]"
71,0.093373,"[Military Experience, No Enforcement Injured]"
72,0.063253,"[Military Experience, No History of Mental Illness - General]"
73,0.075301,"[Military Experience, Fate of Shooter Deceased]"
74,0.072289,"[Military Experience, Fate of Shooter at the scene Deceased]"
197,0.078313,"[Number of Civilian Fatalities, Number of Civilian Injured, Military Experience]"


In [34]:
#Number of cases where the shooter had military experience and died on the scene
len(marketbasket.loc[(marketbasket['Military Experience'] == True) & (marketbasket['Fate of Shooter at the scene Deceased'] == True)])

24

In [42]:
#How often do shooters w/o military experience die on the scene?
deceased_no_military_experience = marketbasket.loc[
    (marketbasket['Fate of Shooter at the scene Deceased'] == True) &
    (marketbasket['Military Experience'] == False)
]
no_military_experience = marketbasket.loc[marketbasket['Military Experience'] == False]
float(len(deceased_no_military_experience)) / float(len(no_military_experience))

0.39661016949152544

In [36]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [43]:
%%R
#Test for statistical significance
cm1 <- matrix(c(24, 117, 13, 178),
                nrow = 2,
                dimnames = list(c("Military", "Not Military"), c("Deceased", "Not Deceased"))) 
addmargins(as.matrix(cm1), c(1,2))
fisher.test(cm1)

From the command line:


Fisher's Exact Test for Count Data<br/>
data:  cm1<br/>
**p-value = 0.004469**<br/>
alternative hypothesis: true odds ratio is not equal to 1<br/>
95 percent confidence interval:<br/>
 1.309101 6.243371<br/>
sample estimates:<br/>
odds ratio<br/>
  2.799742<br/>


**In other words, our observed difference is very statistically significant.**

Here's a breakdown of the results from my military experience analysis:

- 37 of the 335 incidents in question involved a shooter with military experience (~11%).
    - According to an analysis by FiveThirtyEight, it's estimated that 7.3% of all living Americans served in the armed forces.
    - I'm going to chalk up that gap in percentage of shooters relative to the overall population to small sample size. There's no way to know exactly how many mass shootings have occurred since 1960, but if we had all of them then I'd guess that this gap would shrink.
    
- **In 64.8% of incidents where the shooter had military experience, they died on the scene**
    - That's a huge uptick from the general case: 43.4% of all shooters die on the scene
    - 39.7% of shooters with no military experience die on the scene
    - According to the Fisher test, our p-value for that difference is .004469; *very* statistically significant.
    - Military experience seems to be a large contributing factor in why someone would die taking a final stand.
    - In 79% of these cases, no officers were injured or killed. How did these guys provoke the cops to the point where they were killed on the spot without actually injuring or killing any of them?
    
- 81% of cases w/ military experience result in officer injury or death
    - General case: 91.6% of cases result in no enforcement injuries or deaths
    - But again, a 10% shift in this subset is a matter of 3-4 cases, probably not enough to go off of
    - In any event, it's surprising that a mass shooter with military experience wouldn't target law enforcement if their actions are in any way related to an institutional resentment
        - But then again I should probably drill down into that; what were the motives for the military guys?

In [None]:
num_officers_not_targeted = len(shooters.loc[(shooters['Number of Enforcement Fatalities'] == 0) & (shooters['Number of Enforcement Injured'] == 0)])
print("Officers were injured or killed ", float(len(shooters) - num_officers_not_targeted) / float (len(shooters)), " of the time.")

Interesting. Nearly every example of a shooting where they had military experience resulted in no officer casualties. That being said, it looks roughly proportional to the number of shooters without military experience that did/didn't target law enforcement. In other words, military experience doesn't appear to make a significant difference in who they choose to target.

In [None]:
from builtins import any as b_any

#Get motive results, remove ones where we don't know their motive or there are multiple ill-defined motives
motive_apriori = apriori_res.loc[[b_any('Possible Motive - General' in e for e in elem) for elem in apriori_res['itemsets']]]
# motive_apriori = motive_apriori[['Possible Motive - General Unknown' not in elem for elem in motive_apriori['itemsets']]]
# motive_apriori = motive_apriori[['Possible Motive - General Multiple motives' not in elem for elem in motive_apriori['itemsets']]]
motive_apriori
# motive_apriori.sort_values('support')

The market basket analysis gives us the following insights:
- Law enforcement is injured in just ~6.3% of mass shootings; officers die in 3.9% of incidents.
- Officers were neither injured nor killed in 307 out of 335 incidents (91.6%).
    - Is that discrepancy because law enforcement is adequately trained for these kinds of situations?
    - Or is it because the shooters aren't actively targeting law enforcement?
    - I'm guessing the latter, because the analysis points out that a civilian dies in 83.6% of shootings; if they were targeting law enforcement too, they'd probably have a higher 'success' rate
- 11% of the shooters had military experience
- There's a papertrail for mental illness in only 28.9% of cases

In [None]:
#Group the dataset by year
shooters['Date'] = pd.to_datetime(shooters['Date'])
dategroups = shooters.groupby(shooters.Date.dt.year)


#Iterate over groups, count each gun type
for name, grouped in dategroups:
    gun_counts = grouped['Type of Gun - General'].count().transpose()
    print(name)
    print(gun_counts)
    


In [None]:
from altair import *
chart = Chart(data).mark_area(
    stacked='center',
).encode(
    color=Color('series:N',
        scale=Scale(
            range='category20b',
        ),
    ),
    x=X('date:T',
        axis=Axis(
            axisWidth=0.0,
            format='%Y',
            labelAngle=0.0,
            tickSize=0.0,
        ),
        scale=Scale(
            nice='month',
        ),
        timeUnit='yearmonth',
    ),
    y=Y('sum(count):Q',
        axis=False,
    ),
).configure_cell(
    height=200.0,
    width=300.0,
)

Alright, here's an idea: let's look at the density of different types of shootings across the country based on motive first, then other variables.

In [None]:
#Let's start by looking at how many examples there are for each motive
shooters['Possible Motive - General'] = shooters['Possible Motive - General'].str.lower()
motive_groups = shooters.groupby('Possible Motive - General')
for name, group in motive_groups:
    print(name, ": ", len(group))

I suppose it's a blessing that there are too few examples of any one kind of shooting to use. In any event, it means that standard density estimation techniques are out the window, except when considering all examples.

In [None]:
from mpl_toolkits.basemap import Basemap

COLORS=['#1DACE8','#1C366B','#F24D29','#E5C4A1','#C4CFD0']
SELECTED_GROUPS = [
    'social dispute',
    'terminated/denied/reprimanded',
    'domestic dispute',
    'mental illness',
    'political/religious ideals'
]

fig, ax = plt.subplots()
shooter_map = Basemap(llcrnrlon=-145.5,llcrnrlat=1.,urcrnrlon=-2.566,urcrnrlat=46.352,\
            rsphere=(6378137.00,6356752.3142),
            resolution='l',area_thresh=1000.,projection='merc',
            lat_1=50.,lon_0=-107.,ax=ax)
shooter_map.drawcoastlines()
shooter_map.drawcountries()
shooter_map.fillcontinents(color = 'black')
shooter_map.drawmapboundary()


##TODO: fix the coordinates and map projection

#Plot the shooting motivation groups we've indicated above
color_counter = 0
for name, group in motive_groups:
    if name in SELECTED_GROUPS:
        shooter_map.plot(
            group['Latitude'], 
            group['Longitude'],
#             color=COLORS[color_counter],
            'bo',
            markersize=20
        )
        color_counter += 1

plt.show()