<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Names-of-Mass-Shooters" data-toc-modified-id="Names-of-Mass-Shooters-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Names of Mass Shooters</a></span></li><li><span><a href="#Identity-Analysis-of-Mass-Shooters-from-2015-17" data-toc-modified-id="Identity-Analysis-of-Mass-Shooters-from-2015-17-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Identity Analysis of Mass Shooters from 2015-17</a></span></li><li><span><a href="#Caption-Analysis-of-Mass-Shooters-from-2015-17" data-toc-modified-id="Caption-Analysis-of-Mass-Shooters-from-2015-17-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Caption Analysis of Mass Shooters from 2015-17</a></span></li><li><span><a href="#Timeline-of-when-the-word-&quot;shooting&quot;-was-mentioned-in-the-news" data-toc-modified-id="Timeline-of-when-the-word-&quot;shooting&quot;-was-mentioned-in-the-news-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Timeline of when the word "shooting" was mentioned in the news</a></span></li></ul></div>

In [None]:
from esper.prelude import *
from esper.widget import *
from esper.identity import *
from esper.topics import *
from esper.spark_util import *
from esper.plot_util import *
from esper.major_canonical_shows import MAJOR_CANONICAL_SHOWS

# Names of Mass Shooters

In [None]:
# One (shooter name, event label, event date) tuple per mass shooting analyzed
# in this notebook. The labels are shown verbatim in the plots below, and the
# date strings are later parsed by parse_date_string(), so they follow the
# 'Mon D, YYYY' form (a day range such as '7-8' is allowed).
shootings = [
    ('Muhammad Youssef Abdulazeez', 'Chattanooga', 'Jul 16, 2015'),
    ('Chris Harper-Mercer', 'Umpqua Community College', 'Oct 1, 2015'),
    ('Robert Lewis Dear Jr', 'Colorado Springs - Planned Parenthood', 'Nov 27, 2015'),
    ('Syed Rizwan Farook', 'San Bernardino', 'Dec 2, 2015'), 
    ('Tashfeen Malik', 'San Bernardino', 'Dec 2, 2015'),
    # Label previously read 'Charleston Shurch' (typo that leaked into plots).
    ('Dylann Roof', 'Charleston Church', 'Jun 17, 2015'),
    ('Omar Mateen', 'Orlando Nightclub', 'Jun 12, 2016'),
    ('Micah Xavier Johnson', 'Dallas Police', 'Jul 7-8, 2016'),
    ('Gavin Eugene Long', 'Baton Rouge Police', 'Jul 17, 2016'),
    ('Esteban Santiago-Ruiz', 'Ft. Lauderdale Airport', 'Jan 6, 2017'),
    ('Willie Corey Godbolt', 'Lincoln County', 'May 28, 2017'),
    ('Stephen Paddock', 'Las Vegas', 'Oct 1, 2017'),
    ('Devin Patrick Kelley', 'San Antonio Church', 'Nov 5, 2017')
]

# Sanity check: every shooter must exist as a Thing row under the lower-cased
# name, because the cells below query identities by that lower-cased name.
orm_set = { x.name for x in Thing.objects.filter(name__in=[s[0].lower() for s in shootings]) }
# Collect every missing entry so a single run reports all of them, rather than
# stopping at the first one.
missing_shootings = [s for s in shootings if s[0].lower() not in orm_set]
assert not missing_shootings, 'not in the database: {}'.format(missing_shootings)

In [None]:
# For each shooter, display a strip of up to ten faces whose identity label has
# probability above 0.95. order_by('?') asks the database for a random order,
# so the sample differs from run to run.
for shooter, location, when in shootings:
    confident_matches = FaceIdentity.objects.filter(
        probability__gt=0.95,
        identity__name=shooter.lower(),
    ).order_by('?')[:10]
    sampled_faces = [match.face for match in confident_matches]

    print('[{}] {} - {}'.format(when, location, shooter))
    imshow(faces_to_tiled_img(sampled_faces, cols=10))
    plt.show()

# Identity Analysis of Mass Shooters from 2015-17

In [None]:
# Load one FaceIdentityModel per shooter, in the same order as `shootings`
# (later cells rely on that positional correspondence for plot rows).
face_identity_models = [
    FaceIdentityModel.load(name=shooting[0]) for shooting in shootings
]

In [None]:
# Print each model's exp_positives_total, smallest first.
models_by_expected_faces = sorted(
    face_identity_models, key=lambda m: m.exp_positives_total
)
for model in models_by_expected_faces:
    row = '{:<30} : {:0.2f} faces'.format(model.name, model.exp_positives_total)
    print(row)

In [None]:
# One get_screen_time_by_show() result per model, kept in `shootings` order so
# each entry lines up with the matching row label in the plots below.
screen_times_by_show = list(map(get_screen_time_by_show, face_identity_models))

In [None]:
# Colour used for each channel in the matrix plots.
channel_name_cmap = {
    'CNN': 'DarkBlue',
    'FOXNEWS': 'DarkRed',
    'MSNBC': 'DarkGreen',
}

# Canonical show name -> colour of the channel it is recorded under.
# One row is taken per distinct canonical show.
canonical_show_cmap = {}
show_channel_rows = Video.objects.distinct(
    'show__canonical_show'
).values('show__canonical_show__name', 'channel__name')
for row in show_channel_rows:
    show_name = row['show__canonical_show__name']
    canonical_show_cmap[show_name] = channel_name_cmap[row['channel__name']]

# Row labels: shooter name on the first line, then "[date] event".
shooting_labels = [
    '{}\n[{}] {}'.format(shooter, when, location)
    for shooter, location, when in shootings
]

# Columns: major shows grouped by channel colour, alphabetical within a group.
ordered_shows = sorted(
    MAJOR_CANONICAL_SHOWS,
    key=lambda show: (canonical_show_cmap[show], show),
)

plot_matrix(
    shooting_labels,
    screen_times_by_show,
    'Screentime Comparison Across Shows for Mass Shootings in 2015-17',
    'Show Name',
    'Person',
    categories=ordered_shows,
    category_color_map=canonical_show_cmap,
    primary_scale=1./6.,
    marker='d'
)

# Caption Analysis of Mass Shooters from 2015-17

In [None]:
# Alternative spellings / shortened forms to search for in the captions in
# addition to each shooter's full name. An empty list means only the full name
# is searched. Entries are listed in the same order as `shootings`.
name_to_extra_phrases = {
    'Muhammad Youssef Abdulazeez': ['Muhammad Abdulazeez'],
    'Chris Harper-Mercer': ['Harper-Mercer', 'Chris Harper Mercer'],
    'Robert Lewis Dear Jr': ['Robert Dear'],
    'Syed Rizwan Farook': ['Syed Farook'],
    'Tashfeen Malik': [],
    'Dylann Roof': [],
    'Omar Mateen': [],
    'Micah Xavier Johnson': ['Micah Johnson'],
    'Gavin Eugene Long': ['Gavin Long'],
    'Esteban Santiago-Ruiz': ['Santiago-Ruiz', 'Esteban Santiago Ruiz'],
    'Willie Corey Godbolt': ['Willie Godbolt'],
    'Stephen Paddock': [],
    'Devin Patrick Kelley': ['Devin Kelley'],
}

# One get_caption_mentions_by_show() result per shooter, in `shootings` order.
# Phrases are upper-cased before the search; the full name always comes first.
caption_mentions_by_show = [
    get_caption_mentions_by_show(
        [phrase.upper() for phrase in [shooter] + name_to_extra_phrases[shooter]]
    )
    for shooter, _, _ in shootings
]

In [None]:
# Same matrix as the screentime-only plot, with caption name mentions overlaid
# as a secondary value series.

# Row labels: shooter name on the first line, then "[date] event".
shooting_labels = [
    '{}\n[{}] {}'.format(shooter, when, location)
    for shooter, location, when in shootings
]

# Columns: major shows grouped by channel colour, alphabetical within a group.
ordered_shows = sorted(
    MAJOR_CANONICAL_SHOWS,
    key=lambda show: (canonical_show_cmap[show], show),
)

plot_matrix(
    shooting_labels,
    screen_times_by_show,
    'Comparison Across Shows for Mass Shootings in 2015-17',
    'Show Name',
    'Person',
    secondary_values_by_category=caption_mentions_by_show,
    categories=ordered_shows,
    value_names=['Screentime', 'Name Mentions'],
    category_color_map=canonical_show_cmap,
    primary_scale=1./6.,
    secondary_scale=2.,
    marker='d',
    secondary_marker='o'
)

# Timeline of when the word "shooting" was mentioned in the news

In [None]:
# Words whose caption mentions are charted in the timeline below.
shooting_phrases = ['shooting', 'shooter', 'gunman']

# caption_search() is given the upper-cased phrases; only the first element of
# its result is kept. It is consumed later as a mapping of
# video id -> collection of mentions.
upper_cased_phrases = list(map(str.upper, shooting_phrases))
shooting_mentions = caption_search(upper_cased_phrases)[0]

In [None]:
from collections import defaultdict
from datetime import datetime

# Lower-case three-letter month abbreviation -> month number (1-12).
MONTH_TO_INT = {
    abbreviation: number
    for number, abbreviation in enumerate(
        [
            'jan', 'feb', 'mar', 'apr', 'may',
            'jun', 'jul', 'aug', 'sep', 'oct',
            'nov', 'dec'
        ],
        start=1
    )
}


def parse_date_string(s):
    """Parse a 'Mon D, YYYY' string (as used in `shootings`) into a datetime.

    The month is a case-insensitive three-letter abbreviation. The day may be
    a range such as '7-8', in which case the first day is used. The comma is
    optional, and any amount of whitespace may surround or separate the fields.

    Args:
        s: date string, e.g. 'Jul 16, 2015' or 'Jul 7-8, 2016'.

    Returns:
        A naive datetime at midnight of the (first) day.

    Raises:
        ValueError: if the string does not have exactly three fields, a field
            is not numeric where required, or the date does not exist.
        KeyError: if the month is not one of the keys of MONTH_TO_INT.
    """
    # Turn the comma into whitespace and split on whitespace runs, so that
    # 'Oct 1,2017' and 'Oct  1, 2017 ' parse the same as 'Oct 1, 2017'.
    month, day, year = s.lower().replace(',', ' ').split()
    month = MONTH_TO_INT[month]
    # For a multi-day event ('7-8') keep only the starting day.
    day = int(day.split('-')[0])
    year = int(year)
    return datetime(year=year, month=month, day=day)

def plot_shooting_mentions():
    """Plot per-channel daily counts of the shooting-phrase caption mentions.

    Reads the notebook-level `shooting_mentions` (iterated as video id ->
    mentions), `shooting_phrases` and `shootings`. Each video's mention count
    is added to the bucket for its channel and the calendar day of
    `video.time`. The result is passed to plot_time_series() together with one
    (event label, date) pair per entry in `shootings`.
    """
    videos_by_id = {video.id: video for video in Video.objects.all()}
    channel_names_by_id = {
        channel.id: channel.name for channel in Channel.objects.all()
    }

    # channel name -> {midnight datetime of the day -> number of mentions}
    daily_counts_by_channel = defaultdict(lambda: defaultdict(int))
    for video_id, video_mentions in shooting_mentions.items():
        video = videos_by_id[video_id]
        channel_name = channel_names_by_id[video.channel_id]
        aired = video.time
        # Rebuild the datetime from year/month/day only, dropping the time of day.
        day = datetime(year=aired.year, month=aired.month, day=aired.day)
        daily_counts_by_channel[channel_name][day] += len(video_mentions)

    channels = list(daily_counts_by_channel)
    series_per_channel = [daily_counts_by_channel[name] for name in channels]
    title = 'Mentions of "{}" Over Time'.format(', '.join(shooting_phrases))
    shooting_events = [
        (label, parse_date_string(when)) for _, label, when in shootings
    ]

    plot_time_series(
        channels, series_per_channel,
        title,
        'Number of Caption Mentions',
        plotstyle='-', linewidth=0.5,
        discrete_events=shooting_events
    )

plot_shooting_mentions()