In [5]:
# import dependencies
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import spacy
import en_core_web_sm
import pandas as pd

In [24]:
# Baseline vocabulary: maps each use-of-force rank label to its list of
# descriptive phrases. Every rank starts with the same two generic police-term
# phrases, followed by the phrases specific to that rank.
# NOTE(review): "tazor" looks like a misspelling of "taser"; it is reproduced
# verbatim here so the baseline vocabulary stays exactly as it was.
ranked_reports = {
    rank_label: [
        "policeman, policewoman, law enforcement",
        "police officer, cop, five-o, fuzz, DHS",
        *rank_phrases,
    ]
    for rank_label, rank_phrases in (
        ("Rank 1 - Police Presence", ()),
        (
            "Rank 2 - Empty-hand",
            (
                "pushed and shoved with shields",
                "grabs, holds and joint locks",
                "punch and kick",
            ),
        ),
        (
            "Rank 3 - Blunt Force",
            (
                "rubber bullets",
                "riot rounds",
                "batons",
            ),
        ),
        (
            "Rank 4 - Chemical & Electric",
            (
                "tear gas",
                "pepper spray",
                "flashbangs, stun grenade",
                "chemical sprays",
                "Conducted energy devices, CED or tazor",
            ),
        ),
        (
            "Rank 5 - Lethal Force",
            (
                "shoot and kill",
                "open fire",
                "deadly force",
                "fatal",
                "dies",
            ),
        ),
    )
}

In [25]:
# Extended vocabulary: maps each use-of-force rank label to a longer list of
# phrases than the baseline vocabulary. Two misspelled phrases were corrected
# ("prosters" -> "protesters", "tazor" -> "taser"): a misspelled phrase
# produces tokens that cannot match correctly spelled incident text.
ranked_reports1 = {
    "Rank 1 - Police Presence": [
        # generic police / protest terms
        "policeman", "policewoman", "law enforcement",
        "police officer, cop, five-o, fuzz, DHS",
        "protester", "FPS", "officer",
        "Federal Protective Services",
    ],
    "Rank 2 - Empty-hand": [
        # generic police / protest terms
        "policeman", "policewoman", "law enforcement",
        "police officer", "cop", "five-o", "fuzz, DHS",
        # physical, unarmed force
        "pushed and shoved with shields", "officer",
        "grabs, holds and joint locks",
        "punch and kick", "thrown to the ground", "hit",
        "charge a protester", "tackle to the ground",
        "kneel on", "arrest", "protester",
        "FPS", "Federal Protective Services", "zip-ties",
        "police chase and attack", "kicking him",
        "threw him to the ground", "handcuff him",
        "kneeling on a protester", "pinning down",
        "tackle", "shoved to the ground", "violent",
        "officer shove",
    ],
    "Rank 3 - Blunt Force": [
        # generic police / protest terms
        "policeman", "policewoman", "law enforcement",
        "police officer", "cop", "five-o", "fuzz", "DHS",
        # impact weapons and projectiles
        "rubber bullets", "officer",
        "riot rounds",
        "batons", "blood", "hit", "arrest",
        "protester", "FPS",
        "Federal Protective Services",
        "strike with baton", "violent",
    ],
    "Rank 4 - Chemical & Electric": [
        # generic police / protest terms
        "policeman", "policewoman", "law enforcement",
        "police officer", "cop", "five-o", "fuzz", "DHS",
        # chemical agents, stun devices and less-lethal munitions
        "tear gas", "officer",
        "pepper spray",
        "flashbangs", "stun grenade",
        "chemical sprays",
        "Conducted energy devices, CED or taser",
        "blood", "arrest", "protester", "FPS",
        "Federal Protective Services", "pepper balls",
        "using munitions on protesters", "struck by a round",
        "fire pepper balls and tear gas",
        "struck in chest by projectile", "violent",
        "munition", "firing a riot gun", "paintball gun",
        "shots are fired", "fire explosives",
        "fire impact munitions",
    ],
    "Rank 5 - Lethal Force": [
        # generic police / protest terms
        "policeman", "policewoman", "law enforcement",
        "police officer", "cop", "five-o", "fuzz", "DHS",
        # deadly force
        "shoot and kill", "protester",
        "open fire", "FPS", "officer",
        "Federal Protective Services",
        "deadly force", "fatal",
        "dies", 'kill', "arrest", "violent",
        "shot and killed",
    ],
}

In [26]:
class TextMatcher:
    """ Generic NLP Text Matching Model

    Builds one document per label by joining that label's phrases with spaces,
    fits a TF-IDF vectorizer on those documents, and answers a query with the
    label of the nearest document under cosine distance.
    """

    class Tokenizer:
        """ Standard SpaCy Tokenizer: lower-cased lemmas without stop words or punctuation """
        # Loaded once, when the class body executes, and shared by all instances.
        nlp = spacy.load("en_core_web_sm")

        def __call__(self, text: str) -> list:
            """ Return the lower-cased lemma of every kept token in `text` """
            return [
                token.lemma_.lower() for token in self.nlp(text)
                if not token.is_stop and not token.is_punct
            ]

    def __init__(self, train_data: dict, ngram_range=(1, 3), max_features=8000,
                 no_match='Rank 0 - No Police Presence'):
        """ Model training on live data at init

        Args:
            train_data: mapping of label -> iterable of phrase strings.
            ngram_range: n-gram range passed to TfidfVectorizer.
            max_features: vocabulary size cap passed to TfidfVectorizer.
            no_match: label returned when a query matches nothing.

        Raises:
            ValueError: if `train_data` is empty.
        """
        if not train_data:
            raise ValueError("train_data must contain at least one label")
        self.no_match = no_match
        self.lookup = {k: ' '.join(v) for k, v in train_data.items()}
        # Position i in name_index is the label of training row i.
        self.name_index = list(self.lookup.keys())
        self.tfidf = TfidfVectorizer(
            ngram_range=ngram_range,
            tokenizer=self.Tokenizer(),
            max_features=max_features,
        )
        # .toarray() yields a plain ndarray; .todense() yields numpy.matrix,
        # which scikit-learn deprecated in 1.0 and rejects from 1.2 onwards.
        self.knn = NearestNeighbors(
            n_neighbors=1,
            leaf_size=15,
            metric='cosine',
            algorithm='brute',
            n_jobs=-1,
        ).fit(self.tfidf.fit_transform(self.lookup.values()).toarray())
        # Distance produced by an empty query; __call__ treats any query that
        # lands on exactly this distance as matching nothing.
        self.baseline, _ = self._worker('')

    def _worker(self, user_input: str):
        """ Prediction worker method - internal only

        Returns:
            (distance, index) of the single nearest training document.
        """
        vec = self.tfidf.transform([user_input]).toarray()
        distances, indices = self.knn.kneighbors(vec)
        # kneighbors returns 2-D arrays (one row per query, one column per
        # neighbour); there is exactly one query and one neighbour here.
        return distances[0][0], indices[0][0]

    def __call__(self, user_input: str) -> str:
        """ Callable object for making predictions

        Returns the label of the closest training document, or `no_match` when
        the query is missing (None / NaN) or is no closer than an empty query.
        """
        # Missing values (e.g. NaN cells reached through Series.apply) carry no
        # text; `x != x` is true only for NaN.
        if user_input is None or (isinstance(user_input, float) and user_input != user_input):
            return self.no_match
        dist, idx = self._worker(user_input)
        if dist != self.baseline:
            return self.name_index[int(idx)]
        return self.no_match

In [27]:
# build one matcher per vocabulary: `textmatcher` from ranked_reports,
# `textmatcher1` from ranked_reports1
textmatcher, textmatcher1 = map(TextMatcher, (ranked_reports, ranked_reports1))

In [28]:
# sample incident description used to spot-check both matchers below
text = """
During a protest at which both pro-police and Black Lives Matter protesters were present near West 7th and Lincoln, members of the two protests appear to exchange words in an intersection. Police charge a protester, a 14 year old girl according to the poster, tackle her to the ground, kneel on her and arrest her.
"""

In [29]:
# classify the sample text with the matcher built from ranked_reports
textmatcher(text)

'Rank 1 - Police Presence'

In [30]:
# classify the same sample text with the matcher built from ranked_reports1
textmatcher1(text)

'Rank 2 - Empty-hand'

In [31]:
# load the reddit incident data (first CSV column is used as the index)
# and preview the first rows
df = pd.read_csv("reddit_data.csv", index_col=0)
df.head()

Unnamed: 0,id,dates,added_on,links,case_id,city,state,lat,long,title,description,tags,force_rank
0,1,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.445121,"['https://www.youtube.com/watch?v=dPTr54DdTdY', 'https://www.reddit.com/r/Eugene/comments/gtbsvq/eugene_police_teargasses_downtown/']",or-eugene-2,Eugene,Oregon,44.049913,-123.097391,Peaceful protester tear gassed,"Footage shows a small cluster of protesters chanting ""I can't breathe"" near Lincoln and Broadway. At 1:15 in the footage, without warning, police fire tear gas, causing most protesters flee. Some canisters can be seen thrown back towards police.\r\n\r\nPolice then proceed down the street. More tear gas is fired at 4:30 in the footage.","['less-lethal', 'protester', 'tear-gas']",Rank 4 - Chemical & Electric
1,2,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.446934,"['https://twitter.com/IwriteOK/status/1266907719635632129', 'https://twitter.com/MrOlmos/status/1266916861267996673']",or-portland-1,Portland,Oregon,45.515586,-122.676994,Police violently break up peaceful protest,The Police try to break up the peaceful protest by resorting to hitting individuals with batons.,"['baton', 'beat', 'protester', 'strike']",Rank 1 - Police Presence
2,3,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.447560,['https://www.youtube.com/watch?v=01oWE24O9Zw&feature=emb_title'],or-portland-2,Portland,Oregon,45.515468,-122.676571,Officer pepper-sprays protester,An officer shoves and pepper-sprays a protester standing on the other side of a barrier.,"['less-lethal', 'pepper-spray', 'protester', 'shove', 'spray']",Rank 4 - Chemical & Electric
3,4,2020-05-31 00:00:00.000000,2021-02-26 20:45:37.448138,"['https://www.youtube.com/watch?v=R7fqWI41vQY', 'https://www.reddit.com/r/Eugene/comments/guxme1/footage_of_epd_shooting_eugene_weekly_reporter/', 'https://twitter.com/hwhouston1/status/1267338971291373568', 'https://www.dailyemerald.com/news/police-fire-pellets-tear-gas-at-late-night-protesters-sunday-police-chief-responds/article_cd4a4450-a49a-11ea-a6fd-73b64efe150a.html']",or-eugene-3,Eugene,Oregon,44.044728,-123.079238,Reporter shot with tear gas canister,"Footage shows police giving curfew dispersal order to protesters. At one point, the filmer and his fellow journalist identify themselves as a reporter. One officer says, ""It doesn't matter."" Several subsequent officers do accept their press credentials without issue. They continue to monitor the protest and are later joined by a reporter for the Eugene Weekly.\r\n\r\nFootage around 2 minutes shows the Eugene Weekly reporter struck at close range with a tear gas canister, that then falls to the ground and explodes.\r\n\r\nA tweet from the reporter and an article from the Daily Emerald claim the reporter was hit by both a tear gas canister and rubber bullets","['journalist', 'less-lethal', 'rubber-bullet', 'shoot', 'tear-gas', 'tear-gas-canister']",Rank 4 - Chemical & Electric
4,5,2020-06-01 00:00:00.000000,2021-02-26 20:45:37.448824,"['https://www.reddit.com/r/Eugene/comments/gurr9r/police_shoot_projectile_from_moving_vehicle_hit/', 'https://streamable.com/9h2tk3']",or-eugene-1,Eugene,Oregon,0.0,0.0,Officer shoots projectile from moving vehicle,"An officer shoots a projectile out of a moving vehicle, striking pedestrian near eyeball.\r\n\r\nThis incident took place at in Eugene, OR. Specific area not identified.","['less-lethal', 'projectile', 'protester', 'shoot', 'vehicle']",Rank 5 - Lethal Force


In [32]:
# add a column holding the rank predicted for each description
# NOTE(review): this applies `textmatcher` (built from ranked_reports), not
# `textmatcher1` - confirm that the baseline matcher is the intended one
df["force_rank1"] = df["description"].apply(textmatcher)

In [33]:
# breakdown of the new column by predicted category
df['force_rank1'].value_counts()

Rank 4 - Chemical & Electric    488
Rank 1 - Police Presence        244
Rank 2 - Empty-hand             231
Rank 3 - Blunt Force            201
Rank 5 - Lethal Force           113
Rank 0 - No Police Presence      19
Name: force_rank1, dtype: int64

In [34]:
# breakdown of the force_rank column that came with the CSV, for comparison
df['force_rank'].value_counts()

Rank 4 - Chemical & Electric    387
Rank 1 - Police Presence        359
Rank 2 - Empty-hand             205
Rank 5 - Lethal Force           160
Rank 3 - Blunt Force            106
Rank 0 - No Police Presence      79
Name: force_rank, dtype: int64

In [23]:
# descriptions the model labelled as "Rank 1 - Police Presence"
df.loc[df["force_rank1"] == "Rank 1 - Police Presence", "description"]

9                                                                                                                                                                                                                              During a protest at which both pro-police and Black Lives Matter protesters were present near West 7th and Lincoln, members of the two protests appear to exchange words in an intersection. Police charge a protester, a 14 year old girl according to the poster, tackle her to the ground, kneel on her and arrest her.
21                                                                                                                                                                                                                                                     Body cam footage from a police officer shows police approaching person on a bike, believed to be a protester leaving the protest. The two officers pull the protester from the bike to make an arrest, throwing the bik

In [22]:
# lift the column-width limit so long text cells are displayed in full
pd.set_option('display.max_colwidth', None)

In [35]:
# display the frame, which now includes the force_rank1 column
df

Unnamed: 0,id,dates,added_on,links,case_id,city,state,lat,long,title,description,tags,force_rank,force_rank1
0,1,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.445121,"['https://www.youtube.com/watch?v=dPTr54DdTdY', 'https://www.reddit.com/r/Eugene/comments/gtbsvq/eugene_police_teargasses_downtown/']",or-eugene-2,Eugene,Oregon,44.049913,-123.097391,Peaceful protester tear gassed,"Footage shows a small cluster of protesters chanting ""I can't breathe"" near Lincoln and Broadway. At 1:15 in the footage, without warning, police fire tear gas, causing most protesters flee. Some canisters can be seen thrown back towards police.\r\n\r\nPolice then proceed down the street. More tear gas is fired at 4:30 in the footage.","['less-lethal', 'protester', 'tear-gas']",Rank 4 - Chemical & Electric,Rank 4 - Chemical & Electric
1,2,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.446934,"['https://twitter.com/IwriteOK/status/1266907719635632129', 'https://twitter.com/MrOlmos/status/1266916861267996673']",or-portland-1,Portland,Oregon,45.515586,-122.676994,Police violently break up peaceful protest,The Police try to break up the peaceful protest by resorting to hitting individuals with batons.,"['baton', 'beat', 'protester', 'strike']",Rank 1 - Police Presence,Rank 3 - Blunt Force
2,3,2020-05-30 00:00:00.000000,2021-02-26 20:45:37.447560,['https://www.youtube.com/watch?v=01oWE24O9Zw&feature=emb_title'],or-portland-2,Portland,Oregon,45.515468,-122.676571,Officer pepper-sprays protester,An officer shoves and pepper-sprays a protester standing on the other side of a barrier.,"['less-lethal', 'pepper-spray', 'protester', 'shove', 'spray']",Rank 4 - Chemical & Electric,Rank 4 - Chemical & Electric
3,4,2020-05-31 00:00:00.000000,2021-02-26 20:45:37.448138,"['https://www.youtube.com/watch?v=R7fqWI41vQY', 'https://www.reddit.com/r/Eugene/comments/guxme1/footage_of_epd_shooting_eugene_weekly_reporter/', 'https://twitter.com/hwhouston1/status/1267338971291373568', 'https://www.dailyemerald.com/news/police-fire-pellets-tear-gas-at-late-night-protesters-sunday-police-chief-responds/article_cd4a4450-a49a-11ea-a6fd-73b64efe150a.html']",or-eugene-3,Eugene,Oregon,44.044728,-123.079238,Reporter shot with tear gas canister,"Footage shows police giving curfew dispersal order to protesters. At one point, the filmer and his fellow journalist identify themselves as a reporter. One officer says, ""It doesn't matter."" Several subsequent officers do accept their press credentials without issue. They continue to monitor the protest and are later joined by a reporter for the Eugene Weekly.\r\n\r\nFootage around 2 minutes shows the Eugene Weekly reporter struck at close range with a tear gas canister, that then falls to the ground and explodes.\r\n\r\nA tweet from the reporter and an article from the Daily Emerald claim the reporter was hit by both a tear gas canister and rubber bullets","['journalist', 'less-lethal', 'rubber-bullet', 'shoot', 'tear-gas', 'tear-gas-canister']",Rank 4 - Chemical & Electric,Rank 4 - Chemical & Electric
4,5,2020-06-01 00:00:00.000000,2021-02-26 20:45:37.448824,"['https://www.reddit.com/r/Eugene/comments/gurr9r/police_shoot_projectile_from_moving_vehicle_hit/', 'https://streamable.com/9h2tk3']",or-eugene-1,Eugene,Oregon,0.000000,0.000000,Officer shoots projectile from moving vehicle,"An officer shoots a projectile out of a moving vehicle, striking pedestrian near eyeball.\r\n\r\nThis incident took place at in Eugene, OR. Specific area not identified.","['less-lethal', 'projectile', 'protester', 'shoot', 'vehicle']",Rank 5 - Lethal Force,Rank 5 - Lethal Force
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1291,2032,2020-12-04 00:00:00.000000,2021-02-26 15:57:29.550788,['https://twitter.com/929_julian/status/1337531637026971649'],mn-minneapolis-39,Minneapolis,Minnesota,44.934225,-93.262402,Police arrest and injure protester,"During a small protest on December 4th, police made several arrests, mostly for obstructing the legal system. During one of these arrests at Chicago and 38th, an officer singled out a black protester watching the arrest. The officer approaches the protester, who begins to step away. The officer then chases the protester and throws him to the ground, allegedly dislocating his shoulder.","['arrest', 'protester', 'tackle', 'throw']",Rank 1 - Police Presence,Rank 1 - Police Presence
1292,2033,2020-12-06 00:00:00.000000,2021-02-26 15:57:29.573944,"['https://twitter.com/PplsCityCouncil/status/1335651332179386368', 'https://twitter.com/PplsCityCouncil/status/1335814255212314624', 'https://www.reddit.com/r/PublicFreakout/comments/k8a6iy/chaos_erupts_as_lapd_officers_swings_wildly_at/', 'https://www.reddit.com/r/PublicFreakout/comments/k8jlae/la_cops_doing_what_they_do_best/']",ca-losangeles-65,Los Angeles,California,34.063557,-118.318640,Protesters outside Mayor's home beaten with batons,"On the 13th day of protests outside the Getty House, the official Los Angeles mayoral residence, police charged protesters, striking them and legal observers with batons.\r\n\r\nProtests have been intended to show popular discontent with the possibility of President-Elect Biden nominating Mayor Garcetti to a position in his cabinet.","['baton', 'legal-observer', 'protester', 'strike']",Rank 3 - Blunt Force,Rank 3 - Blunt Force
1293,2034,2021-01-09 00:00:00.000000,2021-02-26 15:57:29.596777,['https://twitter.com/waterspider__/status/1348050947507970051'],ca-sandiego-9,San Diego,California,32.796060,-117.255302,Police assault and shoot at BLM counter-protesters,"At dueling rallies in Pacific Beach between Pro-Trump supporters and Black Lives Matter counter-protesters, riot police lined up facing the BLM group. Footage taken after the beginning of a scuffle shows police with batons beating a BLM protester on the ground. Police can be heard firing pepper balls at protesters throughout the footage. The protester is then released without arrest.","['baton', 'beat', 'less-lethal', 'pepper-ball', 'projectile', 'protester', 'shoot', 'strike']",Rank 5 - Lethal Force,Rank 3 - Blunt Force
1294,2035,,2021-02-26 15:57:29.619122,"['https://twitter.com/DomSkyeRN/status/1270185401060294656', 'https://twitter.com/DomSkyeRN/status/1270193973462695936']",tx-houston-3,Houston,Texas,29.769473,-95.364922,Officer pushs protester that is filming,"A protester is filming and asking ""why?"", referring to protesters being arrested. An officer then pushs the protester that is filming, knocking the camera to the ground.\r\n\r\nThe incident appears to have occurred near the junction of I-45 and I-10 by White Oak Bayou Greenway.","['arrest', 'property-destruction', 'protester', 'push', 'shove']",Rank 1 - Police Presence,Rank 1 - Police Presence
