# Import file

In [None]:
import pandas as pd
import json

# Read the CSV (decoded as ISO-8859-1) and keep only the rows that have no
# missing value in any column; the resulting frame is shown as the cell output.
df = (
    pd.read_csv("Mass Shootings Dataset.csv", encoding="ISO-8859-1")
    .dropna()
)
df

Unnamed: 0,S#,Title,Location,Date,Summary,Fatalities,Injured,Total victims,Mental Health Issues,Race,Gender,Latitude,Longitude
11,12,"Ferguson, MO Drive by","Ferguson, Missouri",4/29/2016,A group of 15 to 20 people was gathered for a ...,0,4,4,Unknown,Unknown,Unknown,38.744217,-90.305391
12,13,"Forestville, Maryland Drive-by",42486,4/26/2016,Shooter shot from his car at people standing o...,1,4,5,Unknown,Unknown,Unknown,38.845113,-76.874972
13,14,"Halifax County, VA",42484,4/24/2016,Male shooter fired into crown when a deputy po...,0,6,6,Unknown,Black American or African American,Male,36.765971,-78.928344
14,15,Tire-Slashing revenge escalation,42481,4/21/2016,Shooter was angry over fact that rival familie...,0,4,4,Unknown,Black American or African American,Male,39.290385,-76.612189
15,16,Chicago Rap video Shootout,42479,4/19/2016,Group of young men were in a park at 2 in the ...,1,4,5,Unknown,Unknown,Unknown,41.878114,-87.629798
...,...,...,...,...,...,...,...,...,...,...,...,...,...
393,394,Clara Barton Elementary School,"Chicago, Illinois",1/17/1974,"On January 17, 1974, a 14-year-old student ent...",1,3,4,Yes,Unknown,Male,41.839280,-87.688181
394,395,New Orleans Police Shootings,"New Orleans, Louisiana",12/31/1972,"On New Year's Eve in 1972, a 23-year-old ex-Na...",10,13,22,Yes,Black American or African American,Male,30.068724,-89.931474
395,396,St. Aloysius Church,"Spokane, Washington",11/11/1971,"On November 11, 1971, a former MIT student ent...",2,4,5,Yes,White American or European American,Male,47.673674,-117.415984
396,397,Rose-Mar College of Beauty,"Mesa, Arizona",11/12/1966,"On November 12, 1966, an 18-year-old high scho...",5,1,6,Yes,White American or European American,Male,33.422687,-111.816320


# Named-Entity Recognition (for Word Cloud)

In [None]:
!pip install flair

In [None]:
# Round-trip the frame through its JSON text form to get a plain list of
# per-row dicts (one record per row), then show it as the cell output.
output = json.loads(df.to_json(orient="records"))
output

[{'Date': '4/29/2016',
  'Fatalities': 0,
  'Gender': 'Unknown',
  'Injured': 4,
  'Latitude': 38.744217,
  'Location': 'Ferguson, Missouri',
  'Longitude': -90.305391,
  'Mental Health Issues': 'Unknown',
  'Race': 'Unknown',
  'S#': 12,
  'Summary': 'A group of 15 to 20 people was gathered for a memorial for a family member when two cars drove by and opened fire. Four people were injured. No suspects or specific motive or targets.',
  'Title': 'Ferguson, MO Drive by',
  'Total victims': 4},
 {'Date': '4/26/2016',
  'Fatalities': 1,
  'Gender': 'Unknown',
  'Injured': 4,
  'Latitude': 38.845113,
  'Location': '42486, ',
  'Longitude': -76.874972,
  'Mental Health Issues': 'Unknown',
  'Race': 'Unknown',
  'S#': 13,
  'Summary': "Shooter shot from his car at people standing on the street at 1:30 AM. Police don't believe the woman who died was the target, and do not believe the shooting was random.",
  'Title': 'Forestville, Maryland Drive-by',
  'Total victims': 5},
 {'Date': '4/24/201

In [None]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the pretrained English NER model.
tagger = SequenceTagger.load("flair/ner-english-large")

# Header line printed before the per-summary entity lists.
print('The following NER tags are found:')

# Tag every summary, print the entities found in it, and store them under the
# "NER" key of the record at the same position in `output`.
# NOTE(review): assumes `output` was built from `df` in row order - confirm.
for idx, summary_text in enumerate(df['Summary']):
    sentence = Sentence(summary_text)
    tagger.predict(sentence)
    # Reduce each detected span to its text and the value of its first label.
    entities = [
        {"text": span["text"], "labels": span["labels"][0].value}
        for span in sentence.to_dict(tag_type='ner')["entities"]
    ]
    print(entities)
    output[idx]["NER"] = entities



2021-11-22 18:12:32,439 loading file /root/.flair/models/ner-english-large/07301f59bb8cb113803be316267f06ddf9243cdbba92a4c8067ef92442d2c574.554244d3476d97501a766a98078421817b14654496b86f2f7bd139dc502a4f29
The following NER tags are found:
[]
[]
[]
[]
[]
[]
[{'text': 'Alabama', 'labels': 'LOC'}]
[]
[]
[]
[{'text': 'South Shore', 'labels': 'LOC'}, {'text': 'Chicago', 'labels': 'LOC'}]
[]
[{'text': 'Virginia', 'labels': 'LOC'}, {'text': 'Greyhound', 'labels': 'LOC'}]
[]
[]
[{'text': 'Alabama', 'labels': 'LOC'}]
[]
[{'text': 'Sherman', 'labels': 'LOC'}, {'text': 'TX', 'labels': 'LOC'}]
[{'text': 'Iraq', 'labels': 'LOC'}]
[{'text': 'Kentucky', 'labels': 'LOC'}]
[{'text': 'Spring Break', 'labels': 'MISC'}]
[{'text': 'Wetumpka', 'labels': 'LOC'}, {'text': 'Alabama', 'labels': 'LOC'}]
[]
[]
[]
[]
[{'text': 'Trenton', 'labels': 'LOC'}, {'text': 'NJ', 'labels': 'LOC'}, {'text': 'Acura', 'labels': 'ORG'}]
[]
[]
[]
[{'text': 'Kansas City', 'labels': 'LOC'}, {'text': 'Kansas', 'labels': 'LOC'}, {'t

In [7]:
# Write the records in `output` to disk as JSON indented by four spaces.
with open('mass_shootings.json', 'w') as out_handle:
    json.dump(obj=output, fp=out_handle, indent=4)