In [1]:
# All imports here

import csv
import pandas as pd
import itertools
import math
import string
import re
import urllib3
import json
from bs4 import BeautifulSoup
from google.colab import files

In [3]:
# Build the 2020PB incidents dataframe

# Read the JSON export, flatten the records stored under its 'data' key,
# then drop/rename columns and append empty-string 'lat' and 'long' columns.
df_2020pb = (
    pd.json_normalize(
        pd.read_json('https://raw.githubusercontent.com/2020PB/police-brutality/data_build/all-locations-v2.json')['data']
    )
    .drop(columns=['edit_at'])
    .rename(columns={'description': 'desc', 'links': 'src', 'name': 'title'})
    .assign(lat="", long="")
)

df_2020pb.head()

Unnamed: 0,src,state,city,desc,tags,geolocation,title,date,date_text,id,lat,long
0,[{'url': 'https://www.youtube.com/watch?v=s7MM...,Washington,Olympia,Footage shows a few individuals break off from...,"[arrest, less-lethal, projectile, protester, s...",,Police respond to broken windows with excessiv...,2020-05-31,May 31st,wa-olympia-1,,
1,[{'url': 'https://mobile.twitter.com/chadloder...,Washington,Seattle,Officer pins protester with his knee on his ne...,"[arrest, knee-on-neck, protester]",,Officer pins protester by pushing his knee int...,2020-05-30,May 30th,wa-seattle-1,,
2,[{'url': 'https://twitter.com/gunduzbaba1905/s...,Washington,Seattle,A couple of police officers are seen beating a...,"[beat, protester, punch]",,Police beat unarmed man on the ground,2020-05-31,May 31st,wa-seattle-2,,
3,[{'url': 'https://www.reddit.com/r/Bad_Cop_No_...,Washington,Seattle,A police officer randomly and indiscriminately...,"[less-lethal, pepper-spray, protester]",,Police indiscriminately pepper spray peaceful ...,2020-05-31,May 31st,wa-seattle-3,,
4,[{'url': 'https://www.fox10phoenix.com/news/vi...,Washington,Seattle,Police pepper sprays a young child who is seen...,"[child, inhumane-treatment, less-lethal, peppe...",,Police pepper spray young child,2020-05-31,May 31st,wa-seattle-4,,


In [4]:
# Download the incident list from the 846 API and parse the JSON payload

url="https://api.846policebrutality.com/api/incidents"
http = urllib3.PoolManager()
response = http.request('GET', url)

# Fail early with a readable message rather than a confusing JSON/KeyError
# further down when the API answers with an error page.
if response.status != 200:
    raise RuntimeError(f"GET {url} returned HTTP {response.status}")

# Decode the body directly. Passing JSON through an HTML parser removes
# anything that looks like a tag and rewrites character entities (e.g. an
# "&amp;" inside a URL query string), which silently alters the data or
# breaks the JSON.
json_846 = json.loads(response.data.decode("utf-8"))

json_846



{'data': [{'city': 'Wauwatosa',
   'data': None,
   'date': '2020-10-09T07:00:00.000000Z',
   'description': None,
   'geocoding': {'lat': '43.0494572', 'long': '-88.0075875'},
   'id': '6ac4d9c0-0c99-11eb-976e-8b9823ff7978',
   'links': ['https://twitter.com/TMJ4Stephanie/status/1314736308372287488',
    'https://twitter.com/CearronBagenda/status/1314734800352301056',
    'https://twitter.com/RicoReporting/status/1314735865919307777',
    'https://twitter.com/BenJordan3/status/1314735112060309509',
    'https://twitter.com/arson_ist/status/1314933427968389121',
    'https://www.wisn.com/article/police-use-tear-gas-to-disperse-protesters-in-wauwatosa/34333420#'],
   'pb_id': 'wa-wauwatosa-3',
   'state': 'Wisconsin',
   'tags': ['less-lethal', 'projectile', 'protester', 'shoot', 'tear-gas'],
   'title': 'Police tear gas protesters'},
  {'city': 'Wauwatosa',
   'data': None,
   'date': '2020-10-08T07:00:00.000000Z',
   'description': None,
   'geocoding': {'lat': '43.0494572', 'long': '

In [5]:
# Retrieve data from the json_846 'data' key
# (the recorded API output shows this is a list with one dict per incident)
incidents = json_846['data']

# Create dataframe from the 846 API incident data
# (one row per incident dict; nested values such as 'links', 'tags' and
# 'geocoding' stay as Python lists/dicts inside single cells)
df_846 = pd.DataFrame(incidents)

# Check the top 5 rows of the new dataframe
df_846.head()

Unnamed: 0,id,pb_id,state,city,date,title,description,links,data,tags,geocoding
0,6ac4d9c0-0c99-11eb-976e-8b9823ff7978,wa-wauwatosa-3,Wisconsin,Wauwatosa,2020-10-09T07:00:00.000000Z,Police tear gas protesters,,[https://twitter.com/TMJ4Stephanie/status/1314...,,"[less-lethal, projectile, protester, shoot, te...","{'lat': '43.0494572', 'long': '-88.0075875'}"
1,4c75fde0-0a92-11eb-9f79-73ebeebd7e8e,wa-wauwatosa-2,Wisconsin,Wauwatosa,2020-10-08T07:00:00.000000Z,National Guard officers violently arrest journ...,,[https://twitter.com/WISN_Caroline/status/1314...,,"[arrest, baton, journalist, strike, throw]","{'lat': '43.0494572', 'long': '-88.0075875'}"
2,4c4b6280-0a92-11eb-bc34-6f8a44c55d04,wi-wauwatosa-1,Wisconsin,Wauwatosa,2020-10-07T07:00:00.000000Z,Police fire tear gas and pepper balls at prote...,,[https://twitter.com/RicoReporting/status/1314...,,"[less-lethal, pepper-ball, projectile, protest...","{'lat': '43.0494572', 'long': '-88.0075875'}"
3,ca1d2120-0a51-11eb-a7e0-8d81c4ff38a1,ny-newyorkcity-108,New York,New York City,2020-10-07T07:00:00.000000Z,NYPD officer slaps a protesters phone,,[https://twitter.com/chrisgelardi/status/13142...,,"[protester, threaten]","{'lat': '40.7127753', 'long': '-74.0059728'}"
4,8fbd51a0-0903-11eb-abb8-375be62bfb48,or-portland-387,Oregon,Portland,2020-10-06T07:00:00.000000Z,Police violently arrest several protesters for...,,[https://twitter.com/1misanthrophile/status/13...,,"[arrest, grab, protester]","{'lat': '45.5051064', 'long': '-122.6750261'}"


In [6]:
# Replace single source in 2020PB dataset with multiple sources list from 846 API

def mult_links(df_846, df_2020pb):
    """Replace 2020PB ``src`` cells with the link lists from the 846 data.

    For every row of ``df_2020pb`` whose ``id`` equals the ``pb_id`` of a row
    in ``df_846``, the ``src`` cell is overwritten with that 846 row's
    ``links`` value. Rows without a match are left untouched.

    Parameters
    ----------
    df_846 : pandas.DataFrame
        Must contain the columns ``pb_id`` and ``links``.
    df_2020pb : pandas.DataFrame
        Must contain the columns ``id`` and ``src``. Modified in place.

    Returns
    -------
    None

    Notes
    -----
    * If several ``df_846`` rows share a ``pb_id``, the last one wins.
    * Missing ids (None/NaN) never match.
    * Cells are written with ``.at`` instead of chained indexing
      (``df['src'][j] = ...``), which triggers SettingWithCopyWarning and
      does not modify the frame when pandas Copy-on-Write is active.
    * Rows are addressed by their actual index labels, so the frames do not
      need a default 0..n-1 index.
    """
    # One pass over the 846 data builds the lookup; a later duplicate pb_id
    # overwrites an earlier one.
    links_by_id = {
        pb_id: links
        for pb_id, links in zip(df_846['pb_id'], df_846['links'])
        if pd.notna(pb_id)
    }

    for label, incident_id in df_2020pb['id'].items():
        if pd.notna(incident_id) and incident_id in links_by_id:
            # .at stores the whole list as one cell value; .loc would try to
            # broadcast the list across the selection.
            df_2020pb.at[label, 'src'] = links_by_id[incident_id]
                

# Run the merge: mult_links returns nothing and writes into df_2020pb['src'].
mult_links(df_846, df_2020pb)

# Preview the first rows to check the replaced 'src' values
df_2020pb.head()

Unnamed: 0,src,state,city,desc,tags,geolocation,title,date,date_text,id,lat,long
0,[https://www.youtube.com/watch?v=s7MM1VauRHo],Washington,Olympia,Footage shows a few individuals break off from...,"[arrest, less-lethal, projectile, protester, s...",,Police respond to broken windows with excessiv...,2020-05-31,May 31st,wa-olympia-1,,
1,[https://mobile.twitter.com/chadloder/status/1...,Washington,Seattle,Officer pins protester with his knee on his ne...,"[arrest, knee-on-neck, protester]",,Officer pins protester by pushing his knee int...,2020-05-30,May 30th,wa-seattle-1,,
2,[https://twitter.com/gunduzbaba1905/status/126...,Washington,Seattle,A couple of police officers are seen beating a...,"[beat, protester, punch]",,Police beat unarmed man on the ground,2020-05-31,May 31st,wa-seattle-2,,
3,[https://www.reddit.com/r/Bad_Cop_No_Donut/com...,Washington,Seattle,A police officer randomly and indiscriminately...,"[less-lethal, pepper-spray, protester]",,Police indiscriminately pepper spray peaceful ...,2020-05-31,May 31st,wa-seattle-3,,
4,[https://www.fox10phoenix.com/news/video-shows...,Washington,Seattle,Police pepper sprays a young child who is seen...,"[child, inhumane-treatment, less-lethal, peppe...",,Police pepper spray young child,2020-05-31,May 31st,wa-seattle-4,,


In [9]:
# Populate lat and long information from geocoding dictionary in 846

def populate_geocode(df_846, df_2020pb):
    """Copy 846 geocoding data into the matching 2020PB rows.

    For every row of ``df_2020pb`` whose ``id`` equals the ``pb_id`` of a row
    in ``df_846``, the 846 row's ``geocoding`` dict is stored in
    ``geolocation`` and its ``'lat'`` / ``'long'`` entries are copied into the
    ``lat`` / ``long`` columns, unchanged (no type conversion). Rows without
    a match are left untouched.

    Parameters
    ----------
    df_846 : pandas.DataFrame
        Must contain the columns ``pb_id`` and ``geocoding``; each usable
        ``geocoding`` value is a dict with the keys ``'lat'`` and ``'long'``.
    df_2020pb : pandas.DataFrame
        Must contain the columns ``id``, ``lat``, ``long`` and
        ``geolocation``. Modified in place.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a matched geocoding dict lacks ``'lat'`` or ``'long'``.

    Notes
    -----
    * 846 rows whose ``geocoding`` is not a dict (e.g. ``None``) are skipped
      rather than raising.
    * If several ``df_846`` rows share a ``pb_id``, the last usable one wins.
    * Missing ids (None/NaN) never match.
    * Cells are written with ``.at`` instead of chained indexing
      (``df['lat'][j] = ...``), which triggers SettingWithCopyWarning and
      does not modify the frame when pandas Copy-on-Write is active.
    """
    # One pass over the 846 data builds the lookup; entries without a usable
    # geocoding dict are dropped here so they can never overwrite anything.
    geo_by_id = {
        pb_id: geo
        for pb_id, geo in zip(df_846['pb_id'], df_846['geocoding'])
        if pd.notna(pb_id) and isinstance(geo, dict)
    }

    # A dict can only be stored as a cell value in an object-dtype column.
    if df_2020pb['geolocation'].dtype != object:
        df_2020pb['geolocation'] = df_2020pb['geolocation'].astype(object)

    for label, incident_id in df_2020pb['id'].items():
        geo = geo_by_id.get(incident_id) if pd.notna(incident_id) else None
        if geo is None:
            continue
        df_2020pb.at[label, 'lat'] = geo['lat']
        df_2020pb.at[label, 'long'] = geo['long']
        df_2020pb.at[label, 'geolocation'] = geo

# Run the fill: populate_geocode returns nothing and writes into the
# 'lat', 'long' and 'geolocation' columns of df_2020pb.
populate_geocode(df_846, df_2020pb)

# Preview the first rows to check the filled-in coordinates
df_2020pb.head()

Unnamed: 0,src,state,city,desc,tags,geolocation,title,date,date_text,id,lat,long
0,[https://www.youtube.com/watch?v=s7MM1VauRHo],Washington,Olympia,Footage shows a few individuals break off from...,"[arrest, less-lethal, projectile, protester, s...","{'lat': '47.0378741', 'long': '-122.9006951'}",Police respond to broken windows with excessiv...,2020-05-31,May 31st,wa-olympia-1,47.0378741,-122.9006951
1,[https://mobile.twitter.com/chadloder/status/1...,Washington,Seattle,Officer pins protester with his knee on his ne...,"[arrest, knee-on-neck, protester]","{'lat': '47.6062095', 'long': '-122.3320708'}",Officer pins protester by pushing his knee int...,2020-05-30,May 30th,wa-seattle-1,47.6062095,-122.3320708
2,[https://twitter.com/gunduzbaba1905/status/126...,Washington,Seattle,A couple of police officers are seen beating a...,"[beat, protester, punch]","{'lat': '47.6062095', 'long': '-122.3320708'}",Police beat unarmed man on the ground,2020-05-31,May 31st,wa-seattle-2,47.6062095,-122.3320708
3,[https://www.reddit.com/r/Bad_Cop_No_Donut/com...,Washington,Seattle,A police officer randomly and indiscriminately...,"[less-lethal, pepper-spray, protester]","{'lat': '47.6062095', 'long': '-122.3320708'}",Police indiscriminately pepper spray peaceful ...,2020-05-31,May 31st,wa-seattle-3,47.6062095,-122.3320708
4,[https://www.fox10phoenix.com/news/video-shows...,Washington,Seattle,Police pepper sprays a young child who is seen...,"[child, inhumane-treatment, less-lethal, peppe...","{'lat': '47.6062095', 'long': '-122.3320708'}",Police pepper spray young child,2020-05-31,May 31st,wa-seattle-4,47.6062095,-122.3320708


In [10]:
# Create csv from live data
# Writes df_2020pb to 'latest_incidents.csv', a path relative to the current
# working directory. No index argument is passed, so pandas' default applies
# and the row index is written as the first column.

df_2020pb.to_csv('latest_incidents.csv')

In [11]:
# JSONify dataframe

# Serialize the frame as a JSON array with one object per row
result = df_2020pb.to_json(orient="records")
# Round-trip through the json module: parse pandas' output into Python objects...
parsed = json.loads(result)
# ...and re-serialize it; the resulting string is only displayed as the cell
# output, it is not stored in a variable.
json.dumps(parsed)

