# Overview

- import recent trending tweets using tweepy
- run nlp model to identify police use of force
  - limit tweets to known hashtags with examples of police use of force for release 1
- save scoped tweets into temporary storage (e.g., SQLite) to avoid hitting rate limits
- run nlp/ner model on topic scoped tweets to find location of reported incident
- use geopy or the Google REST API to get the latitude and longitude of the GPE (geopolitical entity) that was found

## tweepy config

In [2]:
!pip install tweepy



In [3]:
import tweepy
import re
import plotly.express as px
import pandas as pd

In [None]:
# Prompt for the Twitter credentials without echoing what is typed, so the
# key and secret are not left visible in the notebook's saved cell output.
from getpass import getpass

API_KEY = getpass('api_key: ')
API_SECRET_KEY = getpass('api_secret_key: ')

In [6]:
# OAuth handler built from the key and secret collected in the previous cell.
auth = tweepy.OAuthHandler(consumer_key=API_KEY, consumer_secret=API_SECRET_KEY)

In [7]:
# API client wrapping the OAuth handler; every later `api.*` call in this
# notebook goes through this object.
api = tweepy.API(auth)

## scope tweets to the keywords "black lives matter"

In [8]:
# Search tweets for the phrase, passing count=100 with the request.
blm_search = api.search(q="black lives matter", count=100)

In [9]:
def extract_hash_tags(s):
    """Return the set of distinct hashtags found in ``s``, without the ``#``.

    A hashtag is a ``#`` that does not directly follow a word character or
    ``&`` (so HTML entities such as ``&#39;`` are ignored), followed by one or
    more word characters (letters, digits, underscore). Surrounding
    punctuation is therefore never part of a tag, and a lone ``#`` yields
    nothing.

    Args:
        s: Text to scan. ``None`` or an empty string gives an empty set.

    Returns:
        set: The tag names, with their original casing.
    """
    if not s:
        return set()
    # Regex instead of whitespace splitting: "#BLM," must give "BLM", not "BLM,",
    # and "(#BLM)" must still be recognised as a tag.
    return set(re.findall(r'(?<![\w&])#(\w+)', s))

In [10]:
# NOTE(review): the saved output below is 100, the same value passed as
# `count=` in the search call — presumably this is the requested page size
# rather than the number of tweets returned; confirm with len(blm_search).
blm_search.count

100

In [None]:
# Exploratory: list the attribute names available on the search result object.
dir(blm_search)

In [11]:
# Materialise the results once so every column below is built from the same
# sequence, in the same tweet order.
statuses = list(blm_search)

# One list per output column.
tweet_text = [status.text for status in statuses]
user_name = [status.user.name for status in statuses]
user_location = [status.user.location for status in statuses]
hashtags = [extract_hash_tags(status.text) for status in statuses]


In [12]:
# Combine the per-tweet lists into one table; the keyword names become the
# column names, in this order.
df = pd.DataFrame(
    dict(
        user_name=user_name,
        user_location=user_location,
        tweet_text=tweet_text,
        tags=hashtags,
    )
)


In [13]:
# Preview the first 10 rows of the tweets table.
df.head(10)

Unnamed: 0,user_name,user_location,tweet_text,tags
0,👤,,RT @Lopez83__: lets not forget BLACK LIVES STI...,{}
1,AD&D DM🧢,Banished To The Man Cave,"Black Lives Matter DEFENDS Chicago Looters, Sa...",{}
2,dina🐉,,RT @mushrxom_bby: black lives still matter. bl...,{}
3,▪︎,,RT @Lopez83__: lets not forget BLACK LIVES STI...,{}
4,Steph Paul 🇭🇹,"Georgia, USA",RT @jacquelinel0wry: black lives still matter....,{}
5,⟬⟭𝗠𝗮𝘂𝗿𝗲𝗲𝗻🥺♡⟭⟬⁷,🏳️‍🌈,RT @jacquelinel0wry: black lives still matter....,{}
6,SAGE 🦋,,RT @morganmsk_: Good Afternoon \nBlack Lives M...,{}
7,BNN - Breitbear News Network,Fuck Off Mountain,RT @CattHarmony: At Beverly Hills #WalkAway ra...,{WalkAway}
8,Jordyn,,RT @Lopez83__: lets not forget BLACK LIVES STI...,{}
9,Elizabeth Lee 🇺🇸,,RT @ScottPresler: @JoeBiden @KamalaHarris I’m ...,{}


## nlp/ner: find location entities

In [None]:
# from google.colab import files
# uploaded = files.upload()

In [None]:
import spacy

## city to latitude and lon

## twitter trends exploration

In [None]:
# Fetch the collection of available trend entries; the next cell reads each
# entry's 'woeid' via .get('woeid').
trends = api.trends_available()

In [None]:
# Query trends for the first available entry only (slice keeps this to one API call).
for place in trends[:1]:
    result = api.trends_place(place.get('woeid'))

In [None]:
import json
from urllib.request import urlopen

# Download the county GeoJSON from the plotly datasets repository and parse it.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as resp:
    counties = json.loads(resp.read())

# Display the first feature to inspect the structure of the parsed document.
counties["features"][0]

## Mapping

In [None]:
# Minimal state-level choropleth: three states, each coloured by a placeholder value.
px.choropleth(
    locations=["CA", "TX", "NY"],
    locationmode="USA-states",
    color=[1, 2, 3],
    scope="usa",
)

In [None]:
import plotly.graph_objects as go

import pandas as pd

# NOTE: this rebinds `df`, replacing the tweets table built earlier in the
# notebook; re-run the tweets cells before using that table again.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_us_airport_traffic.csv')

# Hover label: airport name, "city, state" and the arrival count, each on its
# own line. '<br>' is the line break plotly text accepts (the title below uses
# it too); joining with empty strings would run the three parts together.
df['text'] = (
    df['airport'] + '<br>' + df['city'] + ', ' + df['state']
    + '<br>' + 'Arrivals: ' + df['cnt'].astype(str)
)

# One marker per airport, positioned by longitude/latitude and coloured by
# the arrival count.
fig = go.Figure(data=go.Scattergeo(
        lon = df['long'],
        lat = df['lat'],
        text = df['text'],
        mode = 'markers',
        marker_color = df['cnt'],
        ))

fig.update_layout(
        title = 'Most trafficked US airports<br>(Hover for airport names)',
        geo_scope='usa',
    )
fig.show()