# Are users' home locations in Zone A (of Brevard?)

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [3]:
# Per-user files and the user-metadata JSON are read from this directory.
input_directory = '/data/chime/geo/matthew/brevard_zone_a_stage_2_temporal_clusters_with_home'
# Users whose home cluster center falls inside Zone A are written to this directory.
output_directory = '/data/chime/geo/matthew/brevard_zone_a_geovulnerable'

#### Import the ZoneA Geometry

In [5]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape

# Read the first feature of the Zone A shapefile. The context manager closes
# the underlying dataset when done, and next(iter(...)) replaces the
# deprecated Collection.next() accessor.
with fiona.open('../ZoneA_Geometry/ZoneA/OGRGeoJSON.shp', 'r') as c:
    pol = next(iter(c))

# NOTE(review): buffer(0) is presumably here to clean up an invalid polygon;
# the validity flag is shown as the cell output so this can be confirmed.
zoneA = shape(pol['geometry']).buffer(0)
zoneA.is_valid

True

#### Import the user metadata DF (Phasing this out)

In [6]:
# The user metadata JSON sits in the same directory as the per-user files.
user_meta_path = input_directory + '/temporal_clustered_user_meta.json'
_user_meta = pd.read_json(user_meta_path)

In [7]:
# Put the metadata rows in index order, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,tweets,uid,user
0,1.0,14231,75153082,Andrewthemark
1,3.0,4934,54342579,frankieciv608
2,2.0,4566,45193878,NewYorkPuck


## 0. Import all of the individual user dataframes

In [8]:
# Every entry in the input directory except the metadata JSON is treated as
# one user's file; names are kept in sorted order.
users_in = sorted(
    entry for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 1258 users in /data/chime/geo/zone_a_users_stage_2_temporal_clusters_with_home


In [24]:
def loader_function(args):
    """Load one user's GeoJSON file into a GeoDataFrame (one row per feature).

    Takes a single tuple so it can be handed directly to ``Pool.map``-style
    calls.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory containing it, and
        a queue that receives one item once the file has been processed
        (used by the caller as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from each feature's ``properties`` with
        ``geometry`` set to the shapely geometry of the feature and ``date``
        parsed into a ``pd.Timestamp``.
    """
    uFile, path, q = args
    # Use a context manager so the handle is closed as soon as parsing is
    # done; with many files per worker, bare open() calls accumulate open
    # descriptors until garbage collection.
    with open(path+"/"+uFile,'r') as fh:
        u = json.load(fh)
    tweets = []
    for t in u['features']:
        props = t['properties']
        props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    # Signal completion of this file to the monitoring loop.
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [25]:
#Parallel runtime
# Load every user file in a worker pool. Each worker pushes one item onto the
# shared queue per finished file, which drives the progress display below.
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in]
n_files = len(args)

def _write_progress(done):
    """Overwrite the current stderr line with the count/percentage of files loaded."""
    # Guard the percentage so an empty input list does not divide by zero.
    pct = done/n_files*100 if n_files else 0.0
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(done, pct))

try:
    result = p.map_async(loader_function, args)

    # monitor loop
    while not result.ready():
        _write_progress(q.qsize())
        time.sleep(0.5)
    _write_progress(q.qsize())
    users = result.get()  # re-raises any exception raised inside a worker
except BaseException:
    # On failure or interrupt, stop the workers instead of leaving them running.
    p.terminate()
    raise
else:
    p.close()
finally:
    # Always wait for the worker processes to exit so none are leaked.
    p.join()

Processed: 1258, 100%

In [26]:
# Order the user frames in place, from most rows to fewest.
users.sort(key=len, reverse=True)

`users` is a list of per-user GeoDataFrames, sorted from most rows to fewest. Now find which users have _home locations_ in Zone A.

In [28]:
# Spot-check a single user frame: row count, then its first row.
x = users[100]
print(x.shape[0])
x.head(1)

962


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1.0,"{""coordinates"": [-73.9298395, 40.8522721481818...","[-73.9298417, 40.8522943]",2012-09-06 09:14:28+00:00,2,,POINT (-73.9298417 40.8522943),4.0,,This movie is hilarious lol,,243638286525034496,303492377,AntPulido


In [29]:
def get_home_cluster_center(userDF):
    """Return the center geometry of a user's home cluster, or None if unavailable.

    The home cluster id is read from the first row's ``home_cluster_id``; the
    center is taken from the first row whose ``cluster`` equals that id, whose
    ``cluster_center`` holds a GeoJSON string.

    Parameters
    ----------
    userDF : DataFrame
        One user's rows, with ``cluster``, ``cluster_center`` and
        ``home_cluster_id`` columns.

    Returns
    -------
    shapely geometry or None
        ``None`` when the frame is empty, the home cluster id is missing
        (NaN/None), or no row belongs to the home cluster.
    """
    if len(userDF) == 0:
        return None
    home_id = userDF.home_cluster_id.values[0]
    # A missing id used to be spliced into the query string as "nan" and raise.
    if pd.isnull(home_id):
        return None
    # Boolean mask instead of a string-built query expression.
    centers = userDF.loc[userDF.cluster == home_id, 'cluster_center'].values
    if len(centers) == 0:
        return None
    return shape(json.loads(centers[0]))

# Smoke test on users[0]: is the home cluster center inside Zone A?
first_home_center = get_home_cluster_center(users[0])
first_home_center.within(zoneA)

False

## 1. Determine whose home cluster center is in Zone A

In [30]:
def insideZoneA(p):
    """Return whether geometry ``p`` lies within the module-level ``zoneA`` shape.

    ``None`` (no geometry available) is treated as not inside and yields
    ``False``; otherwise the result of ``p.within(zoneA)`` is returned.
    """
    # Identity check: `p == None` would dispatch to the geometry's own __eq__,
    # which is not guaranteed to answer "is this None?".
    if p is None:
        return False
    return p.within(zoneA)

In [32]:
# Split the users into two lists according to whether their home cluster
# center falls inside Zone A, echoing a running count to stderr.
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    home_in_zone = insideZoneA(get_home_cluster_center(u))
    bucket = vuln if home_in_zone else non_vuln
    bucket.append(u)
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
summary = "Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln))
sys.stderr.write(summary)

Done...Identified 118 vulnerable users and 1140 non-vulnerable

In [33]:
# Create the output directory on first run; leave an existing path alone.
output_dir_missing = not os.path.exists(output_directory)
if output_dir_missing:
    os.mkdir(output_directory)

## 2. Write out only the geo-vulnerable users, in case we need them later

In [36]:
# One GeoJSON file per vulnerable user, named after the lower-cased username
# taken from the row labelled 0 of that user's frame.
for idx, u in enumerate(vuln):
    out_path = output_directory+"/"+u.user[0].lower()+'.geojson'
    with open(out_path,'w') as oFile:
        # json_util.default is passed through as the serializer fallback.
        oFile.write(u.to_json(default=json_util.default))
    progress_line = "\r{0} processed".format(idx+1)
    sys.stderr.write(progress_line)

118 processed

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing; the usernames printed below can be pasted into Google Sheets.

In [39]:
# Print the lower-cased usernames of the vulnerable users, alphabetically,
# one per line.
vuln_names = [frame.user[0].lower() for frame in vuln]
vuln_names.sort()
for uName in vuln_names:
    print(uName)

_an_oak_tree_
adrianamisoul
agate86
aladesnr
alliegallo
amanda_xtelle
anas_a1
arnellmilton
beatzzzz
beliebinfinate
bencashfarouq
brianaaanicolee
bridgetekelly
britescrnmedia
carminenyc
cavemanrobles
chinkeyeyez3
chissyn
chrisrodemeyer
cnysurvivor1
continuumcycles
cooper_smith
dcamz5
dee9996
dem_callmi_tutu
dj2020
dj_vasquezz
dommydom24
eddiegeenyc
edelsingh
eelain212
emmarocks
explicit_clutch
fanlyeduardo
g0ttalov3him
garyalonynyc
georgiaafowler
georgieeeninjaa
gkor29
grady_iv
heatherjeanhd
honeyberk
jaclyn_collins
jamesshowell
japan2021
jcelona_
jevicak
joemckinzie
john_flip11
jojohankey
jonclarkjr
jonmcl
julianandsara
justmealiseo
katcatoure
kflow94
kmfmack
ladygaga11228
lalahearts
laumc822
laurakazam
lauren_morra
lindseyhankes
loneladyrider
lunaparkconeyis
m3lissasays
maderised
mallorymcmorrow
marky_mark34
marotin507
mikeviktorenko
mkduffy518
moishgeorge86
morgansteve
mrspuertorico
muchotoomucho
murphym45
myluvisking
mzmimi_82
newyorkcpt
nickvargas6
nyicecream
nyloves_1d
nzavaa
objo

In [40]:
# Number of users classified as vulnerable (home cluster center inside Zone A).
len(vuln)

118