# Are users' home locations in Vulnerable Locations?

In [16]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [47]:
# Directory listed below for the per-user files; it also holds temporal_clustered_user_meta.json.
input_directory  = "/data/chime/geo2/PROCESSED/FL/duvall_county_Stage2/"
# Directory that receives one <username>.geojson per exported user.
output_directory = "/data/chime/geo2/PROCESSED/FL/duvall_county_Stage3/"
# GeoJSON file whose 'geometry' member is loaded as the `zone` shape.
zoneGeometry     = "../EvacuationZones/Florida/DuvallCountyPlus_Hull.geojson"

#### Import the ZoneA Geometry

In [48]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
# Build the zone geometry from the GeoJSON file's 'geometry' member.
# The context manager closes the file handle, which a bare open() call would leak.
with open(zoneGeometry, 'r') as zone_file:
    zone = shape(json.load(zone_file)['geometry'])
zone.is_valid

True

#### Import the user metadata DF (Phasing this out)

In [49]:
_user_meta = pd.read_json(input_directory+'/temporal_clustered_user_meta.json')

In [50]:
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,1.0,"{""type"": ""Point"", ""coordinates"": [-81.65339545...",8067,62050708,JacksonvilleCP
1,3.0,"{""type"": ""Point"", ""coordinates"": [-81.49451000...",7010,195193753,JobsDirectJAX
2,134.0,"{""type"": ""Point"", ""coordinates"": [-78.01829999...",3523,4754740136,ebbtidebot


## 0. Import all of the individual user dataframes

In [51]:
# Every directory entry except the shared metadata file is treated as one user's file.
users_in = sorted(
    name
    for name in os.listdir(input_directory)
    if name != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 340 users in /data/chime/geo2/PROCESSED/FL/duvall_county_Stage2/


In [52]:
def loader_function(args):
    """Load one user's GeoJSON file into a GeoDataFrame (process-pool worker).

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory that contains it,
        and a queue that receives a ``1`` once the file has been parsed. The
        parent process reads the queue size as a progress counter.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from each feature's ``properties`` dict,
        with ``date`` converted to ``pd.Timestamp`` and, for features that
        carry a geometry, a ``geometry`` entry holding the shapely shape.
    """
    uFile, path, q = args
    # Close the handle as soon as the JSON is parsed; a bare open() would
    # leave one descriptor per file open until garbage collection.
    with open(os.path.join(path, uFile), 'r') as handle:
        u = json.load(handle)
    tweets = []
    for t in u['features']:
        props = t['properties']
        if t['geometry']:
            props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [54]:
#Parallel runtime
p = Pool(30)
m = Manager()
q = m.Queue()  # workers put one token per parsed file; qsize() is the progress count

args = [(i, input_directory, q) for i in users_in]
# Denominator for the percentage; never zero, so an empty file list cannot
# raise ZeroDivisionError in the progress line.
total = max(len(args), 1)

# `with p:` terminates the worker processes on exit, including when a worker
# raised or the cell was interrupted, so no pool is left running.
with p:
    result = p.map_async(loader_function, args)

    # monitor loop: redraw a single status line until every task has finished
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/total*100))
        time.sleep(0.5)
    size = q.qsize()
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/total*100))
    # get() re-raises the first exception raised inside a worker, if any.
    users = result.get()

Processed: 340, 100%%

In [55]:
# Order the per-user frames in place by row count, largest first.
users.sort(key=len, reverse=True)

`users` is a list of user dataframes. Now find which users have _home locations_ in Zone A.

In [56]:
x = users[10]
print(len(x))
x.head(1)

1053


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,-1,,"[-81.64144, 30.67153]",2016-08-01 10:06:31+00:00,2,,POINT (-81.64144 30.67153),17.0,,All lanes blocked for a car fire in #Yulee on ...,,760054092580265984,250304006,TotalTrafficJAX


In [57]:
zone.area

0.5438495261440683

In [58]:
def get_home_cluster_center(userDF):
    """Return the shapely geometry of a user's home-cluster centre, or None.

    The home cluster id is read from the first row's ``home_cluster_id``; the
    centre is the ``cluster_center`` value (parsed with ``json.loads``) of the
    first row whose ``cluster`` equals that id.

    Returns None instead of raising when the frame is empty, the home cluster
    id is missing (NaN/None), no row belongs to the home cluster, or that
    row's ``cluster_center`` is missing. ``insideZone`` treats None as
    "not inside the zone".
    """
    if len(userDF) == 0:
        return None

    home_id = userDF.home_cluster_id.values[0]
    if pd.isnull(home_id):
        return None

    # Boolean mask instead of a string-built query: no dependence on how the
    # id happens to be rendered by str().
    centers = userDF.loc[userDF.cluster == home_id, 'cluster_center'].values
    if len(centers) == 0:
        return None

    center = centers[0]
    # `center != center` is only true for NaN.
    if center is None or (isinstance(center, float) and center != center):
        return None

    return shape(json.loads(center))

get_home_cluster_center(users[0]).within(zone)

True

## 1. Determine whose home cluster center is in ZONE

In [59]:
def insideZone(p):
    """Return whether geometry ``p`` lies within the module-level ``zone``.

    A missing geometry (None) counts as outside the zone and yields False.
    """
    # Identity test: `p == None` would dispatch to the geometry's own __eq__
    # rather than performing a plain None check.
    if p is None:
        return False
    return p.within(zone)

In [60]:
# Split the users by whether their home-cluster centre falls inside the zone.
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    (vuln if insideZone(get_home_cluster_center(u)) else non_vuln).append(u)
    # Overwrite the same stderr line with the running count.
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
sys.stderr.write("Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln)))

Done...Identified 247 vulnerable users and 93 non-vulnerable

In [61]:
# exist_ok=True makes this safe to re-run and removes the check-then-create
# race; makedirs also creates missing parent directories.
os.makedirs(output_directory, exist_ok=True)

## 2. Write out just the GeoVulnerable users, in case we need them for something later

In [62]:
def safe_mapping(p):
    """Return the GeoJSON-style mapping of geometry ``p``, or None if it is missing.

    "Missing" means ``p`` is None or ``np.isnan(p)`` reports a NaN (e.g. a
    float NaN left in the geometry column). Anything else is passed to
    ``mapping``.
    """
    if p is None:
        return None
    try:
        if np.isnan(p).any():
            return None
    except TypeError:
        # np.isnan rejects objects it cannot convert to a numeric array; such
        # a value is not a NaN placeholder, so fall through and map it.
        pass
    return mapping(p)
def safe_json_export(args):
    """Write one user's frame to ``<path>/<username>.geojson`` as a FeatureCollection.

    ``args`` is a ``(df, path)`` tuple. The file name is the lower-cased
    ``user`` value of the first row. Dates are rendered with the format
    ``%Y-%m-%dT%H:%M:%SZ``, null cells are replaced with None, and every row
    becomes one Feature whose properties are the row's columns minus
    ``geometry``.
    """
    frame, out_dir = args
    frame = frame.copy()  # leave the caller's frame untouched
    user_name = frame.user.values[0].lower()

    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    frame['date'] = frame['date'].apply(lambda stamp: datetime.datetime.strftime(stamp, iso_format))

    cleaned = frame.where((pd.notnull(frame)), None)

    features = []
    for _, record in cleaned.iterrows():
        geometry = safe_mapping(record.geometry)
        properties = record.to_dict()
        del properties['geometry']  # geometry lives at the Feature level, not in properties
        features.append({'type':'Feature',
                         'geometry':geometry,
                         'properties':properties})
    geojson = {"type":"FeatureCollection","features":features}

    with open(out_dir+"/"+user_name+'.geojson','w') as oFile:
        json.dump(geojson, oFile)

In [63]:
# Export each vulnerable user's frame to output_directory, rewriting one stderr line with the running count.
for idx, u in enumerate(vuln):
    safe_json_export((u,output_directory))
    sys.stderr.write("\r{0} processed".format(idx+1))

247 processed

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing; these users can be pasted into Google Sheets.

In [64]:
# Print the lower-cased user name of every vulnerable user, in sorted order.
# u.user[0] is a label lookup: it reads the row whose index label is 0.
for uName in sorted([u.user[0].lower() for u in vuln]):
    print(uName)

11hartk
13pc43
933thebeatjamz
_jacksonvillefl
abcbeerkenr
acheise1
aimsterskitz0rz
allurejjin522
amariraelyn
amillionmag
angeltoroortiz
angelwed
annomalleys
apeers31
arnav__
artedeleon
asummerray
baristaonduty
benhensley19
berrygood_farms
biggiesolls
bmhurtig1111
bo23phil
boss_hairstylin
bossladyterrie
britt_hester
butthuttjax
bykimberlyjane
cainjax
calvinator7
canisdirus
carm624
castellani
cerena_levene
chefsweetpete
classickmusic
cole513
creativeliza
crookedxchris
crprze
cyclesofjax
daammnblac_
dads4life
dancetrancejax
danmeysing
darealcrunk
dauphmk
deborahregister
delishkebabs
devouringjax
dhallii65
dhlacalle
dig_arch
djdrdoom
djlossofocus
djmoneygud
djr3ign
djscrubbjax
dkopp904
doomnation93
dpfrazier22
drcjhenley
dredeas904
drruss888
efgcouture
elevationexp
elkem27ccs
emmy_armadafc
ericp904
eujacksonville
evilzorac
expreaperjabub
floridaboiler
floridafit1
floridahorizon
fmcnajobs
franruchalski
freakville03
freshjax
genedotcom
geolocationms
ginak92
grandivacooks
granolabandz
gswagg9

In [65]:
len(vuln)

247

In [67]:
# NOTE(review): good_data is assigned by the cell that follows this one (execution count 66,
# this cell is 67), so on a fresh top-to-bottom run this line raises NameError.
len(good_data)

66

In [66]:
def good_storm_tweets(userDF, start="2016-10-03 00:00", end="2016-10-05 00:00"):
    """Count the rows of ``userDF`` whose ``date`` lies strictly between ``start`` and ``end``.

    Parameters
    ----------
    userDF : DataFrame
        Must have a ``date`` column of timestamps.
    start, end : str or timestamp-like, optional
        Exclusive window bounds. Values without a timezone are interpreted as
        UTC. The defaults reproduce the window previously written as the
        integer literals 201610030000 and 201610050000, which appear to have
        relied on the query engine coercing them into timestamps.

    Returns
    -------
    int
        Number of rows inside the window (0 for an empty frame).
    """
    if len(userDF) == 0:
        return 0
    # Normalise both sides to UTC so tz-aware and tz-naive dates compare cleanly.
    dates = pd.to_datetime(userDF['date'], utc=True)
    lower = pd.to_datetime(start, utc=True)
    upper = pd.to_datetime(end, utc=True)
    return int(((dates > lower) & (dates < upper)).sum())
    
# Keep the vulnerable users that have more than two tweets inside the storm window.
good_data = []
res = []  # not used in this cell; kept so the name stays defined
for idx, u in enumerate(vuln):
    if good_storm_tweets(u) > 2:
        good_data.append(u)

    # Show a 1-based count, like the other progress loops in this notebook
    # (the 0-based index stopped one short of len(vuln)).
    sys.stderr.write("\r"+str(idx+1))

# Overwrite the counter with the number of users kept; the padding blanks out leftover digits.
sys.stderr.write("\r"+str(len(good_data))+"                             ")

for uName in sorted([u.user[0].lower() for u in good_data]):
    print(uName)

208

_jacksonvillefl
acheise1
aimsterskitz0rz
baristaonduty
bossladyterrie
bykimberlyjane
cainjax
cole513
crprze
dancetrancejax
devouringjax
dhallii65
djmoneygud
dpfrazier22
efgcouture
fmcnajobs
freakville03
genedotcom
geolocationms
grandivacooks
iampoeticsent
iembot_jax
infinitybeautyl
infosrv
iteeroy
jacksonvillecp
jagator21
jagdrummer
jagsbethany
jagspoolboy
jamiescottsmith
jasoncrownover
jaxbizexchange
jeffreybezore
jessicaj4488
jkolessar
jnfpsd
jobsdirectjax
josephineshaute
katmeredith
khirevenge
kourtneygrey
lep_stewart
lucasbelue
maxcieen
mztonimichelle
odd_rod
p_smoov
phatgyrlsnoop
prettyricky_bdm
raineydacutie
samjuneau
sbcjacksonville
seibellaimage
smilesspamadame
soponor
stephaniedanle1
tarahcoastal
tenleydietrich
therealdemetrus
tlhodo
totaltrafficjax
victorriverajr
wadehtaylor
whats_uptuck
wnsantos


20921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524666                             

# Create rules file from UID