# Are users' home locations in vulnerable locations?

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [12]:
# Paths used throughout the notebook. The Stage2 directory is read from and the
# Stage3 directory is written to; both share the same county-level prefix.
_processed_root  = "/data/chime/geo2/PROCESSED/FL/"
_county_prefix   = "indian_martin_lucie_counties_"
input_directory  = _processed_root + _county_prefix + "Stage2/"
output_directory = _processed_root + _county_prefix + "Stage3/"
# GeoJSON file holding the zone polygon that home locations are tested against.
zoneGeometry     = "../EvacuationZones/Florida/martin_indian_lucie_hull.geojson"

#### Import the ZoneA Geometry

In [14]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
# NOTE(review): `c` is not used by any cell visible here; it is kept in case
# later cells rely on the open fiona collection.
c = fiona.open(zoneGeometry,'r')
# Read the zone file's top-level 'geometry' member into a shapely geometry.
# The `with` block closes the file handle instead of leaking it.
with open(zoneGeometry,'r') as zone_file:
    zone = shape(json.load(zone_file)['geometry'])
# Last expression is the cell output: confirms the polygon is valid.
zone.is_valid

True

#### Import the user metadata DF (Phasing this out)

In [15]:
# Load the per-user summary table that sits alongside the individual user files.
_user_meta_path = input_directory+'/temporal_clustered_user_meta.json'
_user_meta = pd.read_json(_user_meta_path)

In [16]:
# Order the rows by index label, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,134.0,"{""coordinates"": [-78.01829999999997, 33.914999...",3523,4754740136,ebbtidebot
1,7.0,"{""coordinates"": [-80.19179020000009, 25.761679...",1786,563319506,CVSHealthJobs
2,51.0,"{""coordinates"": [-77.36635380000003, 35.612661...",1691,3144822634,SpeedwayJobs


## 0. Import all of the individual user dataframes

In [17]:
# Every entry in the input directory except the metadata table is treated as
# one user's file; names are kept in sorted order.
_meta_filename = "temporal_clustered_user_meta.json"
users_in = [name for name in sorted(os.listdir(input_directory)) if name != _meta_filename]
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 85 users in /data/chime/geo2/PROCESSED/FL/indian_martin_lucie_counties_Stage2/


In [18]:
def loader_function(args):
    """Load one user's feature-collection file into a GeoDataFrame.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory containing it, and
        a queue that receives one item when the file has been processed (the
        monitor loop uses the queue size as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from each feature's ``properties`` dict.
        A ``geometry`` value is added only for features whose geometry is
        truthy, and ``date`` is converted to a ``pd.Timestamp``.
    """
    uFile, path, q = args
    # Use a context manager so the handle is closed even if parsing fails;
    # this function runs once per file inside pool workers.
    with open(path+"/"+uFile,'r') as user_file:
        u = json.load(user_file)
    tweets = []
    for t in u['features']:
        # Features with a null/empty geometry keep no 'geometry' property.
        if t['geometry']:
            t['properties']['geometry'] = shape(t['geometry'])
        t['properties']['date'] = pd.Timestamp(t['properties']['date'])
        tweets.append(t['properties'])
    # Signal completion to the parent process.
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [19]:
#Parallel runtime
# Load every user file in a worker pool; each worker puts one item on `q`
# when it finishes a file, so q.qsize() doubles as a progress counter.
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in]
# Avoid a ZeroDivisionError in the progress report when no files were found.
_total = max(len(args), 1)

try:
    result = p.map_async(loader_function, args)

    # monitor loop
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/_total*100))
        time.sleep(0.5)
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(q.qsize(), q.qsize()/_total*100))
    # Re-raises here if any worker failed.
    users = result.get()
finally:
    # Always release the worker processes, including when a worker raised.
    p.close()
    p.join()

Processed: 85, 100%

In [20]:
# Largest user frames (most rows) first.
users.sort(key=len, reverse=True)

`users` is a list of user dataframes. Now find which users have _home locations_ in Zone A.

In [21]:
# Spot-check one loaded user frame: its row count and its first row.
# NOTE(review): the index 10 is hard-coded, so this raises IndexError when
# fewer than 11 users were loaded.
x = users[10]
print(len(x))
x.head(1)

534


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1,"{""coordinates"": [-80.3822, 27.707399999999907]...","[-80.3822, 27.7074]",2016-08-01 08:08:31+00:00,1,,POINT (-80.3822 27.7074),9.0,,#My#daily #Inspiration\nCourage is what it tak...,,760024397801410560,28884479,DrJosefina


In [22]:
def get_home_cluster_center(userDF):
    """Return the centre of a user's home cluster as a shapely geometry.

    The home cluster id is read from the first row's ``home_cluster_id``; the
    centre is the JSON-encoded ``cluster_center`` of the first row whose
    ``cluster`` equals that id.

    Returns
    -------
    shapely geometry or None
        ``None`` when the frame is empty, the home cluster id is missing
        (NaN/None), or no row belongs to that cluster. Previously these cases
        raised instead of yielding a value that ``insideZone`` can handle.
    """
    if len(userDF) == 0:
        return None
    home_id = userDF.home_cluster_id.values[0]
    # A missing id would otherwise produce the unusable query 'cluster==nan'.
    if pd.isnull(home_id):
        return None
    home_rows = userDF.query('cluster=='+str(home_id))
    if len(home_rows) == 0:
        return None
    return shape(json.loads(home_rows.cluster_center.values[0]))

# Sanity check on the first user: is their home cluster centre within the zone?
get_home_cluster_center(users[0]).within(zone)

False

## 1. Determine whose home cluster center is in ZONE

In [23]:
def insideZone(p):
    """Return True when geometry ``p`` lies within the module-level ``zone``.

    ``None`` (no known location) is treated as not inside the zone.
    """
    # Identity check: `p == None` would dispatch to the object's own __eq__.
    if p is None:
        return False
    return p.within(zone)

In [24]:
# Partition the users by whether their home cluster centre falls inside the zone.
vuln = []
non_vuln = []
for idx, u in enumerate(users):
    home_center = get_home_cluster_center(u)
    target = vuln if insideZone(home_center) else non_vuln
    target.append(u)
    # Overwrite the same stderr line with a running count.
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
summary = "Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln))
sys.stderr.write(summary)

Done...Identified 40 vulnerable users and 45 non-vulnerable

In [25]:
# Create the output directory (and any missing parents). exist_ok=True makes
# this safe to re-run and removes the gap between an existence check and mkdir.
os.makedirs(output_directory, exist_ok=True)

## 2. Write out only the geo-vulnerable users, in case we need them later

In [26]:
def safe_mapping(p):
    """Convert a geometry to a GeoJSON-style mapping, or None if it is missing.

    Returns ``None`` when ``p`` is ``None`` or when ``np.isnan`` reports any
    NaN in it (e.g. a float NaN placeholder left in the geometry column).
    Otherwise returns ``mapping(p)``.
    """
    # Identity check: `p == None` is elementwise (and then ambiguous in a
    # boolean context) for array-like values.
    if p is None:
        return None
    try:
        if np.isnan(p).any():
            return None
    except TypeError:
        # np.isnan cannot evaluate objects that do not convert to a numeric
        # array; such a value is not a NaN placeholder, so fall through.
        pass
    return mapping(p)
def safe_json_export(args):
    """Write one user's frame to ``<path>/<lower-cased user name>.geojson``.

    Parameters
    ----------
    args : tuple
        ``(df, path)``: the user's frame and the output directory.

    The frame is copied first, so the caller's frame is not modified. Each row
    becomes one Feature: its geometry goes through ``safe_mapping`` and every
    other column becomes a property, with null cells replaced by ``None``.
    """
    frame, out_dir = args
    frame = frame.copy()
    # The file name comes from the first row's `user` value.
    user_name = frame.head(1).user.values[0].lower()
    # NOTE(review): the trailing 'Z' is a literal in the format string, so the
    # dates are presumably already in UTC - confirm.
    frame['date'] = frame['date'].apply(
        lambda stamp: datetime.datetime.strftime(stamp,'%Y-%m-%dT%H:%M:%SZ')
    )

    nulls_as_none = frame.where(pd.notnull(frame), None)

    features = []
    for _, record in nulls_as_none.iterrows():
        geometry = safe_mapping(record.geometry)
        properties = record.to_dict()
        # The geometry lives at the feature level, not among the properties.
        del properties['geometry']
        features.append({'type':'Feature',
                         'geometry':geometry,
                         'properties':properties})

    collection = {"type":"FeatureCollection","features":features}
    with open(out_dir+"/"+user_name+'.geojson','w') as oFile:
        json.dump(collection, oFile)

In [27]:
# Export each vulnerable user's frame, overwriting one stderr line as progress.
for idx, u in enumerate(vuln):
    export_args = (u, output_directory)
    safe_json_export(export_args)
    progress = "\r{0} processed".format(idx+1)
    sys.stderr.write(progress)

40 processed

## 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing. The names listed below can be pasted into Google Sheets.

In [28]:
# Lower-cased user names of the vulnerable users, one per line, alphabetically.
vuln_names = [u.user[0].lower() for u in vuln]
vuln_names.sort()
for uName in vuln_names:
    print(uName)

akafitvero
avantking
badassyogini
bandajoan
bansheeprincess
bevsmithsays
bonusmum
discoverelc
discovermartin
dlhanna62
drjosefina
excoastal
iamdonewinn
iistaysmiling
islamoradabeer
julietelken
kellygirl2004
kellyhannavideo
kimspearsgroup
michaelwagner99
mumondrun
natehphoto
navysealmuseum
patisserievb
queenja2009
rdweaversr
rossg9989
seanclaesgens
springnettles
stevepfiester
styleencorestua
tbatikdesign
tchomesonline
teamsidekick
tonymbaker
trainervickk
vero_vine_
vinman1348
visitthesouth
willjack_u


In [29]:
# Number of users whose home cluster centre fell inside the zone.
len(vuln)

40

In [31]:
def good_storm_tweets(userDF, start="2016-10-03", end="2016-10-05"):
    """Count a user's tweets dated strictly between ``start`` and ``end``.

    Parameters
    ----------
    userDF : DataFrame
        User frame with a ``date`` column of timestamps.
    start, end : str or datetime-like, optional
        Exclusive bounds of the window. Values without a UTC offset are
        interpreted as UTC. The defaults are the window the notebook
        previously hard-coded as 201610030000 and 201610050000.

    Returns
    -------
    int
        Number of rows with ``start < date < end``.
    """
    # Compare real timestamps rather than integer literals inside a query
    # string, which only works through implicit coercion of the literal.
    dates = pd.to_datetime(userDF['date'], utc=True)
    lower = pd.to_datetime(start, utc=True)
    upper = pd.to_datetime(end, utc=True)
    in_window = (dates > lower) & (dates < upper)
    return int(in_window.sum())
    
# Keep only the vulnerable users with more than two tweets in the storm window.
good_data = []
res = []
for idx, u in enumerate(vuln):
    has_enough_tweets = good_storm_tweets(u) > 2
    if has_enough_tweets:
        good_data.append(u)

    sys.stderr.write("\r"+str(idx))

# Final count, padded with spaces to blank out the progress digits.
sys.stderr.write("\r"+str(len(good_data))+"                             ")

good_names = sorted(u.user[0].lower() for u in good_data)
for uName in good_names:
    print(uName)

avantking
bandajoan
bansheeprincess
bonusmum
drjosefina
kellygirl2004
styleencorestua
trainervickk
vero_vine_


01234567891011121314151617181920212223242526272829303132333435363738399                             

# Create rules file from UID