# Are users' home locations in vulnerable locations?

In [13]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [14]:
# Paths used throughout the notebook:
#   input_directory  - listed and read below for the per-user files and the user metadata JSON
#   output_directory - receives the per-user .geojson files exported further down
#   zoneGeometry     - shapefile opened with fiona to build the zone polygon
input_directory  = "/data/chime/geo2/PROCESSED/NYC/ZoneA_Stage2/"
output_directory = "/data/chime/geo2/PROCESSED/NYC/ZoneA_Stage3/"
zoneGeometry     = "../EvacuationZones/NewYorkCity/EvacZoneA.shp"

#### Import the ZoneA Geometry

In [15]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape

# Read the first feature of the zone shapefile. The collection is opened in a
# `with` block so the underlying file is released, and the feature is fetched
# with next(iter(...)) instead of the deprecated Collection.next().
with fiona.open(zoneGeometry, 'r') as c:
    pol = next(iter(c))

# buffer(0) rebuilds the geometry before it is used for the `within` tests below.
# NOTE(review): only the first feature is used - confirm the shapefile holds a
# single (multi)polygon covering the whole zone.
zone = shape(pol['geometry']).buffer(0)
zone.is_valid

True

#### Import the user metadata DF (Phasing this out)

In [16]:
# Load the per-user metadata table that sits alongside the per-user files.
meta_path = input_directory + '/temporal_clustered_user_meta.json'
_user_meta = pd.read_json(meta_path)

In [17]:
# Order the metadata rows by index label, then preview the first three rows.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,1.0,"{""type"": ""Point"", ""coordinates"": [-73.69247457...",14231,75153082,Andrewthemark
1,1.0,"{""type"": ""Point"", ""coordinates"": [-73.82028165...",4934,54342579,frankieciv608
2,1.0,"{""type"": ""Point"", ""coordinates"": [-74.19745138...",4566,45193878,NewYorkPuck


## 0. Import all of the individual user dataframes

In [18]:
# Every entry of the input directory except the metadata JSON is treated as
# one user's file; the names are kept in sorted order.
users_in = sorted(
    entry
    for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 1917 users in /data/chime/geo2/PROCESSED/NYC/ZoneA_Stage2/


In [19]:
def loader_function(args):
    """Load one user's GeoJSON file into a GeoDataFrame of tweets.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory that contains it,
        and a queue that receives a ``1`` once the file has been parsed (the
        caller polls the queue size to report progress).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from the feature's ``properties`` with
        ``date`` converted to ``pd.Timestamp`` and, when the feature carries a
        geometry, a shapely object stored under ``geometry``.
    """
    uFile, path, q = args
    # Context manager so the handle is closed even if parsing fails; this runs
    # once per user file inside long-lived pool workers.
    with open(path+"/"+uFile,'r') as fh:
        u = json.load(fh)
    tweets = []
    for t in u['features']:
        props = t['properties']
        # Features with a null/empty geometry keep no 'geometry' entry.
        if t['geometry']:
            props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [20]:
#Parallel runtime
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in]

def _report_progress(done, total):
    """Overwrite the current stderr line with the processed count and percentage."""
    # Guard against an empty input directory, which would otherwise divide by zero.
    pct = done/total*100 if total else 100.0
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(done, pct))

try:
    result = p.map_async(loader_function, args)

    # monitor loop: each worker puts one item on q per finished file, so the
    # queue size is the number of files processed so far.
    while not result.ready():
        _report_progress(q.qsize(), len(args))
        time.sleep(0.5)
    _report_progress(q.qsize(), len(args))
    users = result.get()
except BaseException:
    # On a worker error or a kernel interrupt, stop the workers instead of
    # leaving them running in the background.
    p.terminate()
    raise
else:
    p.close()
finally:
    # Always reap the worker processes.
    p.join()

Processed: 1917, 100%

In [24]:
# Largest frames (most rows) first.
users.sort(key=len, reverse=True)

`users` is a list of per-user dataframes. Now find which users have _home locations_ in Zone A.

In [25]:
# Peek at one user's frame (position 100 in `users`): print its row count,
# then display its first row.
x = users[100]
print(len(x))
x.head(1)

848


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1,"{""type"": ""Point"", ""coordinates"": [-74.08464905...","[-74.08390504, 40.60790322]",2012-09-01 00:33:31+00:00,5,,POINT (-74.08390504 40.60790322),1.0,,I love mashed potatoes !!!!!“@crissygee: well ...,,241695245476171776,256365699,richietymee


In [26]:
def get_home_cluster_center(userDF):
    """Return the center of a user's home cluster as a shapely geometry.

    The home cluster id is read from the first row's ``home_cluster_id``; the
    center is the ``cluster_center`` GeoJSON string of the first row whose
    ``cluster`` equals that id.

    Returns ``None`` when the frame is empty, the home cluster id is missing,
    no row belongs to the home cluster, or that row has no center. These cases
    previously raised.
    """
    if len(userDF) == 0:
        return None
    home_id = userDF.home_cluster_id.values[0]
    if pd.isnull(home_id):
        return None
    # Boolean mask instead of a string-built query: no expression is
    # assembled from data values.
    centers = userDF.loc[userDF.cluster == home_id, 'cluster_center'].values
    if len(centers) == 0 or pd.isnull(centers[0]):
        return None
    return shape(json.loads(centers[0]))

# Spot check: is the home cluster center of the first user in `users` within the zone polygon?
get_home_cluster_center(users[0]).within(zone)

False

## 1. Determine whose home cluster center is in the zone

In [27]:
def insideZone(p):
    """Return True when geometry ``p`` lies within the module-level ``zone``.

    ``None`` (no location available) is treated as "not inside" and yields
    False.
    """
    # Identity test rather than `p == None`: equality would be dispatched to
    # the geometry's own __eq__, which is not what is being asked here.
    if p is None:
        return False
    return p.within(zone)

In [28]:
# Split the users into those whose home cluster center falls inside the zone
# (vuln) and everyone else (non_vuln), reporting a running count on stderr.
vuln, non_vuln = [], []
for seen, user_df in enumerate(users, start=1):
    is_inside = insideZone(get_home_cluster_center(user_df))
    bucket = vuln if is_inside else non_vuln
    bucket.append(user_df)
    sys.stderr.write("\r" + str(seen))
sys.stderr.write("\rDone...")
summary = "Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln), len(non_vuln))
sys.stderr.write(summary)

Done...Identified 233 vulnerable users and 1684 non-vulnerable

## 2. Write out just the GeoVulnerable, just in case we need them for something later

In [29]:
# Create the output directory (and any missing parents). exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
os.makedirs(output_directory, exist_ok=True)

In [17]:
def safe_mapping(p):
    """Return ``mapping(p)`` for a geometry, or ``None`` when ``p`` is missing.

    "Missing" means ``None``, a NaN scalar, or a value whose numeric array
    form contains NaN.
    """
    if p is None:
        return None
    try:
        has_nan = bool(np.isnan(p).any())
    except TypeError:
        # np.isnan cannot interpret this value numerically (for example a
        # geometry object that does not expose an array interface), so it is
        # not a NaN placeholder.
        has_nan = False
    if has_nan:
        return None
    return mapping(p)
def safe_json_export(args):
    """Write one user's frame to ``<path>/<lowercased user name>.geojson``.

    ``args`` is a ``(df, path)`` tuple. The frame is copied before being
    modified. Dates are rendered as ``%Y-%m-%dT%H:%M:%SZ`` strings, null cells
    become ``None``, and every row becomes one GeoJSON Feature whose geometry
    comes from ``safe_mapping`` and whose properties are the remaining columns.
    """
    frame, out_dir = args
    frame = frame.copy()
    user_name = frame.head(1).user.values[0].lower()

    def _stamp_to_text(stamp):
        return datetime.datetime.strftime(stamp, '%Y-%m-%dT%H:%M:%SZ')

    frame['date'] = frame['date'].apply(_stamp_to_text)

    # Null cells are swapped for None so json.dump writes them as null.
    cleaned = frame.where((pd.notnull(frame)), None)

    features = []
    for _, record in cleaned.iterrows():
        geometry = safe_mapping(record.geometry)
        properties = record.to_dict()
        # The geometry lives at the Feature level, not among the properties.
        del properties['geometry']
        features.append({'type': 'Feature',
                         'geometry': geometry,
                         'properties': properties})

    geojson = {"type": "FeatureCollection", "features": features}
    with open(out_dir+"/"+user_name+'.geojson','w') as oFile:
        json.dump(geojson, oFile)

In [18]:
# Export every vulnerable user's frame, showing a running count on stderr.
for exported, user_df in enumerate(vuln, start=1):
    safe_json_export((user_df, output_directory))
    sys.stderr.write("\r{0} processed".format(exported))

233 processed

In [36]:
# Preview the first row of one vulnerable user's frame (position 100 in `vuln`).
vuln[100].head(1)

Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,-1,,"[-74.07549477, 40.64054012]",2012-09-04 12:06:17+00:00,2,,POINT (-74.07549477000001 40.64054012),3.0,,http://t.co/dPXe0sdU\r\rI HAVE A DREAM TOO; Ma...,,242956743615852544,151639074,UNIVER_SOUL


# This is more users than I'd hoped; let's filter by a few variables

In [40]:
# SANDY: key instants for the storm, written as digit strings and parsed into
# pd.Timestamp objects below.
# NOTE(review): the strings look like YYYYMMDDHHMM - confirm pd.Timestamp parses them that way.
_landfall_str = '201210300000' # Remember, this is UTC
_start_str    = '201210290000' # i.e. when the evacuation was ordered (8pm on the 28th)
_end_str      = '201210310000'
_landfall = pd.Timestamp(_landfall_str)
_start    = pd.Timestamp(_start_str)
_end      = pd.Timestamp(_end_str)

In [46]:
def good_storm_tweets(userDF):
    """Count the rows of ``userDF`` whose ``date`` lies strictly inside the hard-coded window."""
    in_window = userDF.query("date > 201210300000 & date < 201210310000")
    return in_window.shape[0]
    
# Keep only the vulnerable users with more than MIN_STORM_TWEETS tweets inside
# the window counted by good_storm_tweets.
MIN_STORM_TWEETS = 2

good_data = []
for idx, u in enumerate(vuln):
    if good_storm_tweets(u) > MIN_STORM_TWEETS:
        good_data.append(u)

    # idx+1 so the counter finishes on len(vuln), like the other progress loops.
    sys.stderr.write("\r"+str(idx+1))

186

[]


187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232

In [47]:
# Number of vulnerable users that passed the storm-window tweet filter.
len(good_data)

74

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualization; the user names printed below can be pasted into Google Sheets.

In [48]:
# Print the lowercased name of every retained user, one per line, in sorted order.
lowercase_names = [frame.user[0].lower() for frame in good_data]
lowercase_names.sort()
for uName in lowercase_names:
    print(uName)

_an_oak_tree_
adrianamisoul
afranks3
agreatbigcity
amanda_xtelle
ariramku
arnellmilton
bobspivak1
brendad1121
brianaaanicolee
carminenyc
cindychipz
ckanal
cooper_smith
danaamathews
dommydom24
eddiegeenyc
eelain212
elisesp
ericabrooke12
faridkader
fdnybagpiper
gabesantacruz10
garyalonynyc
georgieeeninjaa
gkor29
honeyberk
ikebrooker
ivanper4
jahmezzdagawd
jameslkimmel
jamesmarotta_
jamiealexandraa
jcelona_
justmealiseo
kat_pugacheva
lalahearts
laurakazam
laurenatkiehls
lightoutsrock
lindseyhankes
lmoskus
lucida_console
lutherriggs_dj
maderised
marisa_fuller
marky_mark34
max_not_mark
mikedizon
mrspuertorico
myluvisking
mzmimi_82
nmassa1208
noahsussman
nzavaa
pattyyunen
rafat
raydelrae
readyrock7
realjustinkim
realsarp
rockawaytrading
sarahphara_tw
sephology
sfloridia98
skiftnews
smoshysydney
sotnakny
stepliana
therealgowanus
thisisdanstweet
unclonghorn
zacrivera
zaffi


In [18]:
# Size of the vulnerable-user list that the rules cell below iterates over.
# NOTE(review): the saved output of this cell differs from the count reported
# by the classification cell above - it may come from a different run; re-run
# the notebook top to bottom to confirm.
len(vuln)

356

# Create rules file from UID

In [19]:
# Build one "from:<uid>" clause per vulnerable user and OR them together in
# fixed-size batches, one rule per batch.
RULE_BATCH_SIZE = 25

def _chunked(items, size):
    """Yield consecutive slices of ``items`` holding at most ``size`` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]

# str() keeps the concatenation valid whether uid was loaded as text or as a number.
uid_clauses = ["from:"+str(u.uid[0]) for u in vuln]

# Slicing by position keeps every batch at RULE_BATCH_SIZE clauses and includes
# the final partial batch. The previous modulo-based flush put 26 clauses in
# the first rule and dropped the users after the last full batch.
rules = [" OR ".join(batch) for batch in _chunked(uid_clauses, RULE_BATCH_SIZE)]

output = [{"value": r} for r in rules]

# NOTE(review): the target folder is named "NJ_..." while this notebook's
# input paths are under NYC - confirm this is the intended destination.
with open('../../GNIP/Sandy/NJ_GeoVulnerable_Contextual/rules.json','w') as oFile:
    json.dump(output, oFile)