# Are users' home locations in vulnerable locations?

In [20]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [21]:
# Directory holding one JSON feature file per user plus the user-metadata JSON.
input_directory  = "/data/chime/geo2/PROCESSED/NJ/SouthernBarrier_Stage2/"
# Directory that receives the per-user .geojson exports written later on.
output_directory = "/data/chime/geo2/PROCESSED/NJ/SouthernBarrier_Stage3/"
# Shapefile whose first feature is used as the zone polygon.
zoneGeometry     = "../EvacuationZones/NewJersey/NJ_SouthernBarrier.shp"

#### Import the ZoneA Geometry

In [22]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
# Read only the first feature of the shapefile. The context manager closes the
# collection, and next(iter(c)) replaces the deprecated Collection.next() call.
with fiona.open(zoneGeometry,'r') as c:
    pol = next(iter(c))
# buffer(0) is applied before the validity check; the cell displays zone.is_valid.
zone = shape(pol['geometry']).buffer(0)
zone.is_valid

  This is separate from the ipykernel package so we can avoid doing imports until


True

#### Import the user metadata DF (Phasing this out)

In [23]:
# Load the per-user metadata table stored alongside the per-user files.
_user_meta = pd.read_json(input_directory+'/temporal_clustered_user_meta.json')

In [24]:
# Order the rows by index, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,1.0,"{""type"": ""Point"", ""coordinates"": [-74.90687011...",3601,627593291,Cwalks3
1,1.0,"{""type"": ""Point"", ""coordinates"": [-74.17527019...",2495,37794867,SAVKANG
2,2.0,"{""type"": ""Point"", ""coordinates"": [-74.72309872...",2173,339306571,Shell_Babii


## 0. Import all of the individual user dataframes

In [25]:
# Every entry in the input directory except the metadata file is a user file.
users_in = sorted(
    entry
    for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 100 users in /data/chime/geo2/PROCESSED/NJ/SouthernBarrier_Stage2/


In [26]:
def loader_function(args):
    """Load one user's feature file into a GeoDataFrame.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: file name, containing directory, and a queue
        that receives one token when the file has been processed (used by
        the caller as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from the feature's ``properties``. Rows
        with a non-empty geometry get a ``geometry`` value, and ``date`` is
        converted to ``pd.Timestamp``.
    """
    uFile, path, q = args
    # Context manager so the file handle is closed promptly in every worker.
    with open(os.path.join(path, uFile), 'r') as fp:
        u = json.load(fp)
    tweets = []
    for t in u['features']:
        props = t['properties']
        # .get() tolerates features that omit the geometry key entirely.
        geom = t.get('geometry')
        if geom:
            props['geometry'] = shape(geom)
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [27]:
#Parallel runtime
# The Manager is entered first so its queue outlives the pool. Both context
# managers release their helper processes even when loading fails.
with Manager() as m, Pool(30) as p:
    q = m.Queue()

    args = [(i, input_directory, q) for i in users_in]
    result = p.map_async(loader_function, args)
    # Guard the percentage against an empty input directory.
    total = max(len(args), 1)

    # monitor loop
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/total*100))
        # Returns as soon as the work finishes, or after 0.5 s otherwise.
        result.wait(0.5)
    done = q.qsize()
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(done, done/total*100))
    # Re-raises here any exception that occurred inside a worker.
    users = result.get()

Processed: 100, 100%

In [28]:
# Largest frames (most rows) first.
users.sort(key=len, reverse=True)

`users` is a list of per-user GeoDataFrames, sorted by number of tweets (largest first). Next, find which users have _home locations_ inside the zone geometry loaded above.

In [29]:
# Spot-check one user frame: row count, then the first row.
x = users[10]
print(len(x))
x.head(1)

997


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,-1,,"[-74.82572468, 38.98089102]",2012-09-02 13:18:56+00:00,9,,POINT (-74.82572467999999 38.98089102),1.0,,It's soo boring here until @cujokevlaff gets here,,242250258040647680,69482115,Korobellis20


In [30]:
def get_home_cluster_center(userDF):
    """Return the geometry of a user's home cluster center, or None.

    The home cluster id is read from the first row's ``home_cluster_id``.
    The center is the ``cluster_center`` (a GeoJSON string) of the first row
    whose ``cluster`` equals that id.

    Returns None instead of raising when the frame is empty, the home
    cluster id is null, no row belongs to the home cluster, or the matching
    row has no cluster center.
    """
    if len(userDF) == 0:
        return None
    home_id = userDF.home_cluster_id.values[0]
    if pd.isnull(home_id):
        return None
    centers = userDF.loc[userDF.cluster == home_id, 'cluster_center']
    if centers.empty:
        return None
    center = centers.iloc[0]
    if center is None or (isinstance(center, float) and pd.isnull(center)):
        return None
    return shape(json.loads(center))

# Sanity check: is the first user's home cluster center inside the zone?
get_home_cluster_center(users[0]).within(zone)

True

## 1. Determine whose home cluster center is in the zone

In [31]:
def insideZone(p):
    """Return whether geometry ``p`` lies within the module-level ``zone``.

    ``None`` (no geometry available) counts as outside the zone.
    """
    # Identity check: ``== None`` would go through the object's own __eq__.
    if p is None:
        return False
    return p.within(zone)

In [32]:
# Split users by whether their home cluster center falls inside the zone.
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    home_center = get_home_cluster_center(u)
    (vuln if insideZone(home_center) else non_vuln).append(u)
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
sys.stderr.write(
    "Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln))
)

Done...Identified 49 vulnerable users and 51 non-vulnerable

In [33]:
# Create the output directory, including missing parents. exist_ok avoids the
# race between checking for the directory and creating it.
os.makedirs(output_directory, exist_ok=True)

## 2. Write out only the geo-vulnerable users, in case we need them later

In [26]:
def safe_mapping(p):
    """Convert a geometry to its GeoJSON-like mapping, or None when missing.

    Returns None when ``p`` is None, or when numpy can evaluate ``p``
    numerically and it contains NaN (e.g. a float NaN placeholder).
    Otherwise returns ``mapping(p)``.
    """
    if p is None:
        return None
    try:
        has_nan = bool(np.any(np.isnan(p)))
    except TypeError:
        # np.isnan rejects objects it cannot treat as numeric (for example a
        # geometry with no array interface); such values are not NaN.
        has_nan = False
    if has_nan:
        return None
    return mapping(p)
def _json_default(value):
    """Fallback for ``json.dump``: unwrap numpy scalars into Python values."""
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError("Object of type {0} is not JSON serializable".format(type(value).__name__))

def safe_json_export(args):
    """Write one user's frame to ``<path>/<lowercased user name>.geojson``.

    Parameters
    ----------
    args : tuple
        ``(df, path)``: the user's (Geo)DataFrame and the output directory.
        ``df`` must have ``user`` and ``date`` columns; a ``geometry`` column
        is optional.

    Each row becomes one GeoJSON Feature. The row's geometry (if any) goes
    into ``geometry`` and every other column into ``properties``. Null cells
    are written as JSON ``null``. An empty frame is skipped, since there is
    no user name to derive a file name from.
    """
    df, path = args
    if len(df) == 0:
        return
    df = df.copy()
    uName = df.user.values[0].lower()
    # Null dates stay null instead of raising inside strftime.
    df['date'] = df['date'].apply(
        lambda t: None if pd.isnull(t) else t.strftime('%Y-%m-%dT%H:%M:%SZ'))

    # Cast to object first: on newer pandas, where(..., None) on a float column
    # may keep NaN, which json would emit as an invalid bare NaN token.
    clean = df.astype(object).where(pd.notnull(df), None)
    features = []
    for _, row in clean.iterrows():
        properties = row.to_dict()
        # pop() both extracts the geometry and keeps it out of the properties;
        # the default covers frames that have no geometry column at all.
        geom = safe_mapping(properties.pop('geometry', None))
        features.append({'type':'Feature',
                         'geometry':geom,
                         'properties':properties})
    geojson = {"type":"FeatureCollection","features":features}

    with open(os.path.join(path, uName+'.geojson'),'w') as oFile:
        # default= is only consulted for values json cannot serialize itself.
        json.dump(geojson, oFile, default=_json_default)

In [34]:
# Export every vulnerable user to the output directory, with a running count.
# NOTE(review): the recorded run of this cell raised NameError, apparently
# because the cell defining safe_json_export had not been executed in that
# kernel session. Run that cell first.
for idx, u in enumerate(vuln):
    safe_json_export((u,output_directory))
    sys.stderr.write("\r{0} processed".format(idx+1))

NameError: name 'safe_json_export' is not defined

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualization. The list of user names below can be pasted into Google Sheets.

In [35]:
# Lower-cased screen names of the vulnerable users, alphabetically.
for uName in sorted(u.user[0].lower() for u in vuln):
    print(uName)

08243
adtwenty7
aimee_sh
alisonvitti
amandastilts
ameliaianiro
amkowski11
appiemallous
asap_donahue
britfreds
brucknerchase
chippiesue
cookecapemay
coolinlike_
cwalks3
daniellemillen1
devindrich28
elizuhbitch
greekiegrl25
gregparadee
haley__brooke
hewitt_ray
jboraski
jdfromugc
jobon0
kcgirl2003
korobellis20
kristensully88
lalaurenashleyy
leaahbitchh
lobsterrob
loper30030x
luis_minor7th
lynsiepatterson
madsmilees
mialpanzini
ottomcvey
realtweet_renna
rufftooth
sammaye
shell_babii
synister3314
th3_stat3m3nt
the_goose_609
thedean24
theescottyutz
tinithatsme
xosarahbearaxo
yoasisyogurt


In [36]:
def good_storm_tweets(userDF, start="2012-10-30 00:00", end="2012-10-31 00:00"):
    """Count a user's tweets strictly between ``start`` and ``end`` (UTC).

    Parameters
    ----------
    userDF : DataFrame
        Must have a ``date`` column of timestamps; naive values are treated
        as UTC.
    start, end : str or timestamp-like
        Exclusive window bounds; naive values are treated as UTC. The
        defaults cover 30 October 2012.

    Returns
    -------
    int
        Number of rows whose ``date`` falls inside the window.
    """
    def _as_utc(value):
        # Accept both naive and timezone-aware bounds.
        ts = pd.Timestamp(value)
        return ts.tz_localize("UTC") if ts.tzinfo is None else ts.tz_convert("UTC")

    # Explicit timestamps instead of integer literals inside a query string,
    # which relied on implicit int-to-date coercion.
    dates = pd.to_datetime(userDF["date"], utc=True)
    in_window = (dates > _as_utc(start)) & (dates < _as_utc(end))
    return int(in_window.sum())
    
# Keep the vulnerable users with more than two tweets inside the storm window.
good_data = []
res = []
for idx, u in enumerate(vuln):
    if good_storm_tweets(u) > 2:
        good_data.append(u)

    # 1-based counter, matching the progress output of the other cells.
    sys.stderr.write("\r"+str(idx+1))

print(len(good_data))
print()
for uName in sorted(u.user[0].lower() for u in good_data):
    print(uName)

0123456789101112131415161718192021222324252627

24

adtwenty7
amandastilts
ameliaianiro
amkowski11
asap_donahue
britfreds
coolinlike_
cwalks3
devindrich28
elizuhbitch
gregparadee
haley__brooke
jobon0
korobellis20
lobsterrob
lynsiepatterson
madsmilees
mialpanzini
ottomcvey
realtweet_renna
rufftooth
th3_stat3m3nt
the_goose_609
xosarahbearaxo


282930313233343536373839404142434445464748

# Create rules file from user IDs

In [None]:
# One "from:<uid>" clause per vulnerable user, OR-ed together in batches.
# Slicing by batch size keeps the final partial batch, which the previous
# flush-inside-the-loop logic silently dropped, and makes every batch hold at
# most `batch_size` clauses (the first one used to hold 26).
batch_size = 25
# str() so that integer uids can be concatenated.
from_clauses = ["from:" + str(u.uid[0]) for u in vuln]
rules = [
    " OR ".join(from_clauses[start:start + batch_size])
    for start in range(0, len(from_clauses), batch_size)
]

# Each rule is wrapped as {"value": <rule>} before being written out.
output = [{"value": r} for r in rules]

with open('../../GNIP/Sandy/NJ_GeoVulnerable_Contextual/rules.json','w') as oFile:
    json.dump(output, oFile)