# Are users' home locations in vulnerable locations?

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [2]:
# Directory holding the per-user files plus the user metadata JSON read below.
input_directory  = "/data/chime/geo2/PROCESSED/NJ/CentralInterior_Stage2/"
# Directory that receives one GeoJSON file per user whose home falls inside the zone.
output_directory = "/data/chime/geo2/PROCESSED/NJ/CentralInterior_Stage3/"
# Shapefile whose first feature is used as the zone polygon.
zoneGeometry     = "../EvacuationZones/NewJersey/NJ_CentralInterior.shp"

#### Import the ZoneA Geometry

In [3]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
# Only the first feature of the shapefile is used as the zone polygon.
# `next(iter(c))` replaces the deprecated `c.next()`, and the context manager
# closes the collection once the feature has been read.
with fiona.open(zoneGeometry, 'r') as c:
    pol = next(iter(c))
# buffer(0) rebuilds the geometry before the validity check displayed below.
zone = shape(pol['geometry']).buffer(0)
zone.is_valid

  This is separate from the ipykernel package so we can avoid doing imports until


True

#### Import the user metadata DF (Phasing this out)

In [4]:
# Load the user metadata table stored alongside the per-user files in input_directory.
_user_meta = pd.read_json(input_directory+'/temporal_clustered_user_meta.json')

In [5]:
# Put the rows in index order, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,1.0,"{""coordinates"": [-73.69247457249924, 40.673434...",14231,75153082,Andrewthemark
1,2.0,"{""coordinates"": [-74.40499262545818, 39.386719...",3857,318103926,lanceeryann
2,1.0,"{""coordinates"": [-73.81296028845227, 40.776988...",3778,190488649,giaco1025


## 0. Import all of the individual user dataframes

In [6]:
# Every entry in the input directory except the metadata file is one user's data.
users_in = sorted(
    fname
    for fname in os.listdir(input_directory)
    if fname != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 196 users in /data/chime/geo2/PROCESSED/NJ/CentralInterior_Stage2/


In [7]:
def loader_function(args):
    """Load one user's GeoJSON-style file into a GeoDataFrame (pool worker).

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory containing it, and a
        queue that receives one token when the file has been processed so the
        parent process can report progress.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from the feature's ``properties`` dict with
        ``date`` parsed to ``pd.Timestamp`` and, when the feature has a
        geometry, a shapely object stored under ``geometry``.
    """
    uFile, path, q = args
    # The context manager closes the handle as soon as parsing is done instead
    # of leaving one open file per user for the garbage collector.
    with open(os.path.join(path, uFile), 'r') as fh:
        u = json.load(fh)
    tweets = []
    for t in u['features']:
        props = t['properties']
        # Features whose geometry is null/absent simply get no geometry value.
        if t.get('geometry'):
            props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [8]:
#Parallel runtime
# Manager first, Pool second: context managers unwind in reverse order, so the
# worker pool is torn down before the manager that owns the progress queue.
# Leaving the block also cleans up both, even if loading raises or is interrupted.
with Manager() as m, Pool(30) as p:
    q = m.Queue()

    args = [(i, input_directory, q) for i in users_in]
    # Denominator for the progress percentage; never zero, so an empty input
    # directory reports 0% instead of raising ZeroDivisionError.
    n_total = max(len(args), 1)
    result = p.map_async(loader_function, args)

    # monitor loop: each worker puts one token on the queue per finished file
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/n_total*100))
        time.sleep(0.5)
    size = q.qsize()
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/n_total*100))
    # get() re-raises any exception that occurred inside a worker
    users = result.get()

Processed: 196, 100%%

In [9]:
# Largest user frames first (in-place sort by row count, descending).
users.sort(key=len, reverse=True)

`users` is a list of per-user dataframes, sorted by number of rows (largest first). Next, find which users have _home locations_ inside the zone.

In [10]:
# Spot-check one user frame: its row count and its first row.
# NOTE(review): index 100 is hard-coded and raises IndexError when fewer than
# 101 users were loaded.
x = users[100]
print(len(x))
x.head(1)

186


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1,"{""coordinates"": [-74.47722239625, 39.367818025...","[-74.47627774, 39.36818787]",2012-09-27 17:52:56+00:00,4,,POINT (-74.47627774 39.36818787),2.0,,get me the fuck out of here,,251378905343213568,368597676,lexi_derrickson


In [11]:
def get_home_cluster_center(userDF):
    """Return the center of a user's home cluster as a shapely geometry.

    The home cluster id is read from the first row's ``home_cluster_id``; the
    center is the JSON-encoded ``cluster_center`` of the first row whose
    ``cluster`` equals that id.

    Parameters
    ----------
    userDF : DataFrame
        One user's rows, with ``cluster``, ``cluster_center`` and
        ``home_cluster_id`` columns.

    Returns
    -------
    shapely geometry or None
        ``None`` when the frame is empty, the home cluster id is missing (NaN),
        no row belongs to that cluster, or the stored center is not a JSON
        string. Callers such as ``insideZone`` treat ``None`` as "not inside".
    """
    if len(userDF) == 0:
        return None
    home_id = userDF.home_cluster_id.values[0]
    # A NaN id used to be spliced into the query text ("cluster==nan") and crash.
    if pd.isnull(home_id):
        return None
    # Boolean mask instead of a string-built query expression.
    home_rows = userDF[userDF.cluster == home_id]
    if len(home_rows) == 0:
        return None
    center = home_rows.cluster_center.values[0]
    if not isinstance(center, (str, bytes)):
        return None
    return shape(json.loads(center))

# Smoke test on the first (largest) user frame: is its home cluster center inside the zone?
get_home_cluster_center(users[0]).within(zone)

False

## 1. Determine whose home cluster center is in the zone

In [12]:
def insideZone(p):
    """Return whether geometry ``p`` lies within the module-level ``zone``.

    Parameters
    ----------
    p : geometry or None
        Object exposing ``within(other)``; ``None`` means "no location known".

    Returns
    -------
    bool
        ``False`` for ``None``, otherwise the result of ``p.within(zone)``.
    """
    # Identity test: `p == None` would dispatch to the geometry's own __eq__.
    if p is None:
        return False
    return p.within(zone)

In [13]:
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    # Each user lands in exactly one bucket, decided by the home-cluster test.
    (vuln if insideZone(get_home_cluster_center(u)) else non_vuln).append(u)
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
sys.stderr.write(
    "Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln))
)

Done...Identified 93 vulnerable users and 103 non-vulnerable

In [14]:
# makedirs also creates missing parent directories, and exist_ok=True removes
# the check-then-create race of the exists()/mkdir() pair.
os.makedirs(output_directory, exist_ok=True)

## 2. Write out just the GeoVulnerable users, in case we need them later

In [16]:
def _coords_contain_nan(coords):
    """Return True if any number nested anywhere inside ``coords`` is NaN."""
    if isinstance(coords, (list, tuple)):
        return any(_coords_contain_nan(c) for c in coords)
    return isinstance(coords, (float, np.floating)) and bool(np.isnan(coords))

def safe_mapping(p):
    """Convert a geometry to its GeoJSON-style mapping, or ``None`` if unusable.

    Parameters
    ----------
    p : geometry, float NaN or None
        The value stored in a row's ``geometry`` cell.

    Returns
    -------
    dict or None
        ``mapping(p)`` for a geometry whose coordinates are all real numbers;
        ``None`` when ``p`` is ``None``, a float NaN placeholder, or a geometry
        with a NaN coordinate.
    """
    if p is None:
        return None
    if isinstance(p, (float, np.floating)) and np.isnan(p):
        return None
    # Inspect the mapped coordinates rather than calling np.isnan on the object
    # itself, which raises TypeError for objects NumPy cannot coerce to numbers.
    geo = mapping(p)
    if _coords_contain_nan(geo.get('coordinates', ())):
        return None
    return geo
def _is_missing(value):
    """Return True for ``None`` and float NaN, the two missing-cell markers handled here."""
    return value is None or (isinstance(value, float) and value != value)

def safe_json_export(args):
    """Write one user's rows to ``<path>/<username>.geojson`` as a FeatureCollection.

    Parameters
    ----------
    args : tuple
        ``(df, path)``: the user's frame (with ``user``, ``date`` and
        ``geometry`` columns) and the output directory. The frame is copied,
        so the caller's object is not modified.

    Notes
    -----
    The file name is the lower-cased ``user`` value of the first row. ``date``
    is serialized as ``%Y-%m-%dT%H:%M:%SZ``. Missing property values are
    replaced with ``None`` row by row, so they are written as JSON ``null``
    rather than the non-standard ``NaN`` token, independent of how the
    installed pandas handles ``DataFrame.where(..., None)``.
    """
    df, path = args
    df = df.copy()
    uName = df.user.values[0].lower()
    df['date'] = df['date'].apply(lambda t: t.strftime('%Y-%m-%dT%H:%M:%SZ'))

    geojson = {"type":"FeatureCollection","features":[]}
    for _, row in df.iterrows():
        # The geometry goes into the feature's own slot, not into the properties.
        properties = {
            key: (None if _is_missing(val) else val)
            for key, val in row.to_dict().items()
            if key != 'geometry'
        }
        geojson['features'].append({'type':'Feature',
                                    'geometry':safe_mapping(row.geometry),
                                    'properties':properties})

    with open(os.path.join(path, uName+'.geojson'),'w') as oFile:
        json.dump(geojson, oFile)

In [15]:
# Export each vulnerable user's frame into output_directory, overwriting the
# stderr line with a running count of users processed.
for idx, u in enumerate(vuln):
    safe_json_export((u,output_directory))
    sys.stderr.write("\r{0} processed".format(idx+1))

NameError: name 'safe_json_export' is not defined

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing; the list of names below can be pasted into Google Sheets.

In [16]:
# One lower-cased user name per line, alphabetically, for the vulnerable users.
for uName in sorted(u.user[0].lower() for u in vuln):
    print(uName)

alyssagabrielaa
amirtoptier
arielwoodson
b_weezy
badblasian
benfwillems
bennettsrants
benz2020
birddreams
blarro91
bobbytpodg
bree_gardner
bry_coons
bubsxgreen
chodemuncher96
coolkym
devashanandpara
elbparker
enancemorbillz
espritnomade
floxily
fornicateera
gmillett
greekconcertpro
haleymight
heather_joness
heyimgina
hottoddy___
iamhannahcesen
iilaur3nn
imeelicioustea
james_thompsonn
janeecouture
jason613wilson
jeff_revelle
jeremygano
jesterplay
joel__22
john_scalera
jordandebarth
juliannarose3x
kempkempoh
king_obey
koranny_609
laurenxturner
leannesamm
louie_orlando
marieshantee
mariog263
maslife
melanieevelyna
mikeyg41393
mirandalamey
mmirags73
molllyx0
ohhhmister
pattilicious62
pauldooner
prettyjocky
rachcakesxo
rachelambrose1
rayellisiii
rfugee08
rhifabel
rissassuite
rozay_che
sblong007
semievilgenius
sjersey_girl
snukumsss
spilup9
summers_shaun
surfer4lifenj
susanrdunn
sweetdaddyds
taaywalsh
tayfaisst10
tayosterman
tcrimz
teatimelovely
thatbitch_mamas
theckman
thelizziebean
therigh

In [17]:
# Number of users classified as vulnerable.
len(vuln)

93

In [18]:
def good_storm_tweets(userDF, start="2012-10-30 00:00", end="2012-10-31 00:00"):
    """Count a user's rows dated strictly between ``start`` and ``end``.

    Parameters
    ----------
    userDF : DataFrame
        One user's rows with a ``date`` column.
    start, end : str or Timestamp, optional
        Exclusive window bounds, interpreted as UTC. The defaults reproduce the
        window previously written as the integers 201210300000 and 201210310000.

    Returns
    -------
    int
        Number of rows with ``start < date < end``.
    """
    # Compare real timestamps instead of relying on the query engine to coerce
    # bare integers into dates. utc=True puts every value on one time line.
    # NOTE(review): tz-naive dates are treated as UTC here; confirm that matches the data.
    dates = pd.to_datetime(userDF["date"], utc=True)
    lower = pd.Timestamp(start, tz="UTC")
    upper = pd.Timestamp(end, tz="UTC")
    return int(((dates > lower) & (dates < upper)).sum())
    
good_data, res = [], []
for idx, u in enumerate(vuln):
    # Keep users with more than two rows inside the storm window.
    if good_storm_tweets(u) > 2:
        good_data.append(u)

    sys.stderr.write("\r"+str(idx))

# Replace the running index on stderr with the number of users kept.
sys.stderr.write("\r"+str(len(good_data)))
for uName in sorted(u.user[0].lower() for u in good_data):
    print(uName)

48

alyssagabrielaa
amirtoptier
arielwoodson
bennettsrants
blarro91
bree_gardner
bry_coons
devashanandpara
enancemorbillz
espritnomade
fornicateera
gmillett
haleymight
heather_joness
heyimgina
hottoddy___
iamhannahcesen
iilaur3nn
james_thompsonn
jeff_revelle
jesterplay
john_scalera
juliannarose3x
laurenxturner
leannesamm
marieshantee
maslife
pattilicious62
prettyjocky
rayellisiii
rfugee08
semievilgenius
sjersey_girl
surfer4lifenj
tayfaisst10
tcrimz
thatbitch_mamas
thelizziebean
therightist
tubbysofly
xoxolulubrownn
yamzdoee
zaksetzer


495051525354555657585960616263646566676869707172737475767778798081828384858687888990919243

# Create rules file from UID

In [20]:
# Build "from:<uid>" clauses for every vulnerable user and OR them together in
# batches, one rule value per batch.
# NOTE(review): 25 clauses per rule is kept from the original code; presumably
# it reflects a rule-size limit of the consuming API. Confirm.
batch_size = 25

# str() so the concatenation works whether uid was loaded as text or as a number.
uid_clauses = ["from:" + str(u.uid[0]) for u in vuln]

# Slice-based batching: every batch holds at most `batch_size` clauses and the
# final partial batch is included. The previous `idx % 25 == 0` test put 26
# clauses in the first rule and silently dropped whatever followed the last
# full batch.
rules = []
for batch_start in range(0, len(uid_clauses), batch_size):
    these_rules = uid_clauses[batch_start:batch_start + batch_size]
    rules.append(" OR ".join(these_rules))

output = [{"value": r} for r in rules]

with open('../../GNIP/Sandy/NJ_GeoVulnerable_Contextual/rules.json','w') as oFile:
    json.dump(output, oFile)