# Are users' home locations in vulnerable locations?

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [2]:
# Directory holding the per-user Stage-2 files plus the user metadata JSON.
input_directory  = "/data/chime/geo2/PROCESSED/NJ/NorthernBarrier_Stage2/"
# Directory that receives one GeoJSON file per geo-vulnerable user.
output_directory = "/data/chime/geo2/PROCESSED/NJ/NorthernBarrier_Stage3/"
# Shapefile whose first feature is used as the zone polygon.
zoneGeometry     = "../EvacuationZones/NewJersey/NJ_NorthernBarrier.shp"

#### Import the ZoneA Geometry

In [3]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape

# Only the first feature of the shapefile is used as the zone polygon.
# `next(iter(c))` replaces the deprecated `Collection.next()`, and the context
# manager closes the underlying file once the feature has been read.
with fiona.open(zoneGeometry, 'r') as c:
    pol = next(iter(c))

# buffer(0) is applied before the validity check -- presumably to repair an
# invalid polygon; TODO confirm this is still required for this shapefile.
zone = shape(pol['geometry']).buffer(0)
zone.is_valid

  This is separate from the ipykernel package so we can avoid doing imports until


True

#### Import the user metadata DF (Phasing this out)

In [4]:
# User metadata table; it lives in the same directory as the per-user files.
_user_meta = pd.read_json(input_directory+'/temporal_clustered_user_meta.json')

In [5]:
# Order the rows by index, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,home_cluster_coords,tweets,uid,user
0,1.0,"{""type"": ""Point"", ""coordinates"": [-73.69247457...",14231,75153082,Andrewthemark
1,1.0,"{""type"": ""Point"", ""coordinates"": [-74.17544402...",3115,250117107,AmandaaTar
2,1.0,"{""type"": ""Point"", ""coordinates"": [-74.06380627...",2885,37586102,OfAllThingsAli


## 0. Import all of the individual user dataframes

In [6]:
# Every entry in the input directory except the metadata file is treated as one
# user's data file; names are kept in sorted order.
users_in = sorted(
    entry
    for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 601 users in /data/chime/geo2/PROCESSED/NJ/NorthernBarrier_Stage2/


In [7]:
def loader_function(args):
    """Load one user's GeoJSON file into a GeoDataFrame.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory containing it, and a
        queue that receives a ``1`` once the file has been processed (the
        caller reads the queue size as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from the feature's ``properties`` with
        ``date`` parsed to a ``pd.Timestamp`` and, when the feature has a
        geometry, a ``geometry`` entry converted with ``shape``.
    """
    uFile, path, q = args
    # Context manager so the handle is closed as soon as the file is parsed;
    # a bare open() inside json.load() leaks one descriptor per file in each worker.
    with open(path+"/"+uFile,'r') as in_file:
        u = json.load(in_file)
    tweets = []
    for t in u['features']:
        # Features without a geometry are kept; they just get no 'geometry' key.
        if t['geometry']:
            t['properties']['geometry'] = shape(t['geometry'])
        t['properties']['date'] = pd.Timestamp(t['properties']['date'])
        tweets.append(t['properties'])
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [8]:
#Parallel runtime
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in]
total = len(args)

try:
    result = p.map_async(loader_function, args)

    # monitor loop: each worker puts one item on `q` per finished file, so the
    # queue size is the number of files processed so far.
    while not result.ready():
        size = q.qsize()
        # Guard the percentage so an empty input directory cannot divide by zero.
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, size/total*100 if total else 100))
        time.sleep(0.5)
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(q.qsize(), q.qsize()/total*100 if total else 100))

    # Re-raises here if any worker failed.
    users = result.get()
    p.close()
except BaseException:
    # On a worker error or an interrupt, stop the workers instead of leaving
    # them running in the background of the kernel.
    p.terminate()
    raise
finally:
    # Wait for the worker processes to exit so none are left behind.
    p.join()

Processed: 601, 100%%

In [9]:
# Reorder the user frames in place, longest frame first.
users.sort(key=len, reverse=True)

`users` is a list of per-user dataframes. Now find which users have _home locations_ in Zone A.

In [10]:
# Spot-check a single user frame: print its row count, then show its first row.
x = users[100]
print(len(x))
x.head(1)

427


Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1,"{""type"": ""Point"", ""coordinates"": [-73.98805997...","[-73.98840071, 40.29086906]",2012-10-07 18:55:53+00:00,10,,POINT (-73.98840070999999 40.29086906),1.0,,What to wear tonight,,255018629521874945,271088738,TasteMy_Ink


In [11]:
def get_home_cluster_center(userDF):
    """Return the center of the user's home cluster as a shapely geometry.

    The home cluster id is taken from the first row's ``home_cluster_id``. The
    first row whose ``cluster`` equals that id supplies ``cluster_center``, a
    GeoJSON string that is parsed and converted with ``shape``.
    """
    home_id = userDF.home_cluster_id.values[0]
    home_rows = userDF.query('cluster=='+str(home_id))
    center_geojson = home_rows.cluster_center.values[0]
    return shape(json.loads(center_geojson))

# Sanity check on the first user frame: is its home cluster center within the zone?
get_home_cluster_center(users[0]).within(zone)

False

## 1. Determine whose home cluster center is in ZONE

In [12]:
def insideZone(p):
    """Return True when geometry ``p`` lies within the module-level ``zone``.

    A missing geometry (``None``) is treated as not inside the zone.
    """
    # Identity check rather than `== None`: equality dispatches to the object's
    # own __eq__, which is not guaranteed to return a plain bool.
    if p is None:
        return False
    return p.within(zone)

In [13]:
# Partition the user frames by whether the home cluster center falls in the zone.
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    target = vuln if insideZone(get_home_cluster_center(u)) else non_vuln
    target.append(u)
    # Running count on stderr, overwritten in place via the carriage return.
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
sys.stderr.write("Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln)))

Done...Identified 304 vulnerable users and 297 non-vulnerable

In [14]:
# Create the output directory, including any missing parents. exist_ok avoids
# the check-then-create race and makes the cell safe to re-run.
os.makedirs(output_directory, exist_ok=True)

## 2. Write out just the GeoVulnerable, just in case we need them for something later

In [15]:
def safe_mapping(p):
    """Return the GeoJSON-style mapping of geometry ``p``, or None if it is missing.

    ``None`` and NaN values (including array-likes containing a NaN) yield
    ``None``; anything else is passed to ``mapping``.
    """
    if p is None:
        return None
    try:
        if np.isnan(p).any():
            return None
    except (TypeError, ValueError):
        # np.isnan only accepts values that coerce to a numeric array. Objects
        # that do not (NOTE(review): this appears to include geometries under
        # newer shapely releases -- confirm) are not NaN, so fall through.
        pass
    return mapping(p)
def safe_json_export(args):
    """Write one user's frame to ``<path>/<user>.geojson`` as a FeatureCollection.

    ``args`` is a ``(df, path)`` tuple. The file name is the lower-cased ``user``
    value of the first row. ``date`` values are rendered as
    ``%Y-%m-%dT%H:%M:%SZ`` strings, null cells are masked with ``None`` through
    ``DataFrame.where``, and every row becomes one feature whose geometry comes
    from ``safe_mapping`` and whose properties are the remaining columns.
    """
    frame, out_dir = args
    frame = frame.copy()  # work on a copy so the caller's frame is not modified
    uName = frame.head(1).user.values[0].lower()
    frame['date'] = frame['date'].apply(lambda t: datetime.datetime.strftime(t,'%Y-%m-%dT%H:%M:%SZ'))

    clean = frame.where(pd.notnull(frame), None)

    features = []
    for _, row in clean.iterrows():
        geom = safe_mapping(row.geometry)
        properties = row.to_dict()
        # The geometry is carried by the feature itself, not by its properties.
        del properties['geometry']
        features.append({'type':'Feature',
                         'geometry':geom,
                         'properties':properties})
    geojson = {"type":"FeatureCollection","features":features}

    with open(out_dir+"/"+uName+'.geojson','w') as oFile:
        json.dump(geojson, oFile)

In [14]:
# Export every geo-vulnerable user frame, reporting a running count on stderr.
for done, vuln_df in enumerate(vuln, start=1):
    safe_json_export((vuln_df, output_directory))
    sys.stderr.write("\r{0} processed".format(done))

NameError: name 'safe_json_export' is not defined

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing; the user names printed below can be pasted into Google Sheets.

In [15]:
# Lower-cased user name of every vulnerable user, one per line, in sorted order.
vuln_names = sorted(u.user[0].lower() for u in vuln)
for uName in vuln_names:
    print(uName)

2caseface7
_asvplady
_daniellevaldez
acwelch
alawsonn
alexis_seitz
alexrosenkranz
allii_95
allitimms
amandawehnke
amberaalonzo
ameezys_girl_74
americabheleem
amyy1026
amyywalshh
andrew821
angelmariex0
angie_was_heree
aprilydavis
arexharris
arozcoy
ashleyguy__
asianbrizl3
ay_cayzie
ayo_tahj
ayymaloney
barbershairport
bbygrl_xoxo
bbytripk
belmardays
benirvine
bennehx3
bfagan6
bigffrank
billyhoward423
bkhomesteader
bouchersteph13
brian_perks
brii_fi
briiiv
brittni_linn
brooklynbridgex
broxlan
brycenihill
c_it_my_way
calllmejas_
cameren6
camposr1979
caprimcqueen
carleylarrison
carlosmorillo
caseadilia
catherinep3rry
catymogo
chandlernichols
charlottenagy
chettyoubetcha
chipzsullivan
chris22_steve
chrisalicious85
chrissie115
claire_reffitt
cmccluskey18
coastalartists
comfortsleepnj
connor_jryan
coopersteinjess
courtneyhabick
cravejams
crystalcadalzo
crystaldmac
d_fitz_187
daddyjerzey
dan_lukawski
dannielleung
david_pascale
debhanaway
deniseanndixon
dentinodana
dev_burns
devenmccarthy
dinama

In [18]:
# Number of user frames classified as vulnerable.
len(vuln)

304

In [18]:
def good_storm_tweets(userDF):
    """Count the rows of ``userDF`` whose ``date`` falls strictly inside the storm window.

    The window bounds are the numeric literals 201210300000 and 201210310000
    inside the query string.
    NOTE(review): this relies on ``DataFrame.query`` reading those literals as
    the timestamps 2012-10-30 00:00 and 2012-10-31 00:00 -- confirm on the
    pandas version in use.
    """
    storm_window = "date > 201210300000 & date < 201210310000"
    in_window = userDF.query(storm_window)
    return len(in_window)
    
# Keep the vulnerable users that have more than two tweets inside the storm window.
good_data = []
res = []
for idx, u in enumerate(vuln):
    storm_count = good_storm_tweets(u)
    if storm_count > 2:
        good_data.append(u)

    # Progress indicator on stderr (zero-based index of the frame just checked).
    sys.stderr.write("\r"+str(idx))

print()
print(str(len(good_data)))
good_names = sorted(u.user[0].lower() for u in good_data)
for uName in good_names:
    print(uName)


258


72
acwelch
alexis_seitz
allii_95
amyywalshh
ashleyguy__
ayo_tahj
barbershairport
brian_perks
briiiv
brooklynbridgex
c_it_my_way
carleylarrison
caseadilia
catherinep3rry
chandlernichols
coopersteinjess
cravejams
crystaldmac
devenmccarthy
djphilnappen
dominicksolazzo
donaldglenn12
dwells6222
eatmeimasian
faithnapolitano
filmfresser
frank_spin
fuentes_freddy
giggleyjordan
godsfetus
gotmoney1
jaclyn_desi
jeffscottgould
jennnelli
jennsurvive
jillianlgreen
jsthunder25
kaitlinedgerly
karlymcnamara
katmaloney1
kellycsogi
kellymackiee
laurensanti18
laurentenenbaum
leeannderthol
leemitchh
lennyseverino
lya_anderson
mad_ballerr_max
mariabmontgum
mattphadams
mike_caprio
missquixotic
morgandube
mynameisluissss
neverenoughmktg
nickbreezyyy
niiicolecapurso
ofallthingsali
pg_yolo
pjkelly18
rebecccamarilyn
rydoyylee
samk732
sarnooo
seanpkennedy2
stephdigiorgio
tayylloor922
theepinkprinces
theonlyjone
yussfigueroa
zackhoff3


259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303

# Create rules file from UID

In [19]:
# Build "from:<uid>" clauses for every vulnerable user and OR them together in
# batches of 25 clauses per rule.
rules = []
these_rules = []
for idx,u in enumerate(vuln):

    # str() so the concatenation also works when uid is stored as a number.
    rule = "from:"+str(u.uid[0])
    these_rules.append(rule)

    # Flush on batch size. The previous `idx%25==0 and idx>0` test put 26
    # clauses in the first rule and 25 in the others.
    if len(these_rules) == 25:
        rules.append(" OR ".join(these_rules))
        these_rules = []

# Flush the final partial batch; without this the users after the last full
# batch never make it into the rules file.
if these_rules:
    rules.append(" OR ".join(these_rules))

# One {"value": <rule>} object per rule.
output = [{"value": r} for r in rules]

with open('../../GNIP/Sandy/NJ_GeoVulnerable_Contextual/rules.json','w') as oFile:
    json.dump(output, oFile)