# Examining the Geographically Vulnerable (v2)

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [2]:
# Directory that is listed for per-user files; each file is parsed as JSON by
# the loader cell further down.
input_directory = "/data/chime/geo2/PROCESSED/NYC/ZoneA_Stage3/"

# Shapefile opened with fiona to build the evacuation `zone` polygon.
zoneGeometry = "../EvacuationZones/NewYorkCity/EvacZoneA.shp"

#### Import the Zone Geometry

In [3]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
# Read only the first feature of the shapefile; the context manager closes the
# collection (the original left it open), and next(iter(c)) replaces the
# deprecated Collection.next() call.
with fiona.open(zoneGeometry, 'r') as c:
    pol = next(iter(c))

# buffer(0) is applied before use -- presumably to clean up an invalid polygon;
# the is_valid check displayed below confirms the result.
zone = shape(pol['geometry']).buffer(0)
zone.is_valid

True

## 0. Import all of the individual user dataframes

In [4]:
# Every entry in the input directory is treated as one user's file, except the
# metadata file that lives alongside them.
META_FILE = "temporal_clustered_user_meta.json"
users_in = sorted(name for name in os.listdir(input_directory) if name != META_FILE)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 233 users in /data/chime/geo2/PROCESSED/NYC/ZoneA_Stage3/


In [5]:
def loader_function(args):
    """Load one user's feature file into a GeoDataFrame.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)`` -- file name, directory containing it, and a
        queue that receives a ``1`` once the file has been processed (the
        caller reads the queue size as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature, built from each feature's ``properties`` dict
        with ``date`` parsed to a ``pd.Timestamp`` and, when the feature has
        a geometry, a shapely ``geometry`` value.
    """
    uFile, path, q = args

    # Close the file deterministically instead of leaking the handle.
    with open(os.path.join(path, uFile), 'r') as fh:
        u = json.load(fh)

    tweets = []
    for t in u['features']:
        props = t['properties']
        # Features with a null/empty geometry keep no 'geometry' key at all.
        if t['geometry']:
            props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)

    q.put(1)  # progress tick for the monitoring loop
    return gpd.GeoDataFrame(tweets)

In [6]:
# Parallel runtime: load every user file in a worker pool while reporting
# progress from a shared queue that each worker ticks once per file.
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in]


def _percent_done(done, total):
    """Percentage of `total` completed; float division on Python 2 and 3, 100 when there is nothing to do."""
    if total == 0:
        return 100.0
    return float(done) / total * 100


try:
    result = p.map_async(loader_function, args)

    # monitor loop
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, _percent_done(size, len(args))))
        time.sleep(0.5)
    size = q.qsize()
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, _percent_done(size, len(args))))

    # Re-raises here if any worker failed.
    users = result.get()
finally:
    # Always release the worker processes, even when a worker raised.
    p.close()
    p.join()

Processed: 233, 100%

## `users` is a list of GeoDataFrames, one per user

In [7]:
# Largest frames (most rows) first; sorts the list in place.
users.sort(key=len, reverse=True)

In [9]:
# Take the `user` value at index label 0 of each frame as that frame's name.
user_names = []
for frame in users:
    user_names.append(frame.user[0])
user_names[:5]

['NZavaa', '_an_oak_tree_', 'MyLuvisKING', 'brianaaanicolee', 'objorionto']

Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80524868, 40.7950463]",2012-09-09 19:49:57+00:00,10,,POINT (-73.80524868000001 40.7950463),2.0,,@DaRealJimmyHils @OnPointDHH lmaooo ummm yes i...,,244885375221379072,284851512,NZavaa
1,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80524798, 40.79504563]",2012-09-09 19:50:29+00:00,10,0.094988,POINT (-73.80524798 40.79504563),,0.002968,@OnPointDHH @DaRealJimmyHils id trust Romo ove...,32.0,244885509946609666,284851512,NZavaa
2,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80524238, 40.7950373]",2012-09-09 19:52:08+00:00,10,1.039314,POINT (-73.80524238 40.7950373),,0.010498,"@Allen_Strk dude hes good, but what has he don...",99.0,244885924939431936,284851512,NZavaa
3,2,"{""type"": ""Point"", ""coordinates"": [-73.80392010...","[-73.8032221, 40.8116525]",2012-09-09 19:52:29+00:00,10,1855.334991,POINT (-73.8032221 40.8116525),,88.349285,@Allen_Strk threat and you cant make it passed...,21.0,244886011421798400,284851512,NZavaa
4,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80479295, 40.79521275]",2012-09-09 19:55:31+00:00,10,1832.792126,POINT (-73.80479295000001 40.79521275),,10.070286,@Allen_Strk i think Matt Ryan is fuckin beast....,182.0,244886776056000512,284851512,NZavaa
5,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80603617, 40.7950805]",2012-09-09 19:59:34+00:00,10,105.682588,POINT (-73.80603617 40.7950805),,0.434908,@Dirty30DaCorta RG3 just anally raped Brees,243.0,244887793178910720,284851512,NZavaa
6,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80557747, 40.7950764]",2012-09-09 20:02:07+00:00,10,38.616172,POINT (-73.80557747 40.7950764),,0.252393,@DaRealJimmyHils @OnPointDHH its the Bills tho...,153.0,244888436710977537,284851512,NZavaa
7,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80551542, 40.79509125]",2012-09-09 20:02:24+00:00,10,5.478170,POINT (-73.80551542000001 40.79509125),,0.322245,@DaRealJimmyHils @OnPointDHH at the end of the...,17.0,244888507770888192,284851512,NZavaa
8,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80526523, 40.79516668]",2012-09-09 20:04:31+00:00,10,22.669726,POINT (-73.80526523 40.79516668),,0.178502,"@DaRealJimmyHils i no what u mean, i aint even...",127.0,244889041399590912,284851512,NZavaa
9,1,"{""type"": ""Point"", ""coordinates"": [-73.80519066...","[-73.80524642, 40.7951754]",2012-09-09 20:05:01+00:00,10,1.856720,POINT (-73.80524642 40.7951754),,0.061891,"@Dirty30DaCorta him and Garcon, are nothing tt...",30.0,244889165563568129,284851512,NZavaa


In [13]:
def get_home_cluster_center(userDF):
    """Return the center of the user's home cluster as a shapely geometry.

    The home cluster id is the first non-null ``home_cluster_id`` in the frame
    (the original read row 0 only and failed when that cell was NaN). The
    center is parsed from the GeoJSON string in ``cluster_center`` of the
    first row whose ``cluster`` equals that id.

    Parameters
    ----------
    userDF : DataFrame
        Needs ``home_cluster_id``, ``cluster`` and ``cluster_center`` columns.

    Returns
    -------
    shapely geometry or None
        ``None`` when the frame has no home cluster id or no row belongs to
        that cluster; ``insideZone`` treats ``None`` as "not inside".
    """
    home_ids = userDF.home_cluster_id.dropna()
    if home_ids.empty:
        return None
    home_id = home_ids.values[0]

    centers = userDF.loc[userDF.cluster == home_id, 'cluster_center'].dropna()
    if centers.empty:
        return None

    return shape(json.loads(centers.values[0]))

# Spot check on the first (largest) user: is their home cluster center in the zone?
first_user_home = get_home_cluster_center(users[0])
first_user_home.within(zone)

False

## 1. Determine whose home cluster center is in ZONE

In [14]:
def insideZone(p):
    """Return True when geometry `p` lies within the module-level `zone`.

    `None` (no geometry available) counts as not inside. The check uses
    identity (`is None`) so it never depends on the geometry's own `__eq__`.
    """
    if p is None:
        return False
    return p.within(zone)

In [15]:
# Split users by whether their home cluster center falls inside the zone.
vuln, non_vuln = [], []
for idx, u in enumerate(users):
    home_center = get_home_cluster_center(u)
    bucket = vuln if insideZone(home_center) else non_vuln
    bucket.append(u)
    # Overwrite the same stderr line with a running count.
    sys.stderr.write("\r"+str(idx+1))
sys.stderr.write("\rDone...")
sys.stderr.write("Identified {0} vulnerable users and {1} non-vulnerable".format(len(vuln),len(non_vuln)))

Done...Identified 233 vulnerable users and 1684 non-vulnerable

In [16]:
# NOTE(review): `output_directory` is not assigned in any cell visible in this
# notebook, so this fails on a fresh kernel -- define it in the configuration
# cell next to `input_directory`.
# makedirs (unlike mkdir) also creates any missing parent directories.
if not os.path.isdir(output_directory):
    os.makedirs(output_directory)

## 2. Write out just the GeoVulnerable, just in case we need them for something later

In [17]:
def safe_mapping(p):
    """Return the GeoJSON-style mapping of geometry `p`, or None when it is missing.

    "Missing" means `None`, or a value for which `np.isnan(p).any()` is true
    (e.g. a float NaN left in the geometry column). Objects that numpy cannot
    test for NaN at all are passed on to `mapping` instead of raising.
    """
    # Identity check: `p == None` is element-wise (and ambiguous) for arrays.
    if p is None:
        return None
    try:
        if np.isnan(p).any():
            return None
    except TypeError:
        # np.isnan rejects inputs it cannot coerce to a numeric array; such a
        # value is not a NaN placeholder, so fall through to mapping().
        pass
    return mapping(p)
def safe_json_export(args):
    """Write one user's frame to ``<path>/<username>.geojson`` as a FeatureCollection.

    Parameters
    ----------
    args : tuple
        ``(df, path)`` -- the user's frame and the output directory. The frame
        needs ``user``, ``date`` and ``geometry`` columns. (Single-tuple
        signature mirrors ``loader_function`` -- presumably so it can be
        mapped over a pool; confirm.)

    Each row becomes one feature: ``geometry`` goes through ``safe_mapping``
    and every other column becomes a property. The file name is the lower-cased
    ``user`` value of the first row.
    """
    df, path = args
    df = df.copy()  # the date column is rewritten below; leave the caller's frame alone
    uName = df.head(1).user.values[0].lower()
    # NOTE(review): the literal 'Z' suffix assumes the timestamps are already UTC -- confirm.
    df['date'] = df['date'].apply(lambda t: t.strftime('%Y-%m-%dT%H:%M:%SZ'))

    clean = df.where((pd.notnull(df)), None)
    geojson = {"type": "FeatureCollection", "features": []}
    for _, row in clean.iterrows():
        properties = row.to_dict()
        geom = safe_mapping(properties.pop('geometry'))
        # Depending on the pandas version, where(..., None) can leave NaN in
        # float columns; json.dump would emit it as a bare NaN token, which is
        # not valid JSON. Replace any remaining float NaN with None (null).
        properties = {
            key: (None if isinstance(value, float) and value != value else value)
            for key, value in properties.items()
        }
        geojson['features'].append({'type': 'Feature',
                                    'geometry': geom,
                                    'properties': properties})

    with open(os.path.join(path, uName + '.geojson'), 'w') as oFile:
        json.dump(geojson, oFile)

In [18]:
# Export each vulnerable user's frame to its own .geojson file, overwriting a
# single stderr line with the running count.
for idx, u in enumerate(vuln):
    export_args = (u, output_directory)
    safe_json_export(export_args)
    sys.stderr.write("\r{0} processed".format(idx+1))

233 processed

# 3. Prepare for Analysis

All of these users should already exist in a format prepared for visualizing; the user names printed below can be pasted into Google Sheets.

In [19]:
# Lower-cased user names of the vulnerable users, one per line, alphabetically.
vuln_names = [frame.user[0].lower() for frame in vuln]
vuln_names.sort()
for uName in vuln_names:
    print(uName)

831tobias
_an_oak_tree_
adrianamisoul
aeclearwater
afranks3
agate86
agreatbigcity
aladesnr
amanda_xtelle
anessava
annyrr_
antgetsbitches
ariramku
arnellmilton
arojass
aviva4ever
azzi_84
beatzzzz
beliebinfinate
bencashfarouq
bennygordo
bigjoe1022
bobspivak1
bohunkca
brendad1121
brianaaanicolee
bridgetekelly
britescrnmedia
broyumad
bryanfuhr
byronbradshaw
carlysamp
carminenyc
cavemanrobles
celinejade_
cem3
chinkeyeyez3
chissyn
chris_legaspi
chrisrodemeyer
cindychipz
ckanal
cnysurvivor1
continuumcycles
cooper_smith
coreybhale
damianmonzillo
danaamathews
danialvarezpr
dargamogirl
dark_iceberg
dcamz5
dee9996
dem_callmi_tutu
diogomontes
dj2020
dj_vasquezz
dommydom24
downtownmag
drewgould
drwho131
e_m0n3y
eddiegeenyc
edelsingh
eelain212
elisesp
emmarocks
ericabrooke12
erinfetherston
ethanfixell
explicit_clutch
fanlyeduardo
faridkader
fdnybagpiper
followmdm
g0ttalov3him
gabesantacruz10
garyalonynyc
georgiaafowler
georgieeeninjaa
gkor29
goldjenna
gryffindor_kate
haileygilesacts
hairwelove
heath

In [18]:
# Number of users collected in `vuln` (home cluster center inside the zone).
len(vuln)

356

# Create rules file from UID

In [19]:
# Users OR-ed together in a single rule.
# NOTE(review): 25 is carried over from the original loop -- presumably chosen
# to stay under a rule-length limit; confirm.
RULE_BATCH_SIZE = 25

# One "from:<uid>" clause per vulnerable user. str() keeps this working when
# uid was loaded as a number rather than a string.
these_rules = ["from:" + str(u.uid[0]) for u in vuln]

# Slice into consecutive batches. Unlike the original modulo test, every batch
# has at most RULE_BATCH_SIZE users (the first used to get 26) and the final
# partial batch is kept instead of being silently dropped.
rules = [
    " OR ".join(these_rules[start:start + RULE_BATCH_SIZE])
    for start in range(0, len(these_rules), RULE_BATCH_SIZE)
]

output = [{"value": r} for r in rules]

# NOTE(review): the destination says "NJ" while this notebook loads the NYC
# ZoneA data -- confirm this is the intended output path.
with open('../../GNIP/Sandy/NJ_GeoVulnerable_Contextual/rules.json','w') as oFile:
    json.dump(output, oFile)