# Are users' home locations in Zone A?

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np; import geopandas as gpd
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime
from bson import json_util

In [3]:
# Directory holding the per-user GeoJSON files and the user metadata JSON read by the cells below.
input_directory = "/data/chime/geo/sandy_zone_a/stage_2_temporal_clusters_with_home"
# Intended output location. NOTE(review): no later cell visible in this notebook references it — confirm it is still needed.
output_directory = "/data/chime/geo/sandy_zone_a/geo_vulnerable_users"

#### Import the ZoneA Geometry

In [4]:
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape

# Read the first feature of the Zone A shapefile (presumably the only one — confirm).
# The context manager closes the collection as soon as the feature has been read,
# and the builtin next(iter(...)) replaces the Python-2-style `.next()` method.
with fiona.open('../ZoneA_Geometry/ZoneA/OGRGeoJSON.shp','r') as c:
    pol = next(iter(c))

# buffer(0) rebuilds the polygon; the validity flag displayed below confirms the
# result is usable for the `within` tests performed later in the notebook.
zoneA = shape(pol['geometry']).buffer(0)
zoneA.is_valid

True

#### Import the user metadata DF

In [11]:
# Per-user summary table written by the previous stage; it lives next to the user files.
_user_meta = pd.read_json(os.path.join(input_directory, 'temporal_clustered_user_meta.json'))

In [14]:
# Put the rows in ascending index order, then preview the first three.
_user_meta = _user_meta.sort_index()
_user_meta.head(3)

Unnamed: 0,home_cluster,tweets,uid,user
0,1.0,72,165486875,Alliegallo
1,,35,20529586,AlexanderRea
2,,32,20088745,AdamGabbatt


## 0. Import all of the individual user dataframes

In [21]:
# Every file in the input directory is one user's data, except the shared metadata JSON.
users_in = sorted(
    entry for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 3 users in /data/chime/geo/sandy_zone_a/stage_2_temporal_clusters_with_home


In [22]:
def loader_function(args):
    """Load one user's GeoJSON file into a GeoDataFrame with one row per feature.

    The arguments arrive packed in a single tuple because the function is
    dispatched through ``Pool.map_async``, which passes one item per call.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory that contains it,
        and a queue that receives a ``1`` once the file has been parsed (the
        caller reads the queue size as a progress counter).

    Returns
    -------
    geopandas.GeoDataFrame
        The ``properties`` dict of every feature, with a ``geometry`` entry
        built by ``shape`` and ``date`` parsed into a ``pd.Timestamp``.
    """
    uFile, path, q = args
    # Close the file deterministically; each worker handles many files.
    with open(os.path.join(path, uFile), 'r') as handle:
        u = json.load(handle)
    tweets = []
    for t in u['features']:
        props = t['properties']
        props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    q.put(1)
    return gpd.GeoDataFrame(tweets)

In [23]:
#Parallel runtime
# NOTE(review): only the first 200 files are submitted, while outputs recorded
# further down mention thousands of users — confirm the intended cap.
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in[:200]]
total = len(args)
progress = "\rProcessed: {0}, {1:.3g}%"

try:
    result = p.map_async(loader_function, args)

    # monitor loop: each worker puts one item on the queue per finished file,
    # so the queue size is the number of users loaded so far.
    while not result.ready():
        size = q.qsize()
        # Guard the percentage against an empty work list.
        sys.stderr.write(progress.format(size, size/total*100 if total else 100.0))
        time.sleep(0.5)
    size = q.qsize()
    sys.stderr.write(progress.format(size, size/total*100 if total else 100.0))

    # Re-raises here if any worker failed.
    users = result.get()
    p.close()
except BaseException:
    # Do not leave 30 worker processes behind on failure or interrupt.
    p.terminate()
    raise
finally:
    # Wait for the workers to exit so the pool's resources are released.
    p.join()

Processed: 3, 100%

In [24]:
# Largest dataframes (most rows) first; sorts the list in place.
users.sort(key=len, reverse=True)

`users` is a list of per-user dataframes. Now find which users have _home locations_ in Zone A.

In [40]:
def get_home_cluster_center(x):
    """Return the centre of a user's home cluster as a shapely geometry.

    Parameters
    ----------
    x : DataFrame
        One user's tweets. Must carry the columns ``cluster``,
        ``home_cluster_id`` (read from the first row) and ``cluster_center``
        (a GeoJSON geometry encoded as a JSON string).

    Returns
    -------
    shapely geometry or None
        The decoded ``cluster_center`` of the first row whose ``cluster``
        equals the home cluster id. ``None`` when the frame is empty, the home
        cluster id is missing (NaN), or no row belongs to that cluster —
        ``insideZoneA`` treats ``None`` as "not in Zone A".
    """
    if len(x) == 0:
        return None
    home_id = x.home_cluster_id.values[0]
    if pd.isna(home_id):
        return None
    # Boolean mask instead of a string-built query expression.
    centers = x.loc[x['cluster'] == home_id, 'cluster_center']
    if centers.empty:
        return None
    return shape(json.loads(centers.values[0]))

get_home_cluster_center(users[1])

<shapely.geometry.point.Point at 0x7fe3060b9320>

## Determine whose home cluster center is in Zone A

In [41]:
def insideZoneA(p):
    """Tell whether a geometry lies within the Zone A polygon.

    Parameters
    ----------
    p : shapely geometry, None or NaN
        The location to test. ``None`` and float NaN (the placeholders pandas
        uses for a missing value) both mean "no location".

    Returns
    -------
    bool
        ``False`` when there is no location, otherwise ``p.within(zoneA)``.
    """
    # Identity test for None; the isinstance guard keeps the NaN self-inequality
    # check away from geometry objects.
    if p is None or (isinstance(p, float) and p != p):
        return False
    return p.within(zoneA)

In [66]:
# Flag each user whose home-cluster centre falls within Zone A.
# NOTE(review): the `hc_center` column is not created in any cell visible in this notebook.
_user_meta['inZoneA'] = _user_meta['hc_center'].apply(insideZoneA)

In [67]:
_user_meta.query('inZoneA').head(3)

Unnamed: 0,HomeCluster,tweets,user,hc_center,inZoneA
4,2.0,3462,NZavaa,POINT (-73.80392010800064 40.81081586865448),True
15,2.0,2573,_an_oak_tree_,POINT (-74.00711654755784 40.74712723907435),True
42,2.0,1718,MyLuvisKING,POINT (-74.01307974015074 40.71548709588853),True


In [68]:
# GeoJSON strings for the first five Zone A home centres.
[json.dumps(mapping(center)) for center in _user_meta.query('inZoneA').head(5).hc_center]

['{"coordinates": [-73.80392010800064, 40.81081586865448], "type": "Point"}',
 '{"coordinates": [-74.00711654755784, 40.74712723907435], "type": "Point"}',
 '{"coordinates": [-74.01307974015074, 40.715487095888534], "type": "Point"}',
 '{"coordinates": [-73.79006214377567, 40.85626400344152], "type": "Point"}',
 '{"coordinates": [-74.0056795964912, 40.74782570175439], "type": "Point"}']

In [20]:
# Screen names of every user whose home location is inside Zone A.
vulnerable_users = _user_meta.query('inZoneA')['user'].tolist()

In [None]:
# makedirs with exist_ok keeps the cell re-runnable and also creates a missing parent directory.
os.makedirs('../working_data/geovulnerable_users', exist_ok=True)

In [214]:
from bson import json_util

# Write the full dataframe of each Zone A resident to <screen name, lower-cased>.geojson.
for idx, u in enumerate(users):
    screen_name = u.user[0]
    if screen_name in vulnerable_users:
        out_path = '../working_data/geovulnerable_users/' + screen_name.lower() + '.geojson'
        with open(out_path, 'w') as oFile:
            geojson_text = u.to_json(default=json_util.default)
            oFile.write(geojson_text)
    # Overwrite the same stderr line with the index of the user just handled.
    sys.stderr.write("\r{0} processed".format(idx))

4858 processed

# Write out simpler GeoJSON for visualizations of GeoVulnerable

In [34]:
# Keep only the dataframes of users whose screen name is in `vulnerable_users`.
geovulnerable_users = []
for idx, u in enumerate(users):
    lives_in_zone_a = u.user[0] in vulnerable_users
    if lives_in_zone_a:
        geovulnerable_users.append(u)
    # Overwrite the same stderr line with the index of the user just checked.
    sys.stderr.write("\r{0} processed".format(idx))

4858 processed

In [35]:
len(geovulnerable_users)

118

In [36]:
# makedirs with exist_ok keeps the cell re-runnable and also creates a missing parent directory.
os.makedirs('/data/www/jennings/geovulnerable/inZoneA', exist_ok=True)

In [41]:
def pad0(num):
    """Return ``num`` as a string, prefixed with a single "0" when ``num`` is below 10."""
    prefix = "0" if num < 10 else ""
    return prefix + str(num)

In [52]:
# Print one viewer link per Zone A user, covering that user's first to last tweet.
for u in geovulnerable_users:
    # The lower-cased screen name is both the printed label and the GeoJSON file stem.
    file = name = u['user'][0].lower()
    g = u[['geometry','text','user','date']].copy()
    dates = g['date']
    first_date = dates[0]
    last_date  = dates[len(g)-1]

    # Seconds elapsed since the first row, reused for the hour and minute offsets.
    elapsed = dates.apply(lambda ts: (ts - first_date).total_seconds())
    g['time'] = dates.apply(lambda ts: datetime.datetime.strftime(ts, '%Y-%m-%dT%H:%M:%SZ'))
    g['h']    = elapsed.apply(lambda secs: int(secs/3600))
    g['m']    = elapsed.apply(lambda secs: int(secs/60))

    # The per-user export is currently disabled:
#     with open("/data/www/jennings/geovulnerable/inZoneA/"+file+".geojson",'w') as oFile:
#         oFile.write(g.to_json(default=json_util.default))

    padded = [pad0(part) for part in (first_date.month, first_date.day, first_date.hour, first_date.minute)]
    start = "{0}-{1}-{2}T{3}:{4}".format(first_date.year, *padded)
    end   = last_date.date()
    link = "http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/{0}.geojson&start={1}&end={2}&unit=minutes".format(file, start, end)
    print(name + "\t" + link)

nzavaa	http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/nzavaa.geojson&start=2012-09-09T19:49&end=2012-11-08&unit=minutes
_an_oak_tree_	http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/_an_oak_tree_.geojson&start=2012-09-01T00:41&end=2012-11-29&unit=minutes
myluvisking	http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/myluvisking.geojson&start=2012-10-06T21:56&end=2012-11-29&unit=minutes
brianaaanicolee	http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/brianaaanicolee.geojson&start=2012-09-01T04:47&end=2012-11-29&unit=minutes
objorionto	http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/jennings/geovulnerable/inZoneA/objorionto

In [69]:
_user_meta.query("user=='AdrianaMisoul'")

Unnamed: 0,HomeCluster,tweets,user,hc_center,inZoneA
1730,2.0,102,AdrianaMisoul,POINT (-74.00504726378374 40.70733905216217),True


In [70]:
# makedirs with exist_ok keeps the cell re-runnable and also creates a missing parent directory.
os.makedirs('/data/www/jennings/geovulnerable/meta', exist_ok=True)

In [81]:
# One small GeoJSON Feature per user with a positive HomeCluster: the home centre plus tweet count.
for idx, u in _user_meta.query('HomeCluster>0').iterrows():
    file = u.user.lower()

    properties = {"user": u.user, "tCount": u.tweets}
    featColl = {"type": "Feature", "geometry": mapping(u.hc_center), "properties": properties}

    meta_path = "/data/www/jennings/geovulnerable/meta/" + file + ".geojson"
    with open(meta_path, 'w') as oFileMeta:
        oFileMeta.write(json.dumps(featColl))

In [87]:
# Keep only the rows with a positive HomeCluster value (a missing value fails the comparison).
hc_test = pd.DataFrame(_user_meta.query("HomeCluster>0"))

In [89]:
hc_test.head()

Unnamed: 0,HomeCluster,tweets,user,hc_center,inZoneA
0,1.0,14231,Andrewthemark,POINT (-73.69255456263605 40.67338539101333),False
1,3.0,4934,frankieciv608,POINT (-73.81751579057131 40.82403772085716),False
2,2.0,4566,NewYorkPuck,POINT (-74.19747629434777 40.55514102555725),False
3,1.0,4008,DerekBFelix,POINT (-74.19747539022777 40.55502571900404),False
4,2.0,3462,NZavaa,POINT (-73.80392010800064 40.81081586865448),True


In [90]:
# Wrap the filtered table as a GeoDataFrame whose geometry column is `hc_center`.
df = gpd.GeoDataFrame(hc_test,geometry='hc_center')

In [93]:
df.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7f94e4bab4a8>

In [94]:
%matplotlib inline

In [99]:
# Copy `user` and `tweets` into columns named `text` and `time`.
# Presumably these are the field names the web viewer reads (the per-user export uses the same names) — confirm.
df['text'] = df['user']
df['time'] = df['tweets']

In [100]:
# Serialise the whole `df` GeoDataFrame into a single GeoJSON file.
with open('/data/www/jennings/home_locations_1200.geojson','w') as oFile:
    oFile.write(df.to_json())

In [101]:
# NOTE(review): `_users_with_centroids` is not assigned in any cell visible in this notebook.
# Unless it is defined elsewhere, this cell raises NameError on a fresh Restart & Run All.
_users_with_centroids[0].head()

Unnamed: 0,cluster,coords,date,day_cluster,geo_delta,geometry,speed,text,time_delta,user,cluster_center
0,1,"[-73.69149255, 40.67361288]",2012-09-04 00:21:21+00:00,5,,POINT (-73.69149255000001 40.67361288),,Why cant cole be joined?,,Andrewthemark,POINT (-73.69255456263605 40.67338539101333)
1,1,"[-73.69149255, 40.67361288]",2012-09-04 00:22:43+00:00,5,0.0,POINT (-73.69149255000001 40.67361288),0.0,Um... Wut.,82.0,Andrewthemark,POINT (-73.69255456263605 40.67338539101333)
2,1,"[-73.69149255, 40.67361288]",2012-09-04 00:22:57+00:00,5,0.0,POINT (-73.69149255000001 40.67361288),0.0,THEYRE CHANTING LETS GO SHEAMUS!,14.0,Andrewthemark,POINT (-73.69255456263605 40.67338539101333)
3,1,"[-73.69149255, 40.67361288]",2012-09-04 00:24:06+00:00,5,0.0,POINT (-73.69149255000001 40.67361288),0.0,Why are there only 3 commentators in vince's m...,69.0,Andrewthemark,POINT (-73.69255456263605 40.67338539101333)
4,1,"[-73.69153706, 40.67357022]",2012-09-04 00:27:03+00:00,5,6.04912,POINT (-73.69153706 40.67357022),0.034176,Why would they take out lawler and mathews?,177.0,Andrewthemark,POINT (-73.69255456263605 40.67338539101333)


In [102]:
len(_users_with_centroids)

4859