# Turn a directory of per-user tweet files into `geojson` that can be easily visualized on a map!

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np;
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime

In [2]:
# Directory that is listed for per-user input files; the file named
# "temporal_clustered_user_meta.json" in it is skipped when listing.
input_directory  = "/data/chime/geo/sandy_new_jersey_geovulnerable_contextual_stage2"
# Directory the per-user ".geojson" files are written to (created if missing).
output_directory = "/data/www/chime/movement-derivation/sandy_new_jersey_geo_vulnerable_contextual"

In [3]:
# Collect every entry of the input directory except the clustering metadata
# file, then order the names alphabetically.
users_in = [x for x in os.listdir(input_directory) if x != "temporal_clustered_user_meta.json"]
users_in.sort()
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 329 users in /data/chime/geo/sandy_new_jersey_geovulnerable_contextual_stage2


In [4]:
def loader_function(args):
    """Load one user's feature file into a DataFrame sorted by date.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)``: the file name, the directory that contains it,
        and either ``None`` or a queue that receives a ``1`` once the file has
        been processed (used by the caller as a progress counter).

    Returns
    -------
    pandas.DataFrame
        One row per feature, built from each feature's ``properties`` and
        sorted by ``date``. Added columns: ``geometry`` (a shapely geometry,
        only for features that carry one), ``time`` (formatted timestamp
        string), and ``h`` / ``m`` (whole hours / minutes elapsed since the
        earliest row).

    Notes
    -----
    A file whose ``features`` list is empty is not handled: the resulting
    frame has no ``date`` column to sort on.
    """
    uFile, path, q = args

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(path, uFile), 'r') as handle:
        u = json.load(handle)

    tweets = []
    for t in u['features']:
        if t['geometry']:
            t['properties']['geometry'] = shape(t['geometry'])
        t['properties']['date'] = pd.Timestamp(t['properties']['date'])
        tweets.append(t['properties'])

    df = pd.DataFrame(tweets)
    df = df.sort_values(by='date')

    # Positional lookup: after sorting, label 0 is still the first record of
    # the file, which is not necessarily the earliest one.
    first_date = df['date'].iloc[0]

    df['time'] = df['date'].apply(lambda x: x.strftime('%Y-%m-%dT%H:%M:%SZ'))
    df['h']    = df['date'].apply(lambda x: int((x - first_date).total_seconds()/3600))
    df['m']    = df['date'].apply(lambda x: int((x - first_date).total_seconds()/60))

    if q is not None:
        q.put(1)
    return df

In [5]:
# Parallel runtime: load every user file in a pool of worker processes.
p = Pool(30)
m = Manager()
q = m.Queue()  # each worker puts one item per finished file; qsize() is the progress count

args = [(i, input_directory, q) for i in users_in]

try:
    result = p.map_async(loader_function, args)

    # monitor loop: poll until every task has finished.
    # max(len(args), 1) avoids a ZeroDivisionError when there is nothing to process.
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, 100.0*size/max(len(args), 1)))
        time.sleep(0.5)
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(q.qsize(), 100.0*q.qsize()/max(len(args), 1)))
    users = result.get()  # re-raises any exception raised inside a worker
except BaseException:
    p.terminate()  # on failure or interrupt, stop the workers instead of leaking them
    raise
else:
    p.close()
finally:
    p.join()  # wait for the worker processes to exit

Processed: 329, 100%

# Write `geojson` files for the web

In [6]:
# Create the output directory if needed. makedirs also creates any missing
# parent directories, where mkdir would raise.
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

In [7]:
def _json_value(value):
    """Return ``value`` in a form that serializes to valid JSON (NaN becomes None)."""
    if isinstance(value, np.generic):
        # numpy scalars are not JSON serializable; unwrap to the Python scalar.
        value = value.item()
    if isinstance(value, float) and value != value:
        # NaN is the only float that differs from itself; JSON has no NaN literal.
        return None
    return value


def write_geojson(args):
    """Write one user's DataFrame as ``<user>.geojson`` in the output directory.

    Parameters
    ----------
    args : tuple
        ``(u, output_directory, q)``: the user's DataFrame (columns ``user``,
        ``time``, ``h``, ``m``, ``text``, ``speed``, ``cluster`` and optionally
        ``geometry``), the directory to write into, and either ``None`` or a
        queue that receives a ``1`` once the file is written.

    Returns
    -------
    int
        Always ``1``.

    Notes
    -----
    The file name is the lower-cased ``user`` value of the first row. Missing
    values (None / NaN) are written as JSON ``null``; a row without a geometry
    gets ``"geometry": null``.
    """
    u, output_directory, q = args

    # Positional lookup so the name does not depend on a row labelled 0 existing.
    file_stem = u['user'].iloc[0].lower()

    geojson = {'type':'FeatureCollection', 'features':[]}
    for _, row in u.iterrows():
        # .get tolerates frames in which no row carried a geometry column.
        geom_obj = _json_value(row.get('geometry'))
        geom = mapping(geom_obj) if geom_obj is not None else None

        feature = {'type':'Feature',
                   'geometry':geom,
                   'properties':{'time':_json_value(row['time']),
                                 'h':_json_value(row['h']),
                                 'm':_json_value(row['m']),
                                 'user':_json_value(row['user']),
                                 'text':_json_value(row['text']),
                                 's':_json_value(row['speed']),
                                 'c':_json_value(row['cluster'])
                                 }
            }
        geojson['features'].append(feature)

    with open(os.path.join(output_directory, file_stem+".geojson"),'w') as oFile:
        json.dump(geojson,oFile)

    if q is not None:
        q.put(1)
    return 1

In [8]:
# Parallel runtime: write every user's geojson file in a pool of worker processes.
p = Pool(30)
m = Manager()
q = m.Queue()  # each worker puts one item per written file; qsize() is the progress count

args = [(i, output_directory, q) for i in users]

try:
    result = p.map_async(write_geojson, args)

    # monitor loop: poll until every task has finished.
    # max(len(args), 1) avoids a ZeroDivisionError when there is nothing to process.
    while not result.ready():
        size = q.qsize()
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, 100.0*size/max(len(args), 1)))
        time.sleep(0.5)
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(q.qsize(), 100.0*q.qsize()/max(len(args), 1)))
    result.get()  # re-raises any exception raised inside a worker
except BaseException:
    p.terminate()  # on failure or interrupt, stop the workers instead of leaking them
    raise
else:
    p.close()
finally:
    p.join()  # wait for the worker processes to exit

Processed: 329, 100%

In [9]:
# Gather each loaded user's lower-cased screen name, read from the row labelled 0.
l = list()
for u in users:
    l += [u.user[0].lower()]

In [10]:
# Print the collected screen names in alphabetical order, one per line.
for x in sorted(l, reverse=False):
    print(x)

08243
1stfitnessmc
_daniellevaldez
aclive35
adtwenty7
aimee_sh
alanjonesbeats
alex_florke
alexrosenkranz
alreadywifedup1
alyssagabrielaa
ameliaianiro
americabheleem
amkowski11
amyy1026
amyywalshh
angie_was_heree
anniexdarling
appiemallous
aprilydavis
arozcoy
aschwerzy
ayo_tahj
b_bott84
b_mazzz
badblasian
barbershairport
bbygrl_xoxo
bbytripk
beanie5469
bellax3
belmardays
benirvine
billyhoward423
bkhomesteader
bleedblue0415
brian_perks
brii_fi
brittni_linn
broxlan
brucknerchase
brycenihill
c_it_my_way
caittfoley
calllmejas_
camposr1979
caseadilia
catymogo
chandlernichols
cheerbowsndhoes
chettyoubetcha
chippiesue
chris22_steve
chrisalicious85
chrissie115
chrisstineey
chuck430
chugggss
claire_reffitt
cmccluskey18
coachhollywoodp
coastalartists
cookecapemay
coolinlike_
coolkym
courtneyhabick
cravejams
creamstaffa
cwalks3
cybernjm
d_fitz_187
daddyjerzey
damageathletics
daniellemillen1
darrianrojas
debhanaway
deelooch
defrizzel
dinamarie8
djphilnappen
djsonatra
dominicksolazzo
donovanism
doub