# Turn a directory of per-user tweet files into GeoJSON that can be easily visualized on a map!

In [1]:
import pandas as pd; import numpy as np; from multiprocessing import Pool, Manager; import numpy as np;
import fiona, shapely; from osgeo import ogr; from shapely.geometry import mapping, shape
import matplotlib.pyplot as plt; import seaborn as sns
import matplotlib, os, json, sys, time, datetime

In [2]:
# Directory that is listed for per-user JSON files; each file found there is loaded below.
input_directory  = "/data/chime/geo/sandy_zone_a/stage_2_temporal_clusters_with_home"
# Directory the per-user .geojson files are written to. The leading '/data/www/'
# is stripped from this path when the viewer URL is built.
output_directory = "/data/www/chime/tmp"

In [3]:
# Collect the per-user file names in sorted order, leaving out the metadata
# summary file that sits in the same directory.
users_in = sorted(
    entry
    for entry in os.listdir(input_directory)
    if entry != "temporal_clustered_user_meta.json"
)
print("Found {0} users in {1}".format(len(users_in), input_directory))

Found 3 users in /data/chime/geo/sandy_zone_a/stage_2_temporal_clusters_with_home


In [4]:
def loader_function(args):
    """Load one user's JSON file into a DataFrame of tweet properties.

    Parameters
    ----------
    args : tuple
        ``(uFile, path, q)`` packed into a single tuple so the function can be
        handed to ``Pool.map_async``: ``uFile`` is the file name, ``path`` the
        directory containing it, and ``q`` a queue that receives one token
        when this file has been processed (used as a progress counter).

    Returns
    -------
    pandas.DataFrame
        One row per entry in the file's ``features`` list, built from each
        feature's ``properties`` dict. ``date`` is converted to a
        ``pd.Timestamp``; ``geometry`` is added (via ``shape``) only for
        features whose geometry is non-empty.
    """
    uFile, path, q = args
    # Use a context manager so the file handle is closed as soon as parsing is
    # done instead of lingering until garbage collection in the worker.
    with open(os.path.join(path, uFile), 'r') as handle:
        u = json.load(handle)
    tweets = []
    for t in u['features']:
        props = t['properties']
        if t['geometry']:
            props['geometry'] = shape(t['geometry'])
        props['date'] = pd.Timestamp(props['date'])
        tweets.append(props)
    # Signal completion of this file to the monitoring loop.
    q.put(1)
    return pd.DataFrame(tweets)

In [5]:
#Parallel runtime
# Load the user files in a worker pool. Every worker puts one token on the
# shared queue when it finishes a file, so the queue size doubles as a
# progress counter for the monitor loop below.
p = Pool(30)
m = Manager()
q = m.Queue()

args = [(i, input_directory, q) for i in users_in[:200]]
total = len(args)

try:
    result = p.map_async(loader_function, args)

    # monitor loop
    while not result.ready():
        size = q.qsize()
        # Guard against an empty work list so the percentage never divides by zero.
        percent = size/total*100 if total else 100
        sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, percent))
        time.sleep(0.5)

    size = q.qsize()
    percent = size/total*100 if total else 100
    sys.stderr.write("\rProcessed: {0}, {1:.3g}%".format(size, percent))
    # Re-raises here if any worker failed.
    users = result.get()
finally:
    # Always release the worker processes, even when a worker raised.
    p.close()
    p.join()

Processed: 3, 100%

In [6]:
# Display the first loaded user's DataFrame to check the parsed columns.
users[0]

Unnamed: 0,cluster,cluster_center,coords,date,day_cluster,geo_delta,geometry,home_cluster_id,speed,text,time_delta,tweet_id,uid,user
0,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1811384, 40.6225762]",2012-09-07 14:28:15+00:00,3,,,1.0,,@NickyRomey and have you had your salad tossed...,,244079641286025217,149765173,_DidIStudder
1,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1811384, 40.6225762]",2012-09-07 14:28:45+00:00,3,0.0,POINT (-74.18113839999999 40.6225762),,0.0,@HSutherland218 thank you Heather :-) have a g...,30.0,244079766200807424,149765173,_DidIStudder
2,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1811384, 40.6225762]",2012-09-07 14:29:03+00:00,3,0.0,POINT (-74.18113839999999 40.6225762),,0.0,@Hargitay1NY thanks liz. Enjoy your weekend,18.0,244079840087646208,149765173,_DidIStudder
3,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1811384, 40.6225762]",2012-09-07 14:29:37+00:00,3,0.0,POINT (-74.18113839999999 40.6225762),,0.0,@linnyitssn thanks buddy. Big Game tonight. Ti...,34.0,244079983973257216,149765173,_DidIStudder
4,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1814229, 40.6224266]",2012-10-27 03:43:27+00:00,6,29.210682,POINT (-74.1814229 40.6224266),,7e-06,@CatRP haven't heard anything yet. I do know i...,4281230.0,262036764653387776,149765173,_DidIStudder
5,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1815029, 40.6223801]",2012-10-27 23:44:03+00:00,11,8.504291,POINT (-74.1815029 40.6223801),,0.000118,@CatRP I hope you are safe when she hits,72036.0,262338903858638848,149765173,_DidIStudder
6,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1815029, 40.6223801]",2012-10-27 23:44:24+00:00,11,0.0,POINT (-74.1815029 40.6223801),,0.0,"@CatRP just south Jersey, it's mandatory",21.0,262338992605908992,149765173,_DidIStudder
7,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1815029, 40.6223801]",2012-10-27 23:46:03+00:00,11,0.0,POINT (-74.1815029 40.6223801),,0.0,While Fla vs UGA was enjoyable. The REAL SEC g...,99.0,262339405958750209,149765173,_DidIStudder
8,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1815029, 40.6223801]",2012-10-28 03:05:03+00:00,12,0.0,POINT (-74.1815029 40.6223801),,0.0,@CatRP kinda like Cashman lol,11940.0,262389486778986496,149765173,_DidIStudder
9,1.0,"{""type"": ""Point"", ""coordinates"": [-74.18144071...","[-74.1815788, 40.6223442]",2012-10-29 04:36:42+00:00,12,7.547879,POINT (-74.1815788 40.6223442),,8.2e-05,@CatRP couldn't agree more. \nTurned out to be...,91899.0,262774939327807488,149765173,_DidIStudder


# Write `geojson` files for the web

In [7]:
# Create the output directory, including any missing parent directories.
# exist_ok=True makes re-running the cell a no-op and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_directory, exist_ok=True)

In [8]:
def pad0(num):
    """Return ``num`` as a string, prefixed with "0" when it is below 10."""
    prefix = "0" if num < 10 else ""
    return prefix + str(num)

In [None]:
# Write one GeoJSON FeatureCollection per user and print a viewer link for it.
for u in users:
    # A file with no features loads as an empty frame without a 'user' column;
    # there is nothing to export for it.
    if len(u) == 0:
        continue

    file = name = u['user'].iloc[0].lower()
    # Positional access: first/last row regardless of the index labels.
    first_date = u['date'].iloc[0]
    last_date  = u['date'].iloc[-1]

    # The 'geometry' column is absent when none of the user's tweets had one.
    geometries = u['geometry'] if 'geometry' in u.columns else [None]*len(u)

    # Build the features by hand: DataFrame.to_json() emits pandas'
    # column-oriented JSON, not a GeoJSON FeatureCollection.
    features = []
    for geom, text, user, date in zip(geometries, u['text'], u['user'], u['date']):
        elapsed = (date - first_date).total_seconds()
        features.append({
            "type": "Feature",
            # Missing geometries are NaN/None and have no geo interface.
            "geometry": mapping(geom) if hasattr(geom, "__geo_interface__") else None,
            "properties": {
                "text": text,
                "user": user,
                "time": date.strftime('%Y-%m-%dT%H:%M:%SZ'),
                # Whole hours / minutes elapsed since the user's first tweet.
                "h": int(elapsed/3600),
                "m": int(elapsed/60),
            },
        })

    with open(os.path.join(output_directory, file+".geojson"),'w') as oFile:
        json.dump({"type": "FeatureCollection", "features": features}, oFile)

    start = first_date.strftime('%Y-%m-%dT%H:%M')
    end   = last_date.date()
    print(name + "\t" +"http://www.townsendjennings.com/geojson-tweets?geojson=http://epic-analytics.cs.colorado.edu:9000/"+output_directory.replace('/data/www/','')+"/{0}.geojson&start={1}&end={2}&unit=minutes&inspector=true".format(file, start, end))