In [9]:
import __init__
#
from helping_functions import draw_grid_on_map
#
from taxi_common.file_handling_functions import get_all_files, load_pickle_file
#
import plotly.plotly as py
import pandas as pd
import folium

# Zone generation
* Define zones which split Singapore in grid form
    * 96 columns and 53 rows
* Area of a grid is 0.5km X 0.5km

In [11]:
from taxi_common.sg_grid_zone import get_sg_grid_xy_points
#
x_points, y_points = get_sg_grid_xy_points()
xc, yc = (x_points[0] + x_points[-1]) / float(2), (y_points[0] + y_points[-1]) / float(2)

map_osm = folium.Map(location=[yc, xc], zoom_start=11)
map_osm = draw_grid_on_map(map_osm, x_points, y_points)
map_osm

# Data processing
## Considered trip instance for analysis 
* Time frames
    * Monday to Thursday
    * **2:00PM to 11:00PM**
* **Only full-time drivers** (one shift drivers who don't share his vehicle with others)
* **Ignore last-mile trips** (less than 2km)

## Trip instance process
* Find the specific zone where the trip occured, based on the start location (GPS coordinates) and end location
    * For simplicity, record zone's grid coordinates
* Save each day's instances in a file; the following is an example

In [8]:
from community_analysis import trip_dir
df = pd.read_csv('%s/0901/20090101.csv' % trip_dir)
df.head()

Unnamed: 0,time,did,start-long,start-lat,end-long,end-lat,distance,duration,fare,si,sj,ei,ej
0,1231135200,33404,103.82989,1.35936,103.84033,1.38369,3.7,360,480,49,27,51,32
1,1231135200,33567,103.80901,1.32492,103.76879,1.33745,4.9,840,700,44,19,36,22
2,1231135200,25824,103.76673,1.38488,103.78828,1.31031,11.4,960,920,35,32,40,16
3,1231135200,20504,103.69427,1.34096,103.84461,1.284,20.7,1560,1800,19,22,52,10
4,1231135200,35196,103.90901,1.33148,103.9889,1.31425,12.4,960,1000,67,20,85,17


## Daily link process
* A link can be generated if two drivers pick up passengers at the same zone within 30 minutes
    * Each zone has a queue data structure which saves driver ids in arrival order
* Link's weight increase whenever the two drivers who form the link pick up passengers at the same location within 30 minutes
![link_weight_increment](src/link_weight_increment.png)
    * A driver can pick up two passenger at the same zone within 30 minutes 
        * The number of linkage can increase more than one
    * Ensure only one linkage increment when a driver pick a passenger one
* After counting all link's weight, **ignore links whose weight is less than two**
* An example

In [24]:
from community_analysis import ld_dir
link_daily = load_pickle_file('%s/0901/20090101.pkl' % ld_dir)
for i, (did0, num_trip, link) in enumerate(link_daily):
    print "The total number of trips in a day of driver %s is %d" % (did0, num_trip)
    for num_encounter, did1 in sorted([(num_encounter, did1) for did1, num_encounter in link.iteritems()], reverse=True)[:5]:
        print '\t Encounter with driver %s, %d times' % (did1, num_encounter)
    if i == 1:
        break
                      

The total number of trips in a day of driver 35234 is 6
	 Encounter with driver 31327, 4 times
	 Encounter with driver 2961, 3 times
	 Encounter with driver 26586, 3 times
	 Encounter with driver 12173, 3 times
	 Encounter with driver 9293, 2 times
The total number of trips in a day of driver 35543 is 6
	 Encounter with driver 5566, 2 times
	 Encounter with driver 37204, 2 times
	 Encounter with driver 33635, 2 times
	 Encounter with driver 29313, 2 times
	 Encounter with driver 15594, 2 times


## Annual link process
* Aggregate daily links
    * For each link, count the number of day the link appeared in a year
    * Current dataset is Y2009 (11 months considered, except December)
    * 184 days considered for the analysis
* Filter out links whose weight is less than a threshold value
    * The following result's threshold value is **92 days** (184 days X 0.5)
        * Somewhat strict
        * But, if the threshold is small, few (two or three) communities can be detected
        * However cannot check evolution of community properly
    * Already checked for other threshold values

'/Users/JerryHan88/PycharmProjects/taxi_projects/community_analysis/data/trips'