In [1]:
import config, pickle
import matplotlib.pyplot as plt
import numpy as np
import models
import datetime as dt
import pandas as pd



In [3]:
def _load_pickle(path):
    """Return the object unpickled from ``path``, closing the file afterwards.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Incentive periods (IP) from each source. The two templated paths are filled
# in with a numeric parameter before loading.
fluid_IP = _load_pickle(config.fluid_incentive_periods)
optimal_IP = _load_pickle(config.optimal_incentive_periods.format(0.1))
static_IP = _load_pickle(config.static_incentive_periods.format(0.01))
dyn_CC_IP = models.get_dynamic_CC_IP(None)

# Flow rates (keyed by 'YYYY_MM_DD' date strings in the cells below), baseline
# rates and the trip table used by every plot in this notebook.
rates = _load_pickle(config.flow_rates_final)
base_rates = _load_pickle(config.base_rates_final)
tripDF = _load_pickle(config.tripDF_pickle)
rideProb = models.get_ride_prob(rates, base_rates)
AMstations = _load_pickle(config.curr_Istations)

In [5]:
%matplotlib inline
# Analysis window: first day present in `rates` through the end of the last one.
# sorted() replaces `rates.keys()` + in-place `.sort()`, which only works where
# keys() returns a list (Python 2).
dates = sorted(rates)
start_day = dt.datetime.strptime(dates[0], '%Y_%m_%d')
end_day = dt.datetime.strptime(dates[-1], '%Y_%m_%d') + dt.timedelta(days=1)

# Trips inside the window whose minuteIndex lies in [360, 720).
in_window = ((tripDF['startDatetime'] >= start_day) & (tripDF['endDatetime'] < end_day) &
             (tripDF['minuteIndex'] >= 360) & (tripDF['minuteIndex'] < 720))
starts = tripDF[in_window & (tripDF['start status'] < 0)]
ends = tripDF[in_window & (tripDF['end status'] > 0)]

start_CC = starts['startDeltaCC']
end_CC = ends['endDeltaCC']
# minuteIndex / 60 is used as the x coordinate ("Time of ride").
start_times = starts['minuteIndex'] / 60.
end_times = ends['minuteIndex'] / 60.

# Scatter Delta CC against time of ride: start-side values in blue,
# end-side values in red.
plt.figure(figsize=(100, 80))
plt.scatter(start_times, start_CC, alpha=0.1, c='b')
plt.scatter(end_times, end_CC, alpha=0.1, c='r')
plt.title("Time vs. Delta CC")
plt.xlabel("Time of ride")
plt.ylabel("Total Delta CC")
plt.ylim((-1.1, 1.1))
plt.savefig('./plots/scatter_incent_CC.png')
plt.close()

In [6]:
# Display the analysis window computed in the previous cell:
# (first day in `rates`, day after the last day in `rates`).
start_day, end_day

(datetime.datetime(2016, 10, 31, 0, 0), datetime.datetime(2016, 12, 15, 0, 0))

### Plotting Delta CC scatter plots for Dynamic_CC and the other incentive models

In [30]:
def _to_interval_ranges(periods_by_day, interval):
    """Convert per-station (start, end) periods into ranges of interval indices.

    Parameters
    ----------
    periods_by_day : mapping of day -> station -> sequence
        Elements 0 and 1 of each sequence are the period's start and end.
    interval : int
        Length of one interval, in the same unit as the period bounds.

    Returns
    -------
    dict
        day -> station -> range(start // interval, end // interval).
    """
    # `//` keeps the bounds integral on Python 3 as well; plain `/` would
    # produce floats there, which range() rejects.
    return {
        day: {
            station: range(period[0] // interval, period[1] // interval)
            for station, period in periods_by_day[day].items()
        }
        for day in periods_by_day
    }


static_IP2 = _to_interval_ranges(static_IP[0.0], config.interval)
fluid_IP2 = _to_interval_ranges(fluid_IP, config.interval)

In [47]:
%matplotlib inline
# NOTE: this cell calls plot_incent_CC(), which is defined in a cell further
# down the notebook; that cell has to be executed before this one.

# Analysis window (sorted() instead of keys() + .sort(), which is Python 2 only).
dates = sorted(rates)
start_day = dt.datetime.strptime(dates[0], '%Y_%m_%d')
end_day = dt.datetime.strptime(dates[-1], '%Y_%m_%d') + dt.timedelta(days=1)
trips = tripDF[(tripDF['startDatetime'] >= start_day) & (tripDF['endDatetime'] < end_day) &
               (tripDF['minuteIndex'] >= 360) & (tripDF['minuteIndex'] < 720)]

# Each IP is expected to map day -> station -> container of interval indices.
# NOTE(review): another cell indexes dyn_CC_IP as dyn_CC_IP[0.1] before using
# it per day; confirm which shape models.get_dynamic_CC_IP() really returns.
IPS = [dyn_CC_IP, fluid_IP2, static_IP2]
model_names = ['Dynamic_CC', 'Fluid', 'Optimized']
plot_XY = []

for IP, model_name in zip(IPS, model_names):
    # Pieces are collected in lists and concatenated once per model instead of
    # re-concatenating the growing arrays on every iteration.
    incent_parts = ([], [])   # (times, delta CC) inside an incentive period
    other_parts = ([], [])    # (times, delta CC) outside any incentive period

    for d in IP:
        # The per-day slice does not depend on the station, so compute it once.
        today = dt.datetime.strptime(d, '%Y_%m_%d')
        tmrw = today + dt.timedelta(days=1)
        ctrips = trips[(trips['startDatetime'] >= today) & (trips['endDatetime'] < tmrw)]

        for s in IP[d]:
            starts = ctrips[(ctrips['startID'] == s) & (ctrips['start status'] < 0)]
            ends = ctrips[(ctrips['endID'] == s) & (ctrips['end status'] > 0)]

            # Interval indices 12..23; together with the [360, 720) filter
            # above this presumably assumes config.interval == 30 - TODO confirm.
            for ind in range(12, 24):
                start = config.interval * ind
                end = config.interval * (ind + 1)

                startData = starts[(starts.minuteIndex >= start) & (starts.minuteIndex < end)]
                endData = ends[(ends.minuteIndex >= start) & (ends.minuteIndex < end)]

                CC_A = np.concatenate((np.array(startData['startDeltaCC']),
                                       np.array(endData['endDeltaCC'])))
                time_A = np.concatenate((np.array(startData['minuteIndex']),
                                         np.array(endData['minuteIndex'])))

                target = incent_parts if ind in IP[d][s] else other_parts
                target[0].append(time_A)
                target[1].append(CC_A)

    # As before, a group stays None when no (day, station, interval) fell into it.
    incent_trips = [np.concatenate(p) for p in incent_parts] if incent_parts[0] else None
    other_trips = [np.concatenate(p) for p in other_parts] if other_parts[0] else None

    plot_XY.append((incent_trips, other_trips))
    if incent_trips is None or other_trips is None:
        # Previously this case crashed inside plot_incent_CC on None[0].
        print('Skipping plot for {0}: no incentivised or no other trips'.format(model_name))
    else:
        plot_incent_CC(incent_trips, other_trips, model_name)

In [46]:
import matplotlib

# Shared font settings for the figures. 'normal' is not a font family that
# matplotlib knows (it only triggers "font family not found" warnings and a
# fallback to the default), so the default family is named explicitly.
font = {'family': 'sans-serif',
        'weight': 'bold',
        'size': 30}

matplotlib.rc('font', **font)


def plot_incent_CC(incent_trips, other_trips, filename):
    """Scatter the negated Delta CC of incentivised vs. other trips over time.

    Parameters
    ----------
    incent_trips, other_trips : sequence (times, delta_cc) or None
        Element 0 holds ride times, which are divided by 60 for the
        "Time of ride (Hours)" axis; element 1 holds Delta CC values, which
        are multiplied by -1 before plotting. ``None`` is treated as
        "no trips" and plotted as an empty series.
    filename : str
        Used both as the legend label of the incentivised series and in the
        output path ``./plots/scatter_CC_<filename>.png``.

    Returns
    -------
    None
        The figure is written to disk and closed.
    """
    # Re-apply the font settings in case rc parameters changed since the
    # module-level call.
    matplotlib.rc('font', **font)

    def _prepare(trip_pair):
        # Convert one (times, delta_cc) pair into plot coordinates.
        if trip_pair is None:
            return np.array([]), np.array([])
        return np.asarray(trip_pair[0]) / 60., np.asarray(trip_pair[1]) * -1

    incent_times, incent_CC = _prepare(incent_trips)
    other_times, other_CC = _prepare(other_trips)

    # Scatter (negated) Delta CC against time of ride.
    fig, ax = plt.subplots(figsize=(30, 20))
    try:
        ax.scatter(incent_times, incent_CC, alpha=0.3, s=100, c='r', label=filename)
        ax.scatter(other_times, other_CC, alpha=0.3, s=100, c='b', label='Other Trips')
        ax.set_title("Impact of Bike Angels")
        ax.set_xlabel("Time of ride (Hours)")
        ax.set_ylabel("Improvement in Objective")
        ax.set_xlim((5.9, 12.1))
        ax.set_ylim((-1.1, 1.1))
        ax.legend(loc='lower left')
        fig.savefig('./plots/scatter_CC_{0}.png'.format(filename))
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

182
224
224


In [7]:
# Plot for each day, each station, the Incentive periods we would provide and the rides.

%matplotlib inline

static_IP_01 = static_IP[0.01]

# Analysis window (sorted() instead of keys() + .sort(), which is Python 2 only).
dates = sorted(rates)
start_day = dt.datetime.strptime(dates[0], '%Y_%m_%d')
end_day = dt.datetime.strptime(dates[-1], '%Y_%m_%d') + dt.timedelta(days=1)
trips = tripDF[(tripDF['startDatetime'] >= start_day) & (tripDF['endDatetime'] < end_day) &
               (tripDF['minuteIndex'] >= 360) & (tripDF['minuteIndex'] < 720)]

# The jitter below is purely cosmetic; a dedicated, seeded generator makes the
# figures reproducible without touching numpy's global random state.
jitter_rng = np.random.RandomState(0)


def _draw_incentive_period(period, color, label):
    """Mark one (start, end) period, given in minutes, with two vertical lines.

    Both bounds are shifted by the same random offset so that periods from
    different models that coincide remain distinguishable.
    """
    (begin, finish) = period
    noise = jitter_rng.normal(0, 0.2)
    plt.vlines(x=begin / 60. + noise, ymin=-2.1, ymax=2.1, colors=color, label=label)
    plt.vlines(x=finish / 60. + noise, ymin=-2.1, ymax=2.1, colors=color)


for d in fluid_IP:
    # The per-day slice does not depend on the station, so compute it once.
    today = dt.datetime.strptime(d, '%Y_%m_%d')
    tmrw = today + dt.timedelta(days=1)
    day_trips = trips[(trips['startDatetime'] >= today) & (trips['endDatetime'] < tmrw)]

    for s in fluid_IP[d]:
        start_trips = day_trips[(day_trips['startID'] == s) & (day_trips['start status'] < 0)]
        end_trips = day_trips[(day_trips['endID'] == s) & (day_trips['end status'] > 0)]

        # Only plot if there are more than 5 points.
        if len(start_trips) + len(end_trips) <= 5:
            continue

        fig = plt.figure()
        if len(start_trips) > 0:
            plt.scatter((start_trips['minuteIndex'] / 60.), start_trips['startDeltaCC'], c='r')
        if len(end_trips) > 0:
            plt.scatter((end_trips['minuteIndex'] / 60.), end_trips['endDeltaCC'], c='b')
        plt.title("Station:{0}, Date:{1}".format(s, d))
        plt.xlabel("Time of ride")
        plt.ylabel("Total Delta CC")
        plt.ylim((-2.1, 2.1))
        plt.xlim((5.5, 12.5))

        # Fluid incentive periods
        _draw_incentive_period(fluid_IP[d][s], 'r', 'Fluid IP')

        # Optimal incentive periods (currently disabled)
        # _draw_incentive_period(optimal_IP[d][s], 'b', None)

        # Static hindsight incentive periods
        _draw_incentive_period(static_IP_01[d][s], 'g', 'Static Hindsight')

        plt.legend()
        # The placeholders are date first, then station; the arguments used to
        # be passed the other way round (s, d), mislabelling every file.
        fig.savefig('./plots/scatter_CC_incent_only/date_{0}_station_{1}.png'.format(d, s))
        plt.close(fig)

In [61]:
# NOTE: `trip_stations` is defined in a cell further down the notebook; that
# cell has to be executed before this one.

# Stations present in the flow rates of one reference day.
stations = list(rates['2016_12_06'])
IPS = [fluid_IP, static_IP[0.1], dyn_CC_IP[0.1]]
names = ['fluid', 'static', 'dynamic']

# For every model, report the stations that lack an incentive period on at
# least one of the days in `rates`.
for IP, name in zip(IPS, names):
    station_set = {s for d in rates for s in stations if s not in IP[d]}
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(name)
    print(station_set)

# Stations that appear both in the rates and in the filtered trips.
common_stations = set(stations).intersection(set(trip_stations))
print(len(common_stations))
print(len(trip_stations))

fluid
set([])
static
set([])
dynamic
set([486, 517, 137, 266, 524, 526, 399, 533, 150, 3223, 408, 153, 3098, 284, 157, 393, 545, 546, 3235, 164, 167, 3242, 457, 173, 3246, 3119, 469, 3124, 342, 265, 312, 313, 195, 453, 2017, 332, 339, 3147, 3148, 462, 336, 3153, 3154, 467, 468, 3157, 470, 472, 484, 346, 493, 478, 352, 225, 482, 356, 485, 358, 359, 360, 3068, 362, 487, 237, 238, 367, 368, 488, 302, 500, 3061, 247, 2004, 383, 380, 509, 127])
147
181


In [52]:
# NOTE: `trip_stations` is defined in a cell further down the notebook; that
# cell has to be executed before this one.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(config.station_fill_lvls, 'rb') as fh:
    station_fill_lvls = pickle.load(fh)
with open(config.all_CC, 'rb') as fh:
    CC = pickle.load(fh)

# Report every (station, interval index) pair for which CC has no entry.
for s in trip_stations:
    for i in range(12, 24):
        try:
            CC[i][s]  # lookup only; the value itself is not needed
        except LookupError:
            # Only missing keys/indices are expected here; anything else
            # should surface instead of being silently swallowed.
            print('{0} {1}'.format(s, i))
    
            

In [50]:
# Station IDs touched by the filtered trips. The minute window is the
# half-open range [360, 720), matching the filter used by every other cell in
# this notebook (this cell previously used `<= 720`).
in_morning = (tripDF.minuteIndex >= 360) & (tripDF.minuteIndex < 720)
starts = tripDF[(tripDF['start status'] < 0) & in_morning]['startID']
ends = tripDF[(tripDF['end status'] > 0) & in_morning]['endID']
all_stations = pd.concat((starts, ends))

In [51]:
# Unique station IDs among all filtered trip starts and ends.
trip_stations = {station_id for station_id in all_stations.tolist()}

In [62]:
# Persist the common station set; the `with` block guarantees the file is
# flushed and closed once the dump has finished.
with open('./data_files/common_stations.p', 'wb') as out_file:
    pickle.dump(common_stations, out_file)