In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
import contextily as cx
import datetime as dt
import json
import time
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

# Project-local helpers. The star import hides where names come from; helpers
# that are used further down but not defined in this notebook (e.g.
# import_clean_data, scaleup) presumably come from this module - TODO confirm
# and consider importing them explicitly.
from functions_file import *
# Smoke test: calls one helper (presumably from functions_file) to confirm the import worked.
print(testfunction(4))

### 1. Import the cleaned per-polygon data (parking not removed) and add basic information

In [None]:
# Storage format handed to the data-loading helper for the per-polygon files.
file_type = 'pickle'

# Polygon table; the analysis runs once per polygon name.
polygons = pd.read_csv('../data/polygons11.csv') #polygons11

# Every polygon except 'polygon_r4' is processed.
polygon_names = polygons.loc[polygons['name'] != 'polygon_r4', 'name'].values

In [None]:
# Parameters of the stop detection (all tunable):
# waypoints with a speed strictly below this value are treated as standing still
speed_threshold = 1 #km/h
# standing waypoints are only kept for timestamps at which the mean speed of all
# vehicles of type 'Car' is strictly above this value
min_mean_speed = 15 #km/h
# if two consecutive standing waypoints of a vehicle are further apart in time
# than this, the later one starts a new stop
min_diff_between_timestamps = '3s' #timedelta string, compared against timestamp differences
# stops have to last strictly longer than this to be kept
min_duration_of_stops = 5 #seconds
# only vehicles of these types are considered when looking for stops
vehicle_type_considered_for_stop = ['Bus','Taxi', 'Medium Vehicle', 'Heavy Vehicle']

In [None]:
#########################################
# For every polygon: detect stops of the selected vehicle types and lane changes,
# and attach both to the q/k/v interval table of that polygon.
save = 'off'  # 'on' writes one result pickle per polygon; the merge cell further down reads these files

# An event is assigned to the interval labelled `times` when it falls between
# times - 150s and times + 30s.
interval_lookback = pd.Timedelta('150s')
interval_lookahead = pd.Timedelta('30s')


def _corner_to_xy(corner):
    """Turn a 'a,b' corner string from the lanes csv into the float pair [b, a].

    The swap presumably converts 'lat,lon' into shapely's (x=lon, y=lat) order - TODO confirm.
    """
    first, second = corner.split(',')[:2]
    return [float(second), float(first)]


# load q,k,v data per interval (30 secs on rolling 3 minutes)
# The table and the scale factors do not depend on the polygon, so they are read
# and scaled once instead of once per loop iteration.
intervals_all = pd.read_pickle('../output/data_processed/processed_data_all_bypolygon.pkl')
scalefactorsfile = '../output/data_processed/scalefactors_bypolygon.pkl'
# pd.read_pickle loads arbitrary pickled objects and needs no extra import.
# NOTE: unpickling executes code - only read files produced by this project.
scalefactors = pd.read_pickle(scalefactorsfile)
# add scaled up flows and densities
cs = ['q_all_MS','k_all_MS','q_all_LD','k_all_LD']
for c in ['q_all_MS','k_all_MS','q_all_LD','k_all_LD']:
    intervals_all['%s_s'%c] = scaleup(intervals_all[c],intervals_all['polygon'],scalefactors,c)
    cs.append('%s_s'%c)
intervals_all['v2'] = intervals_all.q_all_LD_s / intervals_all.k_all_LD_s

#run over all polygons
for polygon_name in polygon_names:
    file_name = '../output/data_clean/prepared_data_%s.pkl'%(polygon_name)
    waypoints_w_dist_mode = import_clean_data(file_name,file_type,polygon_name)

    #assure that geometry is of type point ('POINT (x y)' strings -> shapely Points)
    if len(waypoints_w_dist_mode) > 0 and isinstance(waypoints_w_dist_mode.iloc[0]['geometry'], str):
        waypoints_w_dist_mode['geometry'] = waypoints_w_dist_mode['geometry'].apply(
            lambda wkt: Point([float(coord) for coord in wkt[7:-1].split()]))
        waypoints_w_dist_mode = gpd.GeoDataFrame(waypoints_w_dist_mode,
                                                 geometry=waypoints_w_dist_mode.geometry, crs="EPSG:4326")

    # ------------------------------------------------------------------ stops
    #filter for speed < speed_threshold
    data = waypoints_w_dist_mode[waypoints_w_dist_mode['speed'] < speed_threshold]

    #calculate mean speed for each timestep, merge the mean speed to each datapoint
    # NOTE: the mean is taken over vehicles of type 'Car' only, although the
    # message printed below speaks of "all vehicles".
    mspd = (waypoints_w_dist_mode[waypoints_w_dist_mode['type'].isin(['Car'])]
            .groupby('timestamp')['speed'].mean()
            .rename('mean_speed')
            .reset_index())
    # join on the column *name*; passing the Series itself adds a stray 'key_0' column
    data = data.join(mspd.set_index('timestamp'), on='timestamp')

    #filter for vehicle types and for timesteps, where the mean speed > min_mean_speed
    # .copy() so that the columns added below do not write into a slice of `data`
    spvec_stops = data[data['type'].isin(vehicle_type_considered_for_stop)
                       & (data['mean_speed'] > min_mean_speed)].copy()

    print('Vehicles stopping, when the mean speed of all vehicles was bigger than %s km/h: '%min_mean_speed, str(len(spvec_stops.unique_id.unique())))

    # Calculate speed differences for every time step, calculate timestamp difference for all ids beneath the threshold
    # Where the time diff is bigger than 'min_diff_between_timestamps' seconds, a new stop id is appended
    # The running stop id below is only correct when the rows of one vehicle are
    # contiguous and in time order, so enforce that ordering explicitly.
    spvec_stops = spvec_stops.sort_values(['unique_id', 'timestamp'])
    spvec_stops['speed_diff'] = spvec_stops.groupby('unique_id')['speed'].diff()
    spvec_stops['time_diff'] = spvec_stops.groupby('unique_id')['timestamp'].diff()
    spvec_stops['td_val'] = spvec_stops['time_diff'] > pd.Timedelta(min_diff_between_timestamps)
    # first sample of a vehicle (no time_diff) or a gap above the threshold -> next stop id (ids start at 1)
    spvec_stops['stop_id'] = (spvec_stops['time_diff'].isna() | spvec_stops['td_val']).cumsum()

    #calculate duration of stops, filter for minimum 'min duration of stop' second-stops
    # (named aggregation instead of `.max('timestamp')`, which passed the string as `numeric_only`)
    aggr = spvec_stops.groupby('stop_id').agg(
        max_t=('timestamp', 'max'),
        min_t=('timestamp', 'min'),
        type=('type', 'first'),  # a stop belongs to exactly one vehicle, so its type is constant
    )
    aggr['span_per_stop'] = (aggr['max_t']-aggr['min_t'])/np.timedelta64(1, 's')
    aggr = aggr[aggr['span_per_stop'] > min_duration_of_stops]     #min duration of stop
    print('Number of stopped vehicles after filtering by min duration: %s'%len(aggr))

    #add the stops to the intervals
    start = time.time()
    intervals_MS = intervals_all[intervals_all['polygon'] == polygon_name].copy()
    n_intervals = len(intervals_MS)
    window_start = intervals_MS['times'] - interval_lookback
    window_end = intervals_MS['times'] + interval_lookahead

    stop_count = np.zeros(n_intervals)
    veh_stop_types = [[] for _ in range(n_intervals)]
    veh_stop_lengths = [[] for _ in range(n_intervals)]
    for min_t, max_t, veh_type, span in zip(aggr['min_t'], aggr['max_t'], aggr['type'], aggr['span_per_stop']):
        # A stop belongs to an interval when it is in progress at the window start
        # or begins inside the window, i.e. when the two time ranges overlap.
        # Written as one overlap test so that a stop beginning exactly at the
        # window start is counted as well.
        overlaps = ((window_end > min_t) & (window_start < max_t)).to_numpy()
        stop_count += overlaps
        # additionally record vehicle type and duration of that stop; both lists
        # are appended in the same order so that entry i of one list belongs to
        # entry i of the other
        for pos in np.flatnonzero(overlaps):
            veh_stop_types[pos].append(str(veh_type))
            veh_stop_lengths[pos].append(float(span))
    intervals_MS['stop_count'] = stop_count
    # always lists (empty when no stop matched), also for polygons without any stop
    intervals_MS['veh_stop_types'] = veh_stop_types
    intervals_MS['veh_stop_lengths'] = veh_stop_lengths
    print('Duration of calculating stops: ', (time.time()-start))

    # ----------------------------------------------------------- lane changes
    # Import the coordinates for the lanes from csv-files (one row per lane, four corner columns).
    lanes_coords = pd.read_csv('../data/%s_Lanes.csv'%polygon_name,sep=';')
    lanes_df = pd.DataFrame(columns = ['lane_coords'])
    for id_,lane in lanes_coords.iterrows():
        lanes_df.loc[id_] = Polygon([_corner_to_xy(lane.OL), _corner_to_xy(lane.OR),
                                     _corner_to_xy(lane.UR), _corner_to_xy(lane.UL)])

    # Match the points on the link to the lanes, starting with lane 0 from the right most lane in driving direction
    # (-1 = in no lane; if lane polygons overlap, the lane with the higher id wins)
    waypoints_w_dist_mode['lane_id'] = -1
    for id_,lane_polygon in lanes_df.iterrows():
        in_lane = waypoints_w_dist_mode.geometry.within(lane_polygon['lane_coords'])
        waypoints_w_dist_mode['lane_id'] = np.where(in_lane, id_, waypoints_w_dist_mode['lane_id'])

    # Calculate lane changes: a waypoint whose lane differs from the vehicle's previous waypoint
    waypoints_w_dist_mode = waypoints_w_dist_mode[waypoints_w_dist_mode['lane_id'] > -1].copy()
    waypoints_w_dist_mode['lane_id_diff'] = waypoints_w_dist_mode.groupby('unique_id')['lane_id'].diff()
    waypoints_w_dist_mode['lane_change'] = (waypoints_w_dist_mode['lane_id_diff'].notna()
                                            & (waypoints_w_dist_mode['lane_id_diff'] != 0))

    # Set lane changes per vehicle and interval
    # Keep (for every vehicle) the first timestamp where the lane change was performed and remove motorcycles
    l_c = waypoints_w_dist_mode[waypoints_w_dist_mode['lane_change']]
    l_c = l_c[l_c['type']!='Motorcycle'].groupby('unique_id').first()

    # Count, per interval, the lane changes whose timestamp lies strictly inside the interval window
    lane_changes = np.zeros(n_intervals)
    for change_time in l_c['timestamp']:
        lane_changes += ((window_start < change_time) & (window_end > change_time)).to_numpy()
    intervals_MS['lane_changes'] = lane_changes

    if save == 'on':
        intervals_MS.to_pickle('../output/data_processed_events_%skmh_%ssec/processed_data_%s.pkl'%(min_mean_speed,min_duration_of_stops,polygon_name))
    print('Calculation of stops and lane changes for %s done!'%polygon_name)

# Load and merge the per-polygon results

In [None]:
# One result pickle per polygon, named after the stop-detection parameters.
event_pickles = ['../output/data_processed_events_%skmh_%ssec/processed_data_%s.pkl'%(min_mean_speed,min_duration_of_stops,name)
                 for name in polygon_names]
# Stack the per-polygon tables into one frame.
all_MS = pd.concat([pd.read_pickle(path) for path in event_pickles])

In [None]:
# Store the merged table next to the per-polygon result files.
merged_pickle = '../output/data_processed_events_%skmh_%ssec/processed_data_all_bypolygon.pkl'%(min_mean_speed,min_duration_of_stops)
all_MS.to_pickle(merged_pickle)