"""classical_models.ipynb
by: Archie Gertsman (arkadiy2@illinois.edu)
Project director: Richard Sowers
r-sowers@illinois.edu https://publish.illinois.edu/r-sowers/
Copyright 2019 University of Illinois Board of Trustees. All Rights Reserved. Licensed under the MIT license
"""

In [1]:
import sys
sys.path.append('../src/')
sys.path.append('../data/')
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from modeling_helpers import *
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
# Feature column -> list of aggregation names. Later cells pass this mapping to `downsample`.
agg_dict = {
    'xtrack_dist': ['std'],
    'avg_surr_speed': ['mean', 'std'],
    'lanes': ['mean'],
    'len': ['mean'],
    'speed': ['mean', 'std'],
    'vehicle_density': ['mean'],
    'speed_bool': ['sum'],
    'acc_edge': ['mean', 'std'],
    'acc_per_edge': ['mean', 'std'],
}

# Read the pickled frame and add a 0/1 flag marking rows whose speed is positive.
df = pd.read_pickle('../data/block4_updated.pkl')
df['speed_bool'] = df['speed'].gt(0).astype(int)

# Keep only the columns that will be aggregated, plus the class label.
feature_cols = [*agg_dict]
df = df[feature_cols + ['type']]

# Fuse the first two index levels into a single 'id' string and keep the
# third level as 'road', giving a two-level (id, road) index.
id_labels = df.index.map(lambda key: f'{key[0]}_{key[1]}')
road_labels = df.index.get_level_values(2)
df.index = pd.MultiIndex.from_arrays([id_labels, road_labels], names=['id', 'road'])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,xtrack_dist,avg_surr_speed,lanes,len,speed,vehicle_density,speed_bool,acc_edge,acc_per_edge,type
id,road,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
4_1_1,250699362_250699984,-1.883401,10.464171,5.4,97.581,11.9046,7,1,0.113220,0.021953,Taxi
4_1_1,250699362_250699984,-1.980795,10.457843,5.4,97.581,11.8975,7,1,0.100360,0.016867,Taxi
4_1_1,250699362_250699984,-1.937041,10.452857,5.4,97.581,11.8919,7,1,0.092194,0.013188,Taxi
4_1_1,250699362_250699984,-1.893287,10.448586,5.4,97.581,11.8871,7,1,0.087837,0.010734,Taxi
4_1_1,250699362_250699984,-1.941984,10.444986,5.4,97.581,11.8831,7,1,0.080021,0.007273,Taxi
...,...,...,...,...,...,...,...,...,...,...,...
1_3_2116,300400248_8446047162,0.658316,26.005850,6.5,106.837,25.3482,2,1,-0.253254,0.362093,Car
1_3_2116,300400248_8446047162,0.764478,26.008150,6.5,106.837,25.3217,2,1,-0.255583,0.360602,Car
1_3_2116,300400248_8446047162,0.736857,26.010500,6.5,106.837,25.2946,2,1,-0.254103,0.356759,Car
1_3_2116,300400248_8446047162,0.783032,26.012950,6.5,106.837,25.2667,2,1,-0.259040,0.351797,Car


In [None]:
# NOTE(review): this cell has no execution count, and read top to bottom `df_agg` is
# not assigned until a later cell, so it would likely fail on Restart & Run All.
# It also reads `speed_bool_count`, while `agg_dict` only requests 'sum' for
# `speed_bool` -- presumably a leftover from an earlier experiment; confirm or remove.
df_agg = df_agg[0.75*df_agg.speed_bool_count > df_agg.speed_bool_sum]

In [3]:
# Disabled trajectory-length / speed-ratio filter, kept for reference:
# min_traj_len = 300
# min_speed_ratio = 1

# def speed_ratio(grp, min_speed=0):
#     return len(grp[grp.speed > min_speed]) / len(grp)

# df = df.groupby(['id','road']) \
#     .filter(lambda grp: (len(grp) >= min_traj_len) & (speed_ratio(grp) >= min_speed_ratio))
# df

# Take the rows labelled 'Car', draw a random half of their distinct ids, and
# relabel every row belonging to a drawn id as 'Car_1'.
df_car_1 = df[df['type'].eq('Car')].copy()
df_index = df_car_1.reset_index()[['id']].drop_duplicates()
picked_ids = df_index['id'].sample(frac=0.5)
in_picked = df_car_1.index.get_level_values('id').isin(picked_ids)
df_car_1.loc[in_picked, 'type'] = 'Car_1'
 

In [11]:
# Aggregate the full frame with the `downsample` helper, fit a scaler + gradient
# boosting pipeline on the resulting split, and show the value returned by `accuracy`.
df_agg = downsample(df, 100, 0.3, agg_dict)

# `speed_bool` is a 0/1 flag and `agg_dict` requests its sum, so this comparison
# appears to keep any row with at least one positive-speed sample.
# NOTE(review): if a 75% moving fraction was intended, the threshold needs scaling -- confirm.
df_agg = df_agg[0.75 < df_agg.speed_bool_sum]

# Remove the filter-only column with `drop` so the remaining columns keep the order
# produced by `downsample`; a set difference would yield an arbitrary column order
# that can differ between kernel sessions.
df_agg = df_agg.drop(columns=['speed_bool_sum'])

(X_train, y_train), (X_test, y_test) = train_test_split_vehicles(df_agg, 200)

model = Pipeline([('scaler', StandardScaler()), ('gbm', GradientBoostingClassifier())])
model.fit(X_train, y_train)

accuracy(model, X_test, y_test)

0.5125

In [12]:
# Build a frame of 'Car' rows only, then relabel every row of a randomly drawn half
# of the distinct car ids as 'Taxi' (presumably a chance-level control for the
# car-vs-taxi model -- confirm).
df_car_1 = df[df.type == 'Car'].copy()
df_index = df_car_1.reset_index()[['id']].drop_duplicates()
relabel_ids = df_index['id'].sample(frac=0.5)
is_relabelled = df_car_1.index.get_level_values('id').isin(relabel_ids)
df_car_1.loc[is_relabelled, 'type'] = 'Taxi'

df_agg_1 = downsample(df_car_1, 100, 0.3, agg_dict)

# `speed_bool` is a 0/1 flag and `agg_dict` requests its sum, so this comparison
# appears to keep any row with at least one positive-speed sample.
# NOTE(review): if a 75% moving fraction was intended, the threshold needs scaling -- confirm.
df_agg_1 = df_agg_1[0.75 < df_agg_1.speed_bool_sum]

# Remove the filter-only column with `drop` so the remaining columns keep the order
# produced by `downsample`; a set difference would yield an arbitrary column order
# that can differ between kernel sessions.
df_agg_1 = df_agg_1.drop(columns=['speed_bool_sum'])

(X_train, y_train), (X_test, y_test) = train_test_split_vehicles(df_agg_1, 200)

model = Pipeline([('scaler', StandardScaler()), ('gbm', GradientBoostingClassifier())])
model.fit(X_train, y_train)

accuracy(model, X_test, y_test)

0.5

In [13]:
# Repeat both experiments ten times and print the accuracy of each run:
#   1. the full frame with its original labels ('car, taxi accuracy'), and
#   2. 'Car' rows only, with a random half of the car ids relabelled 'Taxi'
#      ('car, car accuracy').
# In both, the filter-only `speed_bool_sum` column is removed with `drop` so the
# remaining columns keep the order produced by `downsample`; a set difference would
# yield an arbitrary column order that can differ between kernel sessions.
# NOTE(review): `speed_bool` is a 0/1 flag whose sum is compared against 0.75, so the
# filter appears to keep any row with at least one positive-speed sample -- confirm
# whether a 75% fraction was intended.
for i in range(10):
    # Experiment 1: original labels.
    df_agg = downsample(df, 100, 0.3, agg_dict)
    df_agg = df_agg[0.75 < df_agg.speed_bool_sum]
    df_agg = df_agg.drop(columns=['speed_bool_sum'])
    (X_train, y_train), (X_test, y_test) = train_test_split_vehicles(df_agg, 200)

    model = Pipeline([('scaler', StandardScaler()), ('gbm', GradientBoostingClassifier())])
    model.fit(X_train, y_train)

    acc = accuracy(model, X_test, y_test)
    print('car, taxi accuracy:',acc)

    # Experiment 2: a fresh copy of the 'Car' rows each iteration, so relabelling
    # from a previous pass never accumulates.
    df_car_1 = df[df.type == 'Car'].copy()
    df_index = df_car_1.reset_index()[['id']].drop_duplicates()
    relabel_ids = df_index['id'].sample(frac=0.5)
    is_relabelled = df_car_1.index.get_level_values('id').isin(relabel_ids)
    df_car_1.loc[is_relabelled, 'type'] = 'Taxi'

    df_agg_1 = downsample(df_car_1, 100, 0.3, agg_dict)
    df_agg_1 = df_agg_1[0.75 < df_agg_1.speed_bool_sum]
    df_agg_1 = df_agg_1.drop(columns=['speed_bool_sum'])

    (X_train, y_train), (X_test, y_test) = train_test_split_vehicles(df_agg_1, 200)

    model = Pipeline([('scaler', StandardScaler()), ('gbm', GradientBoostingClassifier())])
    model.fit(X_train, y_train)

    acc = accuracy(model, X_test, y_test)
    print('car, car accuracy:',acc)
    print('\n')

car, taxi accuracy: 0.5475
car, car accuracy: 0.5525


car, taxi accuracy: 0.6175
car, car accuracy: 0.5125


car, taxi accuracy: 0.5875
car, car accuracy: 0.455


car, taxi accuracy: 0.6
car, car accuracy: 0.54


car, taxi accuracy: 0.55
car, car accuracy: 0.5225


car, taxi accuracy: 0.58
car, car accuracy: 0.505


car, taxi accuracy: 0.5525
car, car accuracy: 0.5175


car, taxi accuracy: 0.5975
car, car accuracy: 0.4825


car, taxi accuracy: 0.5975
car, car accuracy: 0.54


car, taxi accuracy: 0.5725
car, car accuracy: 0.495




In [4]:
%%time

# Time a single `downsample` call on the full frame and display the result.
# The second argument is 30 here, versus 100 in the modelling cells.
# NOTE(review): the saved output for this cell lists lon_acc_*/lat_acc_* columns that
# the current `agg_dict` does not request, so it looks like it came from an earlier
# run -- re-execute to refresh it.
df2 = downsample(df, 30, 0.3, agg_dict)
df2

Wall time: 30 s


Unnamed: 0_level_0,Unnamed: 1_level_0,xtrack_dist_std,avg_surr_speed_mean,avg_surr_speed_std,lanes_mean,len_mean,speed_mean,speed_std,vehicle_density_mean,lon_acc_mean,lon_acc_std,lat_acc_mean,lat_acc_std,type
id,road,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1_2_1013,250691795_250699359,0.036401,34.264758,1.673595,13.0,171.188,36.948420,1.412599,4.900000,1.115960,0.228770,0.260907,0.030438,Car
1_2_1013,250691795_250699359,0.074045,36.721555,1.228342,13.0,171.188,39.722530,0.941658,4.266667,0.752687,0.066130,0.200947,0.015278,Car
1_2_1013,250691795_250699359,0.142907,34.689947,1.384964,13.0,171.188,41.438370,0.429593,4.800000,0.320303,0.284496,0.231417,0.024870,Car
1_2_1013,250691795_250699359,0.207880,34.596191,1.594841,13.0,171.188,41.755420,0.033040,5.166667,-0.007537,0.039648,0.130213,0.119594,Car
1_2_1013,250691795_250699359,0.201988,38.085776,2.150748,13.0,171.188,41.830633,0.090816,3.933333,0.067197,0.031052,-0.196260,0.123254,Car
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4_3_994,250706958_388172075,0.034688,26.904613,0.078769,6.0,87.080,26.904613,0.078769,1.000000,-0.024833,0.115887,0.049193,0.048294,Car
4_3_994,250706958_388172075,0.055689,26.664223,0.107536,6.0,87.080,26.664223,0.107536,1.000000,-0.077043,0.067189,0.140990,0.024776,Car
4_3_994,250706958_388172075,0.098903,26.490103,0.125357,6.0,87.080,26.490103,0.125357,1.000000,-0.102513,0.121679,0.154047,0.009716,Car
4_3_994,250706958_388172075,0.128613,25.676100,0.577366,6.0,87.080,25.676100,0.577366,1.000000,-0.441643,0.132444,0.124577,0.013990,Car
