In [1]:
import pandas as pd
import pandas as pd
import numpy as np
import TrajectorySegmentation as ts
import Trajectory as tr
from scipy import stats
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
%matplotlib inline

## Trajectory Library usage
In this cell, we load the data.
We need to pass the column names for latitude, longitude, and date/time, the target columns (our class labels), the file name, and the separator character.

In [2]:
# Create the segmentation object and load the GeoLife CSV into it.
ts_obj = ts.TrajectorySegmentation()

# Column names and file settings handed to load_data as keyword arguments.
load_options = dict(
    lat='latitude',
    lon='longitude',
    time_date='collected_time',
    labels=['transportation_mode'],
    src='databases/geolife/geolife.csv',
    seperator=',',  # spelling matches the keyword this notebook passes to load_data
)
ts_obj.load_data(**load_options)

# Report the dimensions of the loaded data.
print(ts_obj.return_row_data().shape)

loading...
latitude
longitude
None
collected_time
['transportation_mode']
databases/geolife/geolife.csv
,
                     Unnamed: 0    lid  t_user_id        lat         lon  \
collected_time                                                             
2008-03-28 14:54:40      530003  14639         10  39.894178  116.318200   
2008-03-28 14:55:14      530004  14639         10  39.894505  116.321132   
2008-03-28 14:56:13      530005  14639         10  39.894953  116.326452   
2008-03-28 14:57:12      530006  14639         10  39.894600  116.332542   
2008-03-28 14:58:11      530007  14639         10  39.889622  116.337040   

                     altitude transportation_mode  
collected_time                                     
2008-03-28 14:54:40    -777.0               train  
2008-03-28 14:55:14    -777.0               train  
2008-03-28 14:56:13    -777.0               train  
2008-03-28 14:57:12    -777.0               train  
2008-03-28 14:58:11    -777.0               train

# Check class labels

In [4]:
# Show the distinct values found in the transportation_mode column of the loaded data.
set(ts_obj.return_row_data().transportation_mode)

{'airplane', 'bus', 'car', 'subway', 'taxi', 'train', 'walk'}

# Segmentation
In this cell, we segment the raw trajectory data. We can use any of the available segmentation methods, such as segmentByLabel, multi_label_segmentation, and others.
When using segmentByLabel, we pass the label column that holds the trajectory IDs.
When using multi_label_segmentation, we pass a list of columns by which the data is segmented.

In [9]:
# Segment the loaded data with segmentByLabel.
# Other segmentation calls that can be used in its place:
#   segments, trajectorySegments = ts_obj.segmentByStopMove()
#   segments, trajectorySegments = ts_obj.multi_label_segmentation(labels=['t_user_id','day','transportation_mode'])
segments, trajectorySegments = ts_obj.segmentByLabel()

# Summarise the result: how many segments were produced and which labels exist.
trajectory_count = len(trajectorySegments)
print('Number of trajectories in dataset:', trajectory_count)
class_labels = set(ts_obj.return_row_data().transportation_mode)
print('Classes in dataset:', class_labels)

Number of trajectories in dataset: 7
Classes in dataset: {'train', 'airplane', 'car', 'subway', 'walk', 'bus', 'taxi'}


# Feature Extraction
For each segment generated using the previous step, we generate point features and trajectory features and collect them in an array.

In [11]:
# Build one feature vector per sufficiently long segment and collect them in `features`.
features = []
i = 1  # progress counter: starts at 1 and grows by one per processed segment
for seg in range(len(trajectorySegments)):
    segment_df = trajectorySegments[seg]

    # only use segments longer than 10
    if segment_df.shape[0] <= 10:
        continue

    tr_obj = tr.Trajectory(mood='df', trajectory=segment_df, labels=['transportation_mode'])

    tr_obj.point_features()  # generate point_features
    f = tr_obj.segment_features()  # generate segment_features

    # NOTE(review): the user id is hard-coded; presumably it matches the
    # t_user_id of the loaded data -- confirm before reusing on other data.
    userid = 10
    f.append(userid)

    features.append(np.array(f))

    # Print the counter each time it reaches 301, 601, ...
    i += 1
    if i % 300 == 1:
        print(i)

  s1.append(s2, ignore_index=True)
  s1.append(s2, ignore_index=True)
  s1.append(s2, ignore_index=True)
  s1.append(s2, ignore_index=True)


In [43]:
# Display the first value returned by the segmentation call.
segments

[[0, 1325],
 [1325, 36515],
 [36515, 38887],
 [38887, 59272],
 [59272, 116099],
 [116099, 497034],
 [497034, 534140]]

In [15]:
# Display the feature list left in `f` by the last processed segment of the extraction loop.
f

[0.0,
 3910399.627960215,
 33313.911647574576,
 1.869906683649378,
 246106.65129738848,
 0.819480242264316,
 1.2395525209062939,
 1.869906683649378,
 3.8787803432376258,
 41.397555285671814,
 0.0,
 2565318.336256926,
 7325.934193633077,
 1.4085229680493605,
 104302.22325490124,
 0.3583052304948839,
 0.9600568273973059,
 1.4085229680493605,
 1.984258664928493,
 3.5158761861728722,
 -2565317.599383402,
 2565317.524460249,
 -789.7067648699248,
 0.0,
 135523.61037918174,
 -1.2886049232481738,
 -0.398775896881723,
 0.0,
 0.298016494829201,
 1.1349068232561903,
 0.0,
 359.68935688756443,
 180.39179833215596,
 187.32972588271093,
 102.1304584720584,
 38.90622806635213,
 89.99999305844307,
 187.32972588271093,
 270.00000417167905,
 309.65172327865406,
 -5130635.123843651,
 3820523.3202060526,
 -366.8445177876394,
 0.007175197878407121,
 220648.09179243067,
 -1.7951231449999492,
 -0.3660730241493022,
 0.007175197878407121,
 0.7931916621169934,
 5.81281840021179,
 -357.80328442420813,
 359.68935

# save the feature extraction results

In [16]:
# Every measurement family below contributes ten columns named '<prefix>_<suffix>'.
_STAT_SUFFIXES = ['min', 'max', 'mean', 'median', 'std', 'p10', 'p25', 'p50', 'p75', 'p90']


def _stat_columns(prefix):
    """Return the ten column names '<prefix>_<suffix>' for one measurement family."""
    return [prefix + '_' + suffix for suffix in _STAT_SUFFIXES]


bearingSet = _stat_columns('bearing')
speedSet = _stat_columns('speed')
distanceSet = _stat_columns('distance')
accelerationSet = _stat_columns('acceleration')
jerkSet = _stat_columns('jerk')
brateSet = _stat_columns('bearing_rate')
brate_rateSet = _stat_columns('brate_rate')
stop_timeSet = _stat_columns('stop_time')

targetset = ['cattle', 'deer', 'elk']

# Remaining column names, appended after the eight families above.
# The spellings are reproduced exactly as they are written in this notebook.
_segment_columns = [
    'isInValid', 'isPure', 'target', 'stopRate', 'starTime', 'endTime',
    'isWeekDay', 'dayOfWeek', 'durationInSeconds', 'distanceTravelled',
    'startToEndDistance', 'startLat', 'starLon', 'endLat', 'endLon',
    'selfIntersect', 'modayDistance', 'tuesdayDistance', 'wednesdayDay',
    'thursdayDistance', 'fridayDistance', 'saturdayDistance', 'sundayDistance',
    'stopTotal', 'stopTotalOverDuration', 'userId',
]

# Full column order: the eight families in this order, then the remaining names.
col = []
for _family in (distanceSet, speedSet, accelerationSet, bearingSet,
                jerkSet, brateSet, brate_rateSet, stop_timeSet):
    col = col + _family
col = col + _segment_columns

# Turn the collected feature vectors into a table with the column names in `col`.
features_set = pd.DataFrame(data=features, columns=col)

# Write the table to disk.
# NOTE(review): the file name says "animal" although this notebook loads the
# GeoLife CSV -- confirm the intended name before changing it.
features_set.to_csv('features_animal.csv')

In [17]:
# Display the assembled feature table.
features_set

Unnamed: 0,distance_min,distance_max,distance_mean,distance_median,distance_std,distance_p10,distance_p25,distance_p50,distance_p75,distance_p90,...,modayDistance,tuesdayDistance,wednesdayDay,thursdayDistance,fridayDistance,saturdayDistance,sundayDistance,stopTotal,stopTotalOverDuration,userId
0,0.0,1330152.0,32165.089565,4.737738,179401.602609,0.222453,0.693346,4.737738,9.333343,24.07436,...,0.0,0.0,0.0,0.0,29205900.0,0.0,0.0,676.0,0.000124,1
1,0.0,2959198.0,10537.579639,11.249174,126110.086762,1.779421,5.800059,11.249174,26.904254,272.417049,...,63581140.0,6765103.0,36105640.0,5055190.0,60603770.0,1895809.0,11981630.0,91131.0,0.004235,1
2,0.0,6389.984,116.946562,8.750205,484.656243,1.401148,4.018477,8.750205,17.870176,134.762475,...,0.0,0.0,0.0,0.0,0.0,162672.7,0.0,0.0,0.0,1
3,0.0,151470.9,512.009826,18.909102,4948.1421,5.345232,12.793039,18.909102,73.458213,575.173527,...,265395.8,315849.6,212008.3,456768.2,1509612.0,1161334.0,100869.9,4715859.0,0.318685,1
4,0.0,3555454.0,17673.096263,10.993637,199045.634899,0.222453,3.389692,10.993637,26.543193,208.034949,...,40265900.0,53133150.0,68494220.0,70943680.0,171589200.0,67337720.0,9068104.0,221997.0,0.010242,1
5,0.0,2993326.0,5958.789753,44.877167,60156.472777,14.598839,24.783448,44.877167,360.056969,1722.294577,...,121958200.0,113383900.0,20840380.0,67663200.0,149097100.0,19182410.0,32921720.0,311845.0,0.014218,1
6,0.0,3910400.0,33313.911648,1.869907,246106.651297,0.81948,1.239553,1.869907,3.87878,41.397555,...,68760790.0,133626700.0,150475300.0,59853590.0,145960500.0,15463310.0,10885450.0,125119.0,0.005782,1
