#GeoLife

## Importing Libraries

In [0]:
import numpy as np
import pandas as pd

import pyspark.sql.functions as f
import pyspark.sql.types as t 

from pyspark.ml import Pipeline
from pyspark.ml.feature import StringIndexer, StandardScaler, VectorAssembler
from pyspark.ml.evaluation import BinaryClassificationEvaluator, MulticlassClassificationEvaluator
from pyspark.mllib.evaluation import MulticlassMetrics
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.classification import RandomForestClassifier

## Data Collection

In [0]:
%sh
unzip /dbfs/FileStore/tables/Geolife_Trajectories_1_3.zip 

In [0]:
%sh
cp -r 'Geolife Trajectories 1.3' /dbfs/FileStore/tables

In [0]:
def load_data(user_id): 
    schema1 = t.StructType([t.StructField('StartTime', t.StringType(), True),
                            t.StructField('EndTime', t.StringType(), True),
                            t.StructField('TransportationMode', t.StringType(), True)])
    df1 = spark.createDataFrame([], schema=schema1).withColumn('ID', f.lit(None))
  
    schema2 = t.StructType([t.StructField('Latitude', t.DoubleType(), True),
                            t.StructField('Longitude', t.DoubleType(), True),
                            t.StructField('0', t.IntegerType(), True),
                            t.StructField('Altitude', t.IntegerType(), True),
                            t.StructField('Data1', t.DoubleType(), True),
                            t.StructField('Data', t.StringType(), True),
                            t.StructField('Time', t.StringType(), True)])
    df2 = spark.createDataFrame([], schema=schema2).withColumn('ID', f.lit(None))
    
    for num in np.arange(user_id):
      folder = '0' * (3 - len(str(num))) + str(num)
      
      try:
          df1 = df1.union(spark.read.csv(f'/FileStore/tables/Geolife Trajectories 1.3/Data/{folder}/labels.txt',
                          sep='\t', header=True).withColumn('ID', f.lit(int(num))))
          df2 = df2.union(spark.read.csv(f'/FileStore/tables/Geolife Trajectories 1.3/Data/{folder}/Trajectory/*.plt', 
                          schema=schema2, sep=',').withColumn("Index", f.monotonically_increasing_id()).filter('Index > 5')\
                          .drop('Index').withColumn('ID', f.lit(int(num))))
      except Exception as ex:
          pass
          
    return df1, df2

In [0]:
df_labels, df_trajectory = load_data(182)

## Data Overview

In [0]:
display(df_labels)

StartTime,EndTime,TransportationMode,ID
2007/06/26 11:32:29,2007/06/26 11:40:29,bus,10
2008/03/28 14:52:54,2008/03/28 15:59:59,train,10
2008/03/28 16:00:00,2008/03/28 22:02:00,train,10
2008/03/29 01:27:50,2008/03/29 15:59:59,train,10
2008/03/29 16:00:00,2008/03/30 15:59:59,train,10
2008/03/30 16:00:00,2008/03/31 03:13:11,train,10
2008/03/31 04:17:59,2008/03/31 15:31:06,train,10
2008/03/31 16:00:08,2008/03/31 16:09:01,taxi,10
2008/03/31 17:26:04,2008/04/01 00:35:26,train,10
2008/04/01 00:48:32,2008/04/01 00:59:23,taxi,10


In [0]:
df_labels.count()

In [0]:
display(df_trajectory)

Latitude,Longitude,0,Altitude,Data1,Data,Time,ID
39.991376,116.32641,0,354,39801.4862268518,2008-12-19,11:40:10,10
39.991358,116.326438,0,361,39801.4862384259,2008-12-19,11:40:11,10
39.991364,116.326458,0,371,39801.48625,2008-12-19,11:40:12,10
39.991391,116.326418,0,322,39801.4862962963,2008-12-19,11:40:16,10
39.991384,116.326436,0,325,39801.4863078704,2008-12-19,11:40:17,10
39.991351,116.326498,0,344,39801.4863194444,2008-12-19,11:40:18,10
39.991306,116.326614,0,348,39801.4863425926,2008-12-19,11:40:20,10
39.991305,116.326561,0,338,39801.4863541667,2008-12-19,11:40:21,10
39.991316,116.326576,0,348,39801.4863657407,2008-12-19,11:40:22,10
39.991316,116.326561,0,338,39801.4863773148,2008-12-19,11:40:23,10


In [0]:
df_trajectory.count()

## Data Cleaning

In [0]:
df_labels = df_labels.withColumn('StartTime', f.regexp_replace('StartTime', '/', '-'))\
                     .withColumn('EndTime', f.regexp_replace('EndTime', '/', '-'))
display(df_labels)

StartTime,EndTime,TransportationMode,ID
2007-06-26 11:32:29,2007-06-26 11:40:29,bus,10
2008-03-28 14:52:54,2008-03-28 15:59:59,train,10
2008-03-28 16:00:00,2008-03-28 22:02:00,train,10
2008-03-29 01:27:50,2008-03-29 15:59:59,train,10
2008-03-29 16:00:00,2008-03-30 15:59:59,train,10
2008-03-30 16:00:00,2008-03-31 03:13:11,train,10
2008-03-31 04:17:59,2008-03-31 15:31:06,train,10
2008-03-31 16:00:08,2008-03-31 16:09:01,taxi,10
2008-03-31 17:26:04,2008-04-01 00:35:26,train,10
2008-04-01 00:48:32,2008-04-01 00:59:23,taxi,10


In [0]:
df_trajectory = df_trajectory.withColumn('Time', f.concat_ws(' ', 'Data', 'Time')).drop('0', 'Data1', 'Data')
display(df_trajectory)

Latitude,Longitude,Altitude,Time,ID
39.991376,116.32641,354,2008-12-19 11:40:10,10
39.991358,116.326438,361,2008-12-19 11:40:11,10
39.991364,116.326458,371,2008-12-19 11:40:12,10
39.991391,116.326418,322,2008-12-19 11:40:16,10
39.991384,116.326436,325,2008-12-19 11:40:17,10
39.991351,116.326498,344,2008-12-19 11:40:18,10
39.991306,116.326614,348,2008-12-19 11:40:20,10
39.991305,116.326561,338,2008-12-19 11:40:21,10
39.991316,116.326576,348,2008-12-19 11:40:22,10
39.991316,116.326561,338,2008-12-19 11:40:23,10


In [0]:
data = df_trajectory.join(df_labels, on=[df_trajectory['ID'] == df_labels['ID'], 
                          df_trajectory['Time'].between(df_labels['StartTime'], df_labels['EndTime'])],
                          how='inner').drop(df_trajectory['ID'])

In [0]:
data.orderBy('ID', 'Time').write.csv('/FileStore/tables/dataset.csv', header=True)

## Feature Engineering

In [0]:
custom_schema = t.StructType([t.StructField('Latitude', t.DoubleType(), True),
                              t.StructField('Longitude', t.DoubleType(), True),
                              t.StructField('Altitude', t.IntegerType(), True),
                              t.StructField('Time', t.TimestampType(), True),
                              t.StructField('StartTime', t.TimestampType(), True),
                              t.StructField('EndTime', t.TimestampType(), True),
                              t.StructField('TransportationMode', t.StringType(), True),
                              t.StructField('ID', t.IntegerType(), True)])
data = spark.read.csv('/FileStore/tables/dataset.csv', schema=custom_schema, header=True)

In [0]:
display(data)

Latitude,Longitude,Altitude,Time,StartTime,EndTime,TransportationMode,ID
39.9965149,116.2732933,,2009-02-21T07:46:21.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965333,116.27329,,2009-02-21T07:46:23.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965483,116.2732933,,2009-02-21T07:46:25.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.996565,116.2732949,,2009-02-21T07:46:27.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965783,116.2733033,,2009-02-21T07:46:29.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965883,116.2732999,,2009-02-21T07:46:31.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965816,116.2732983,,2009-02-21T07:46:34.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965883,116.2732966,,2009-02-21T07:46:36.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965666,116.2733166,,2009-02-21T07:46:38.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128
39.9965583,116.2733266,,2009-02-21T07:46:40.000+0000,2009-02-21T07:02:45.000+0000,2009-02-21T09:06:53.000+0000,walk,128


In [0]:
data.count()

In [0]:
display(data.groupBy('TransportationMode').count())

TransportationMode,count
bus,1284449
airplane,9196
train,561037
boat,3566
taxi,243097
walk,1615614
subway,312758
car,517436
bike,959547
run,1975


In [0]:
data = data.dropDuplicates()

In [0]:
display(data.groupBy('TransportationMode').count())

TransportationMode,count
motorcycle,338
bus,1270771
airplane,9194
train,561031
boat,3566
taxi,240145
walk,1562198
subway,309706
run,1975
car,516195


In [0]:
data = data.withColumn('TransportationMode', f.regexp_replace('TransportationMode', 'car|taxi', 'cat/taxi'))

In [0]:
def get_features(latitude_a, longitude_a, latitude_b, longitude_b, time_a, time_b, radius=6371):
    latitude_a, longitude_a, latitude_b, longitude_b = map(np.radians, [latitude_a, longitude_a, latitude_b, longitude_b,])
    dist_longitude = longitude_b - longitude_a
    dist_latitude = latitude_b - latitude_a

    area = np.sin(dist_latitude / 2) ** 2 + np.cos(latitude_a) * np.cos(latitude_b) * np.sin(dist_longitude / 2) ** 2
    central_angle = 2 * np.arcsin(np.sqrt(area))

    distance = abs(central_angle * radius)
    time = abs((time_b - time_a).total_seconds()) / 3600
    velocity = distance / time if time != 0 else np.nan
    angle = np.arctan2(np.sin(longitude_b - longitude_a) * np.cos(latitude_b), np.cos(latitude_a) * np.sin(latitude_b) -\
                       np.sin(latitude_a) * np.cos(latitude_b) * np.cos(longitude_b - longitude_a))
  
    x = radius * np.cos(latitude_a) * np.cos(longitude_a)
    y = radius * np.cos(latitude_a) * np.sin(longitude_a)
    z = radius * np.sin(latitude_a)
    
    return {'Distance': float(distance), 'Time': float(time), 'Velocity': float(velocity), 'Angle': float(angle),
            'X': float(x), 'Y': float(y), 'Z': float(z)}

In [0]:
features = f.udf(get_features, t.MapType(t.StringType(), t.DoubleType()))

In [0]:
df_windows = data.groupby('ID', 'TransportationMode', f.window('Time', '30 seconds')\
                 .alias('Window')).agg(features(f.collect_list('Latitude')[0], f.collect_list('Longitude')[0],
                  f.reverse(f.collect_list('Latitude'))[0], f.reverse(f.collect_list('Longitude'))[0],
                  f.collect_list('Time')[0], f.reverse(f.collect_list('Time'))[0]).alias('map'))\
                 .orderBy('ID', 'Window')
df_windows.cache()

df_windows = df_windows.withColumn('Distance', df_windows.map['Distance'])\
                       .withColumn('Time', df_windows.map['Time'])\
                       .withColumn('Velocity', df_windows.map['Velocity'])\
                       .withColumn('Angle', df_windows.map['Angle'])\
                       .withColumn('X', df_windows.map['X'])\
                       .withColumn('Y', df_windows.map['Y'])\
                       .withColumn('Z', df_windows.map['Z'])\
                       .drop('Window', 'map')

df_windows = df_windows.filter(df_windows['Velocity'] != np.nan)

In [0]:
display(df_windows)

ID,TransportationMode,Distance,Time,Velocity,Angle,X,Y,Z
10,train,0.0,0.0005555555555555556,0.0,0.0,-2173.107578149587,4392.234745835793,4071.304261690796
10,train,0.0,0.0005555555555555556,0.0,0.0,-2173.107003412945,4392.231462631642,4071.308110470923
10,train,0.0,0.0005555555555555556,0.0,0.0,-2208.53503441826,4391.124522423796,4053.3985037639136
10,train,0.0,0.0005555555555555556,0.0,0.0,-2255.1851008848207,4395.409015551488,4022.954231252926
10,train,0.0,0.0005555555555555556,0.0,0.0,-2259.6911201072035,4459.338706097727,3949.396833189508
10,train,0.0,0.0005555555555555556,0.0,0.0,-2259.652892302582,4464.763587808039,3943.2849137859744
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.625907650106,4486.043790132089,3927.6846420917327
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.6097931779163,4486.053846076548,3927.682365781582
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.636509824749,4486.034652563979,3927.6890196089794
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.618690502275,4486.039735941088,3927.693397123236


In [0]:
display(df_windows)

ID,TransportationMode,Distance,Time,Velocity,Angle,X,Y,Z
10,train,0.0,0.0005555555555555556,0.0,0.0,-2173.107578149587,4392.234745835793,4071.304261690796
10,train,0.0,0.0005555555555555556,0.0,0.0,-2173.107003412945,4392.231462631642,4071.308110470923
10,train,0.0,0.0005555555555555556,0.0,0.0,-2208.53503441826,4391.124522423796,4053.3985037639136
10,train,0.0,0.0005555555555555556,0.0,0.0,-2255.1851008848207,4395.409015551488,4022.954231252926
10,train,0.0,0.0005555555555555556,0.0,0.0,-2259.6911201072035,4459.338706097727,3949.396833189508
10,train,0.0,0.0005555555555555556,0.0,0.0,-2259.652892302582,4464.763587808039,3943.2849137859744
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.625907650106,4486.043790132089,3927.6846420917327
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.6097931779163,4486.053846076548,3927.682365781582
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.636509824749,4486.034652563979,3927.6890196089794
10,train,0.0,0.0005555555555555556,0.0,0.0,-2244.618690502275,4486.039735941088,3927.693397123236


## Machine Learning

In [0]:
custom_schema = t.StructType([t.StructField('ID', t.IntegerType(), True),
                              t.StructField('TransportationMode', t.StringType(), True),
                              t.StructField('Distance', t.DoubleType(), True),
                              t.StructField('Time', t.DoubleType(), True),
                              t.StructField('Velocity', t.DoubleType(), True),
                              t.StructField('Angle', t.DoubleType(), True),
                              t.StructField('X', t.DoubleType(), True),
                              t.StructField('Y', t.DoubleType(), True),
                              t.StructField('Z', t.DoubleType(), True)])
X = spark.createDataFrame([], schema=custom_schema)

for mode in df_windows.select('TransportationMode').distinct().collect():
    X = X.union(df_windows.filter(df_windows['TransportationMode'] == mode.TransportationMode).orderBy(f.rand()).limit(400))

In [0]:
display(X)

ID,TransportationMode,Distance,Time,Velocity,Angle,X,Y,Z
10,train,0.4106142879520948,0.0027777777777777,147.82114366275414,1.41689408595274,-2017.9374114581724,4831.697748983057,3629.361716047265
167,train,0.1586270976449602,0.0025,63.45083905798408,-1.6613360777895358,-2216.702329625165,4268.162899467853,4178.355806467484
167,train,0.797022845081863,0.0063888888888888,124.75140183890028,-2.478040346101152,-2397.647069441995,4266.136055309598,4079.339736769135
10,train,0.1482431155052206,0.0013888888888888,106.73504316375886,-2.8636001640629285,-2407.759013375513,4705.106645229034,3557.289556745495
153,train,0.1071248446983052,0.0013888888888888,77.12988818277978,-2.9551428019134645,-2079.7100163038813,4567.603872876832,3924.467111413456
10,train,0.2594500076905379,0.0041666666666666,62.26800184572911,-1.3784526328976603,-1493.161997274245,5033.781321019119,3608.483595924333
10,train,0.326919112282042,0.0041666666666666,78.4605869476901,1.5367590164692395,-1904.438536333811,4877.845418594146,3628.963892576892
10,train,0.3744827947261112,0.0038888888888888,96.29557578671432,-0.3573855198815521,-1276.2305066241502,5009.596179621172,3723.5497594482094
10,train,0.4370838551823888,0.0027777777777777,157.35018786565996,-0.4697439734054472,-2171.2269601369685,4391.691909735431,4072.892910148601
10,train,0.0619948792563509,0.0027777777777777,22.318156532286334,-0.074181819284787,-2406.8557163002797,4682.820379861316,3587.1827177979376


In [0]:
display(X.groupBy('TransportationMode').count().orderBy('count', ascending=False))

TransportationMode,count
cat/taxi,400
bus,400
train,400
airplane,400
bike,400
boat,400
walk,400
subway,400
run,144
motorcycle,24


In [0]:
train, test = X.randomSplit([0.8, 0.2], seed=42)

In [0]:
vector_assembler = VectorAssembler(inputCols=['Time', 'Velocity', 'Angle', 'X', 'Y', 'Z'], outputCol='Vector')
scaler = StandardScaler(withMean=True, withStd=True, inputCol='Vector', outputCol='features')
label_encoder = StringIndexer(inputCol='TransportationMode', outputCol='label', stringOrderType='frequencyDesc', handleInvalid='keep') 

forest = RandomForestClassifier(featuresCol='features', labelCol='label', maxDepth=5, impurity='gini', numTrees=100, featureSubsetStrategy='auto')

In [0]:
pipeline = Pipeline(stages=[vector_assembler, scaler, label_encoder, forest])
clf_forest = pipeline.fit(train)
pred = clf_forest.transform(test)

In [0]:
display(pred.select('label').distinct())

label
7.0
4.0
0.0
6.0
2.0
1.0
5.0
3.0
8.0
9.0


In [0]:
display(pred.select('prediction').distinct())

prediction
0.0
7.0
4.0
2.0
6.0
5.0
3.0
8.0
1.0


In [0]:
f1_score =  MulticlassClassificationEvaluator(metricName='f1')
accuracy = MulticlassClassificationEvaluator(metricName='accuracy')

In [0]:
print(f'F1-score of training: {f1_score.evaluate(clf_forest.transform(train))}')
print(f'Accuracy of training: {accuracy.evaluate(clf_forest.transform(train))}')

print(f'F1-score of test: {f1_score.evaluate(pred)}')
print(f'Accuracy of test: {accuracy.evaluate(pred)}')

In [0]:
display(pred.select('TransportationMode', 'label', 'prediction'))

TransportationMode,label,prediction
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0


In [0]:
param_grid = ParamGridBuilder().addGrid(forest.maxDepth, [5, 10, 30])\
                               .addGrid(forest.numTrees, [20, 50, 100, 300]).build()

cv = CrossValidator(estimator=pipeline, estimatorParamMaps=param_grid, evaluator=f1_score, numFolds=5, parallelism=4)
clf_forest_tuned = cv.fit(train)
pred_tuned = clf_forest_tuned.transform(test)

In [0]:
print(f'F1-score of test: {f1_score.evaluate(pred_tuned)}')
print(f'Accuracy of test: {accuracy.evaluate(pred_tuned)}')

In [0]:
display(pred_tuned.select('TransportationMode', 'label', 'prediction'))

TransportationMode,label,prediction
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0
train,7.0,7.0


In [0]:
df_pred = pred.toPandas()
d = df_pred[['TransportationMode', 'label']].drop_duplicates().set_index('label').to_dict()['TransportationMode']
metrics = MulticlassMetrics(pred_tuned.select('label', 'prediction').rdd.map(lambda x: tuple(map(float, x))))
confusion_matrix = metrics.confusionMatrix().toArray()
labels = [d[i] for i in metrics.call('labels')]
pd.DataFrame(confusion_matrix , index=labels, columns=labels)

Unnamed: 0,cat/taxi,airplane,subway,boat,bus,bike,walk,train,run,motorcycle
cat/taxi,36.0,0.0,5.0,0.0,17.0,3.0,4.0,7.0,0.0,0.0
airplane,1.0,68.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
subway,7.0,0.0,57.0,0.0,1.0,2.0,9.0,0.0,1.0,0.0
boat,1.0,1.0,0.0,79.0,0.0,0.0,2.0,0.0,0.0,0.0
bus,11.0,0.0,0.0,0.0,29.0,5.0,10.0,1.0,0.0,0.0
bike,4.0,0.0,5.0,0.0,10.0,66.0,6.0,0.0,1.0,0.0
walk,7.0,1.0,7.0,0.0,20.0,7.0,49.0,0.0,2.0,2.0
train,3.0,3.0,0.0,1.0,4.0,0.0,1.0,82.0,0.0,0.0
run,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,22.0,0.0
motorcycle,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0


## Spark Streaming

In [0]:
def preprocessing(data):
    data = data.withColumn('TransportationMode', f.regexp_replace('TransportationMode', 'car|taxi', 'cat/taxi'))
    
    df = data.groupby('ID', 'TransportationMode', f.window('Time', '30 seconds')\
             .alias('Window')).agg(features(f.collect_list('Latitude')[0], f.collect_list('Longitude')[0],
              f.reverse(f.collect_list('Latitude'))[0], f.reverse(f.collect_list('Longitude'))[0],
              f.collect_list('Time')[0], f.reverse(f.collect_list('Time'))[0]).alias('map'))\
             .orderBy('ID', 'Window')

    df = df.withColumn('Distance', df.map['Distance']).withColumn('Time', df.map['Time'])\
           .withColumn('Velocity', df.map['Velocity']).withColumn('Angle', df.map['Angle'])\
           .withColumn('X', df.map['X']).withColumn('Y', df.map['Y'])\
           .withColumn('Z', df.map['Z']).drop('Window', 'map')

    df = df.filter(df['Velocity'] != np.nan) 
    
    return df

In [0]:
custom_schema = t.StructType([t.StructField('Latitude', t.DoubleType(), True),
                              t.StructField('Longitude', t.DoubleType(), True),
                              t.StructField('Altitude', t.IntegerType(), True),
                              t.StructField('Time', t.TimestampType(), True),
                              t.StructField('StartTime', t.TimestampType(), True),
                              t.StructField('EndTime', t.TimestampType(), True),
                              t.StructField('TransportationMode', t.StringType(), True),
                              t.StructField('ID', t.IntegerType(), True)])

stream_data = spark.readStream.schema(custom_schema).option('header', True).option('maxFilesPerTrigger', 1)\
                   .format('csv').load('/FileStore/tables/data.csv')

In [0]:
stream_preprocessed = preprocessing(stream_data)
stream_pred = clf_forest_tuned.transform(stream_preprocessed)

In [0]:
display(stream_pred.select('TransportationMode', 'label', 'prediction'))

TransportationMode,label,prediction
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,0.0
bus,4.0,0.0
bus,4.0,4.0
bus,4.0,0.0
bus,4.0,4.0


In [0]:
display(stream_pred.select('TransportationMode', 'label', 'prediction'))

TransportationMode,label,prediction
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,4.0
bus,4.0,0.0
bus,4.0,0.0
bus,4.0,4.0
bus,4.0,0.0
bus,4.0,4.0
