In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [4]:
import os
# Restrict OpenMP-backed libraries to a single thread for this process.
# NOTE(review): numpy and tensorflow are already imported by the first cell (In [1]);
# a thread-count variable set after those imports may not be honoured — TODO confirm,
# and consider moving this assignment ahead of those imports.
os.environ["OMP_NUM_THREADS"] = "1"

import matplotlib.pyplot as plt
from PIL import Image

from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

In [5]:
# Directory that receives files generated by this notebook.
ARTIFACTS_FOLDER = "./artifacts"

# Load the dataset tables from ./train_data; verbose=True prints the
# per-table record counts and the indexing timings shown below.
level5data = LyftDataset(
    data_path='.',
    json_path='train_data',
    verbose=True,
)

# exist_ok=True keeps this cell re-runnable when the folder is already there.
os.makedirs(ARTIFACTS_FOLDER, exist_ok=True)

9 category,
18 attribute,
4 visibility,
18421 instance,
10 sensor,
148 calibrated_sensor,
177789 ego_pose,
180 log,
180 scene,
22680 sample,
189504 sample_data,
638179 sample_annotation,
1 map,
Done loading in 12.0 seconds.
Reverse indexing ...
Done reverse indexing in 3.3 seconds.


In [6]:
# Object category names used by this notebook, in a fixed order.
classes = [
    "car",
    "motorcycle",
    "bus",
    "bicycle",
    "truck",
    "pedestrian",
    "other_vehicle",
    "animal",
    "emergency_vehicle",
]

In [7]:
# Training labels: one row per sample, with every annotated box of that sample
# packed into a single whitespace-separated PredictionString.
trainData = pd.read_csv(filepath_or_buffer='train.csv')

# Column dtypes / null counts first, then a preview of the first five rows.
trainData.info()
trainData.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22680 entries, 0 to 22679
Data columns (total 2 columns):
Id                  22680 non-null object
PredictionString    22680 non-null object
dtypes: object(2)
memory usage: 354.5+ KB


Unnamed: 0,Id,PredictionString
0,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2680.2830359778527 698.1969292852777 -18.04776...
1,edf37c1fb1024ba0c1f53ebbf10b6797f781199a9f0e4e...,1208.642684768659 1585.1812946970927 -23.56236...
2,1841b7895e7163a2c1be87e8c1740e759a910cd59157b8...,848.9346793681768 2587.2870557176507 -20.15995...
3,a970c764e7abd7ebc2cc4599a6e3ae58c9773ea3c9cf1d...,1994.4320280494098 1128.30164336183 -18.054752...
4,b909de93ba19460eeea58074ccc1ef6bb302435f37b48c...,1030.3877156973385 1667.13131694941 -23.764820...


In [8]:
# Flatten train.csv into one row per annotated box.
#
# Every PredictionString is a whitespace-separated run of 8-field records
# (the fields named in boxColumns). Splitting with expand=True gives one row
# per sample, padded with None up to the longest string in the file.
boxColumns = ['center_x','center_y','center_z','width','length','height','yaw','class_name']

predictionDf = pd.DataFrame(trainData.PredictionString.str.split(expand = True))

# Derive the number of box slots per sample from the split width instead of
# hard-coding it: a fixed repeat count only lines ids up with boxes when it
# happens to equal (widest row / 8), and silently mislabels boxes otherwise.
boxesPerSample, leftover = divmod(predictionDf.shape[1], len(boxColumns))
if leftover:
    raise ValueError(
        "PredictionString token count %d is not a multiple of %d fields per box"
        % (predictionDf.shape[1], len(boxColumns))
    )

# Row-major reshape: consecutive groups of 8 tokens become one box row, so
# sample k occupies rows [k * boxesPerSample, (k + 1) * boxesPerSample).
predDf = pd.DataFrame(predictionDf.values.reshape(-1, len(boxColumns)),
             columns=boxColumns)

# Repeat each sample id once per slot (vectorised replacement for the nested
# Python loop) so it aligns row-for-row with predDf.
idLst = np.repeat(trainData.iloc[:, 0].values, boxesPerSample).tolist()
assert len(idLst) == len(predDf), "id column and box rows must be the same length"

# Padding slots are all-None and are removed here; the original row index is
# kept (not reset), and box fields remain strings exactly as parsed.
df = pd.concat([pd.DataFrame(idLst, columns=['id']), predDf],axis=1).dropna()
df.head()

Unnamed: 0,id,center_x,center_y,center_z,width,length,height,yaw,class_name
0,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2680.2830359778527,698.1969292852777,-18.04776692365821,2.064,5.488,2.053,2.6041643845397946,car
1,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2691.997461646401,660.8016536569899,-18.674258695658377,1.818,4.57,1.608,-0.3351760246848698,car
2,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2713.6075009338388,694.4034809694599,-18.589971933264334,1.779,4.992,1.62,2.579455758321168,car
3,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2679.986916931015,706.9101495091695,-18.349594424165826,1.798,3.903,1.722,2.5861656766206997,car
4,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,2659.352095715659,719.4174623706303,-18.442998898501283,1.936,4.427,1.921,2.601798964714126,car


In [9]:
# Build the sample_data table, keep only the rows whose fileformat is 'jpeg',
# and retain just the three columns used to link image files to their
# sample and record tokens.
sample = (
    pd.DataFrame(level5data.sample_data)
    .loc[lambda frame: frame.fileformat == 'jpeg',
         ['filename', 'sample_token', 'token']]
    .drop_duplicates()
)
sample.head()

Unnamed: 0,filename,sample_token,token
2,images/host-a007_cam6_1234740264650905006.jpeg,254cea140f7d14fc86e73f789794fb73f81c05831d40f3...,454c65ee6c297d64b17852b4c56def9781a84dcb8558a8...
3,images/host-a007_cam3_1230936242299360006.jpeg,810503d8ca51e9692021ecb56e2b2f2098beaf396cd207...,779018e1f92debcf5c5c28e9577677c6595679780bdf9b...
4,images/host-a004_cam2_1233685223917652006.jpeg,119b8c4bbaf1493bebaaa7d823934a8a3f5a80fb095630...,d966960819eb0aca2883cd3c8d8c5e2540bda952a8cada...
5,images/host-a101_cam5_1242748828932442006.jpeg,32a71cf987e8922a85d052bfcc477e19c56733f453bc94...,c38472dca62728a33c15da9de3bc21aa57a063881aafaa...
6,images/host-a011_cam5_1233090640267719006.jpeg,d1b36f15c13de84d38a795dbbc2fe42a848f5f83968d42...,0cdaef801e20d2d917dd35719c224447c6eff899dfda55...


In [10]:
# Build the annotation table and unpack the first three entries of each
# `translation` value into separate center_x / center_y / center_z columns,
# then keep only the columns needed downstream.
annot = (
    pd.DataFrame(level5data.sample_annotation)
    .assign(
        center_x=lambda frame: frame.translation.str[0],
        center_y=lambda frame: frame.translation.str[1],
        center_z=lambda frame: frame.translation.str[2],
    )
    [['sample_token', 'token', 'category_name', 'center_x', 'center_y', 'center_z']]
)
annot.head()

Unnamed: 0,sample_token,token,category_name,center_x,center_y,center_z
0,db8b47bd4ebdf3b3fb21598bb41bd8853d12f8d2ef25ce...,92bff46db1dbfc9679edc8091770c4256ac3c027e9f0a9...,car,2680.283036,698.196929,-18.047767
1,edf37c1fb1024ba0c1f53ebbf10b6797f781199a9f0e4e...,04c4c9883de582c5c4e7a45273a978a52b0432de18883f...,car,1208.642685,1585.181295,-23.562364
2,1841b7895e7163a2c1be87e8c1740e759a910cd59157b8...,4226ce1636193ced735e21ff3c6dcc292415c72d0f84b4...,car,848.934679,2587.287056,-20.159957
3,a970c764e7abd7ebc2cc4599a6e3ae58c9773ea3c9cf1d...,f2446d4ca4b3e79e982cf0a4134af12e71e7b59df42f42...,car,1994.432028,1128.301643,-18.054752
4,b909de93ba19460eeea58074ccc1ef6bb302435f37b48c...,1f5bdec4a1dff45dbbb4db094445dd4051eb39c9b4a2a3...,car,1030.387716,1667.131317,-23.76482
