# Trial Records

This notebook processes autonomous trial telemetry data.
The data from each autonomous trial run is saved in its own CSV file. 

Steps:
1. Create dataframe of models with basic stats by combining trial summary CSV with model history record
1. Create dataframe of each model's consolidated/processed trial data
1. Save everything

In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
# --- Trial layout ---
# Trial ids 1..N_TRIALS are looked up for every model.
N_TRIALS = 5
# Divisor that converts summed per-lap activeNode progress into a lap count.
NODES_PER_LAP = 305

# --- Input locations ---
# NOTE(review): data_dir is an absolute, machine-specific path; the notebook
# only runs unchanged on a machine that has this directory.
data_dir = '/home/grant/projects/donkeysim-client/data'
# Despite the "_dir" suffix this is the path of a CSV file.
history_dir = '../models/model_history.csv'
trial_report_csv = '../data/trial_report.csv'

# --- Output locations ---
model_parameters_pickle = '../data/model_parameters.pkl'
trial_stats_pickle = '../data/trial_stats.pkl'

# --- Column selections ---
# Columns kept from each trial's telemetry when the per-model frames are saved.
lap_cols = [
    'steering_angle',
    'throttle',
    'speed',
    'activeNode',
    'pos_x',
    'pos_z',
    'lap',
]
# Per-model settings copied from the trial report.
model_cols = ['batch', 'output', 'scaler']
# Telemetry channels that get a 0/1 "used by this model" flag.
telem_cols = ['activeNode', 'pitch', 'pos_x', 'pos_z', 'roll', 'speed', 'yaw']

In [3]:
# Load the model history table; index_col=0 makes the CSV's first column the row index.
history_df = pd.read_csv(history_dir, index_col=0)
# Preview the first rows to sanity-check the load.
history_df.head()

Unnamed: 0,model,history,mae_score,mse_score,r2_score,rmse_score,rmae_score,batch_size,scaler
0,../models/11_06_2021/16_31_26/model_0.h5,"{'loss': [0.11859388649463654, 0.0753109157085...",,,,,,,
1,../models/11_06_2021/16_31_26/model_1.h5,"{'loss': [0.03537946194410324, 0.0303303040564...",,,,,,,
2,../models/11_06_2021/16_31_26/model_2.h5,"{'loss': [0.03537946194410324, 0.0303303040564...",,,,,,,
3,../models/11_06_2021/16_31_26/model_3.h5,"{'loss': [0.0008957930258475244, 0.00078543438...",,,,,,,
4,../models/11_06_2021/16_31_26/model_4.h5,"{'loss': [0.11042173951864243, 0.0588623918592...",,,,,,,


In [4]:
# Load the trial summary report. Later cells look up a single row per
# (id, trial) pair and use its 'csv' value as that trial's data directory name.
trial_reports = pd.read_csv(trial_report_csv)
# Preview the first rows to sanity-check the load.
trial_reports.head()

Unnamed: 0,id,batch,output,telem,scaler,trial,laps,csv
0,131,128,s,"speed, pitch, yaw, roll, activeNode",mm,1,5,20_40_26
1,131,128,s,"speed, pitch, yaw, roll, activeNode",mm,2,1,22_11_33
2,131,128,s,"speed, pitch, yaw, roll, activeNode",mm,3,0,22_14_01
3,131,128,s,"speed, pitch, yaw, roll, activeNode",mm,4,1,22_57_27
4,131,128,s,"speed, pitch, yaw, roll, activeNode",mm,5,1,22_59_12


In [5]:
# Inspect the inferred column dtypes before grouping and filtering on them.
trial_reports.dtypes

id         int64
batch      int64
output    object
telem     object
scaler    object
trial      int64
laps       int64
csv       object
dtype: object

### Create DataFrame of Models

In [6]:
# Build one row per model: its settings (model_cols) plus a 0/1 flag for each
# telemetry channel (telem_cols) named in that model's trial reports.
model_list = []
# Group once instead of re-running groupby for every column of every model.
# sort=False yields models in order of first appearance, matching .unique().
for m, model_trials in trial_reports.groupby('id', sort=False):
    model = {'id': m}
    for mc in model_cols:
        # The first value seen for the model is taken as its setting.
        model[mc] = model_trials[mc].iloc[0]
    # Break the telemetry descriptions into individual field names so flags
    # are set by whole-name match instead of a substring search over the
    # printed form of an array. Commas and/or whitespace separate the names.
    telem_fields = {
        field
        for telem in model_trials['telem'].dropna().astype(str).unique()
        for field in telem.replace(',', ' ').split()
    }
    for tc in telem_cols:
        model[tc] = int(tc in telem_fields)
    model_list.append(model)
models = pd.DataFrame(model_list)
models

Unnamed: 0,id,batch,output,scaler,activeNode,pitch,pos_x,pos_z,roll,speed,yaw
0,131,128,s,mm,1,1,0,0,1,1,1
1,141,32,s,ss,1,1,1,1,1,1,1
2,143,128,s,ss,1,1,1,1,1,1,1
3,152,1024,d,mm,1,1,1,1,1,1,1
4,157,512,d,ss,1,1,1,1,1,1,1
5,166,2048,s,ss,0,0,1,1,0,1,1
6,170,128,s,mm,0,0,1,1,0,1,1
7,171,256,s,mm,0,0,1,1,0,1,1
8,172,512,s,mm,0,0,1,1,0,1,1
9,173,1024,s,mm,0,0,1,1,0,1,1


### Process Trial Data
For each model:
* Record model history metrics:
* * Mean Absolute Error (mae)
* * Mean Squared Error (mse)
* * Root Mean Squared Error (rmse)
* Process trial data
* * Record path to trial's data
* * Calculate total laps over all trials
* * Record lap times
* * Identify and record the fastest lap and its lap time
* * Calculate average speed over all trials
* Create DataFrame with all the above data
* Save DataFrame to pickle

In [7]:
# Build trial_dict as
#   {model_id: {'mae' | 'mse' | 'rmse': score, 'trial_<n>': {...trial stats...}}}
# and save each model's complete-lap telemetry (lap_cols only) to its own pickle.
trial_dict = {}
for model_id in models['id'].unique():
    trial_frames_save_file = f'../data/trials/model_{model_id}_trials.pkl'
    trial_dict[model_id] = {}
    trial_frames = []
    # NOTE(review): .iloc is a positional lookup, so this assumes row N of the
    # history CSV describes model id N -- confirm against how that CSV is written.
    model_history = history_df.iloc[model_id]
    for score in ['mae', 'mse', 'rmse']:
        trial_dict[model_id][score] = model_history[f'{score}_score']
    # Filter the report down to this model once rather than once per trial.
    model_reports = trial_reports.loc[trial_reports['id'] == model_id]
    for trial_id in range(1, N_TRIALS+1):
        trial = {}
        ## Load trial data CSV
        # .item() raises ValueError unless exactly one report row matches.
        trial_dir = model_reports.loc[model_reports['trial'] == trial_id, 'csv'].item()
        trial_path = f'{data_dir}/model_{model_id}/trials/{trial_dir}/data.csv'
        trial_df = pd.read_csv(trial_path)
        trial['directory'] = trial_dir
        trial['path'] = trial_path
        ## Nuke the pre-trial lap
        trial_df = trial_df.loc[trial_df['lap'] != 0, :]
        # Progress = sum over laps of the highest activeNode reached in that lap.
        trial_progress = trial_df.groupby('lap')['activeNode'].max().sum()
        trial['laps'] = round(trial_progress/NODES_PER_LAP, 3)
        ## Everything past this point is complete laps only
        # The highest-numbered lap is dropped because it is treated as unfinished.
        trial_df = trial_df.loc[trial_df['lap'] != trial_df['lap'].max(), :]
        if len(trial_df) > 0:
            # Lap time = last timestamp minus first timestamp within each lap.
            time_by_lap = trial_df.groupby('lap')['time']
            lap_times = time_by_lap.max() - time_by_lap.min()
            trial['lap_times'] = round(lap_times, 3)
            trial['avg_lap'] = round(np.mean(lap_times), 3)
            trial['fastest_lap'] = lap_times.idxmin()
            trial['fastest_time'] = round(lap_times.min(), 3)
            trial['avg_speed'] = round(trial_df['speed'].mean(), 3)
            ## Record the relevant information for plotting
            # No model_id column is added: lap_cols does not include one, so it
            # would be discarded here, and the output file is already per-model.
            trial_frames.append(trial_df[lap_cols].copy())
        trial_dict[model_id][f'trial_{trial_id}'] = trial
    # Models with no complete laps in any trial get no telemetry pickle.
    if trial_frames:
        pd.concat(trial_frames).to_pickle(trial_frames_save_file)

## Save Data

In [8]:
# Persist the per-model parameter table as a pandas pickle.
models.to_pickle(model_parameters_pickle)

# Persist the nested per-model trial statistics dict with the pickle module.
with open(trial_stats_pickle, mode='wb') as stats_file:
    pickle.dump(trial_dict, stats_file)