## Laptime2Rank-evaluate-statusmodel-v2

based on: Laptime2Rank-evaluate

+ go beyond curtrack and zerotrack by modeling the track status
+ add pit status modeling


rank prediction by laptime forecasting models

support:
+ train/test split by ratio or event
+ incremental training evaluation(adjust ratio)


In [14]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import mxnet as mx
from mxnet import gluon
import pickle
import json
import random
from scipy import stats
from sklearn.metrics import mean_squared_error
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from pathlib import Path
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.deep_factor import DeepFactorEstimator
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.trainer import Trainer
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator, MultivariateEvaluator
from gluonts.distribution.multivariate_gaussian import MultivariateGaussianOutput
from gluonts.model.predictor import Predictor
from gluonts.model.prophet import ProphetPredictor
from gluonts.model.r_forecast import RForecastPredictor
from indycar.model.NaivePredictor import NaivePredictor
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [22]:
import os
random.seed()
os.getcwd()
#GPUID = 1

'/scratch/hpda/indycar/notebook/12.StatusModel'

In [3]:
def load_data(event, year):
    inputfile = '../data/final/C_'+ event +'-' + year + '-final.csv'
    outputprefix = year +'-' + event + '-'
    dataset = pd.read_csv(inputfile)
    #dataset.info(verbose=True)    
    
    final_lap = max(dataset.completed_laps)
    total_laps = final_lap + 1

    # get records for the cars that finish the race
    completed_car_numbers= dataset[dataset.completed_laps == final_lap].car_number.values
    completed_car_count = len(completed_car_numbers)

    print('count of completed cars:', completed_car_count)
    print('completed cars:', completed_car_numbers)

    #make a copy
    alldata = dataset.copy()
    dataset = dataset[dataset['car_number'].isin(completed_car_numbers)]
    rankdata = alldata.rename_axis('MyIdx').sort_values(by=['elapsed_time','MyIdx'], ascending=True)
    rankdata = rankdata.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')
    
    cldata = make_cl_data(dataset)
    acldata = make_cl_data(alldata)

    return alldata, rankdata, acldata

# make indy car completed_laps dataset
# car_number, completed_laps, rank, elapsed_time, rank_diff, elapsed_time_diff 
def make_cl_data(dataset):

    # pick up data with valid rank
    rankdata = dataset.rename_axis('MyIdx').sort_values(by=['elapsed_time','MyIdx'], ascending=True)
    rankdata = rankdata.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')

    # resort by car_number, lap
    uni_ds = rankdata.sort_values(by=['car_number', 'completed_laps', 'elapsed_time'], ascending=True)    
    #uni_ds = uni_ds.drop(["unique_id", "best_lap", "current_status", "track_status", "lap_status",
    #                  "laps_behind_leade","laps_behind_prec","overall_rank","pit_stop_count",
    #                  "last_pitted_lap","start_position","laps_led"], axis=1)
    
    uni_ds = uni_ds.drop(["unique_id", "best_lap", 
                      "laps_behind_leade","laps_behind_prec","overall_rank","pit_stop_count",
                      "last_pitted_lap","start_position","laps_led"], axis=1)
        
    carnumber = set(uni_ds['car_number'])
    print('cars:', carnumber)
    print('#cars=', len(carnumber))
   
    # faster solution , uni_ds already sorted by car_number and lap
    uni_ds['rank_diff'] = uni_ds['rank'].diff()
    mask = uni_ds.car_number != uni_ds.car_number.shift(1)
    uni_ds['rank_diff'][mask] = 0
    
    uni_ds['time_diff'] = uni_ds['elapsed_time'].diff()
    mask = uni_ds.car_number != uni_ds.car_number.shift(1)
    uni_ds['time_diff'][mask] = 0
    
    #df = uni_ds[['car_number','completed_laps','rank','elapsed_time','rank_diff','time_diff']]
    df = uni_ds[['car_number','completed_laps','rank','elapsed_time',
                 'rank_diff','time_diff',"current_status", "track_status", "lap_status"]]
    
    return df

In [32]:
def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    return np.isnan(y), lambda z: z.nonzero()[0]

def test_flag(a, bitflag):
    return (a & bitflag) ==  bitflag

#
# remove NaN at the tail
# there should be no nans in the middle of the ts
COL_LAPTIME=0
COL_RANK=1
COL_TRACKSTATUS=2
COL_LAPSTATUS=3
COL_TIMEDIFF=4
MODE_ORACLE = 0
MODE_NOLAP = 1
MODE_NOTRACK = 2
MODE_TESTZERO = 4
MODE_TESTCURTRACK = 8

MODE_PREDTRACK = 16
#MODE_STR={MODE_ORACLE:'oracle', MODE_NOLAP:'nolap',MODE_NOTRACK:'notrack',MODE_TEST:'test'}

def make_dataset(runs, prediction_length, freq, 
                       useeid = False,
                       run_ts=COL_LAPTIME, 
                       train_ratio = 0.8,
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE,
                       test_cars = [],
                       half_moving_win = True 
                ):
    """
    split the ts to train and test part by the ratio
    
    oracle_mode: false to simulate prediction in real by 
        set the covariates of track and lap status as nan in the testset
            
    
    """    
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
   
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        #statistics on the ts length
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        max_len = int(np.max(ts_len))
        train_len = int(np.max(ts_len) * train_ratio)
        
        print(f'====event:{events[_data[0]]}, train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)}')
                
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
            rec = _data[2][rowid].copy()
            
            #remove nan(only tails)
            nans, x= nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~np.isnan(rec[run_ts,:])]
            
            # remove short ts
            totallen = rec.shape[1]
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid

            #eval on carids
            if test_cars and (carno not in test_cars):
                continue                
            
            if useeid:
                static_cat = [carid, _data[0]]    
            else:
                static_cat = [carid]    
                
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0
                
            # split and add to dataset record
            _train.append({'target': rec[run_ts,:train_len].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': static_cat,
                            'feat_dynamic_real': [rec[COL_TRACKSTATUS,:train_len],
                                   rec[COL_LAPSTATUS,:train_len]]
                          }
                          )
            
            # multiple test ts(rolling window as half of the prediction_length)
            test_rec_cnt = 0
            step = -int(prediction_length/2) if half_moving_win else -prediction_length
            for endpos in range(max_len, train_len+prediction_length, step):
                
                
                #check if enough for this ts
                if endpos > totallen:
                    continue
                        
                track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                lap_rec = rec[COL_LAPSTATUS, :endpos].copy()
                
                # test mode
                if test_flag(oracle_mode, MODE_TESTCURTRACK):
                    # since nan does not work, use cur-val instead
                    track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                    #track_rec[-prediction_length:] = random.randint(0,1)
                    #lap_rec[-prediction_length:] = lap_rec[-prediction_length - 1]
                    lap_rec[-prediction_length:] = 0
                elif test_flag(oracle_mode, MODE_TESTZERO):
                    #set prediction part as nan
                    #track_rec[-prediction_length:] = np.nan
                    #lap_rec[-prediction_length:] = np.nan
                    track_rec[-prediction_length:] = 0
                    lap_rec[-prediction_length:] = 0                    
                
                _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': static_cat,
                            'feat_dynamic_real': [track_rec,lap_rec]
                            #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                            #       rec[COL_LAPSTATUS,:endpos]] 
                             }
                          )   
                test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'prediction_length:{prediction_length},train len:{len(train_set)}, test len:{len(test_set)}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set



# endpos -> vector of prediction_length
_track_pred  = {}
_track_true  = {}
def init_track_model():
    global _track_pred,_track_true
    _track_pred = {}
    _track_true  = {}
    
def get_track_model(track_rec, endpos, prediction_length, context_len=10):
    """
    return the predicted track status
    """
    global _track_pred,_track_true
    # this is the perfect track model for Indy500 2018
    track_model = [6,4,4,5,6,6,4]
    if endpos in _track_pred:
        return _track_pred[endpos]
    else:
        #get yflag lap count from the start pred point
        yflaplen = 0
        for i in range(1, context_len):
            if track_rec[- prediction_length - i] == 1:
                yflaplen += 1
            else:
                break
                
        #laps remain, fill into the future
        trackpred = np.array([0 for x in range(prediction_length)])
        
        yflap_pred = random.choice(track_model)
        if yflaplen > 0 and yflap_pred > yflaplen:
            trackpred[:(yflap_pred - yflaplen)] = 1
        _track_pred[endpos] = trackpred
        
        _track_true[endpos]  = track_rec[- prediction_length:].copy()
        
        return trackpred

def make_dataset_byevent(runs, prediction_length, freq, 
                       useeid = False,
                       run_ts=COL_LAPTIME, 
                       test_event = 'Indy500',
                       test_cars = [],  
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE,
                       half_moving_win = True,
                       train_ratio=0.8
                ):
    """
    split the ts to train and test part by the ratio
    
    oracle_mode: false to simulate prediction in real by 
        set the covariates of track and lap status as nan in the testset
            
    
    """    
    init_track_model()
    
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
   
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        if events[_data[0]] == test_event:
            test_mode = True
        
        else:
            test_mode = False
            
            
        #statistics on the ts length
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        max_len = int(np.max(ts_len))
        train_len = int(np.max(ts_len) * train_ratio)
        
        
        print(f'====event:{events[_data[0]]}, train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)}')
                
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
            rec = _data[2][rowid].copy()
            
            #remove nan(only tails)
            nans, x= nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~np.isnan(rec[run_ts,:])]
            
            # remove short ts
            totallen = rec.shape[1]
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid
                
            
            if useeid:
                static_cat = [carid, _data[0]]    
            else:
                static_cat = [carid]    
                
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0

            test_rec_cnt = 0
            if not test_mode:
                
                # all go to train set
                _train.append({'target': rec[run_ts,:].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [rec[COL_TRACKSTATUS,:],
                                       rec[COL_LAPSTATUS,:]]
                              }
                              )
            else:
                # reset train_len
                #context_len = prediction_length*2
                #if context_len < 10:
                #    context_len = 10
                
                context_len = train_len
                
                # multiple test ts(rolling window as half of the prediction_length)

                step = -int(prediction_length/2) if half_moving_win else -prediction_length
                
                #bug fix, fixed the split point for all cars/ts
                for endpos in range(max_len, context_len+prediction_length, 
                                    step):

                    #check if enough for this ts
                    if endpos > totallen:
                        continue
                    
                    track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                    lap_rec = rec[COL_LAPSTATUS, :endpos].copy()

                    # test mode
                    if test_flag(oracle_mode, MODE_TESTCURTRACK):
                        # since nan does not work, use cur-val instead
                        track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                        #track_rec[-prediction_length:] = random.randint(0,1)
                        #lap_rec[-prediction_length:] = lap_rec[-prediction_length - 1]
                        lap_rec[-prediction_length:] = 0
                    elif test_flag(oracle_mode, MODE_PREDTRACK):
                        predrec = get_track_model(track_rec, endpos, prediction_length)
                        track_rec[-prediction_length:] = predrec
                        lap_rec[-prediction_length:] = 0
                    elif test_flag(oracle_mode, MODE_TESTZERO):
                        #set prediction part as nan
                        #track_rec[-prediction_length:] = np.nan
                        #lap_rec[-prediction_length:] = np.nan
                        track_rec[-prediction_length:] = 0
                        lap_rec[-prediction_length:] = 0                    

                    _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [track_rec,lap_rec]
                                #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                                #       rec[COL_LAPSTATUS,:endpos]] 
                                 }
                              )   
                    test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'train len:{len(train_set)}, test len:{len(test_set)}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set

def save_dataset(datafile,freq, prediction_length, cardinality, train_ds, test_ds):
    with open(datafile, 'wb') as f:
        #pack [global_carids, laptime_data]
        savedata = [freq, prediction_length, cardinality, train_ds, test_ds]
        #savedata = [freq, train_set, test_set]
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)        

### test for Indy500

In [5]:
def predict(test_ds,predictor):
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
    print(f'tss len={len(tss)}, forecasts len={len(forecasts)}')
    
    return tss, forecasts

   
def run_prediction(test_ds, prediction_length, trainid):
    with mx.Context(mx.gpu(7)):    
        pred_ret = {}

        rootdir = f'../models/remote/laptime-{trainid}/'
        # deepAR-Oracle
        model_name = 'deepAR-Oracle-curtrack'
        model=f'deepAR-Oracle-laptime-curtrack-indy-f1min-t{prediction_length}-e1000-r1_curtrack_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # deepAR-Oracle
        model_name = 'deepAR-Oracle'
        model=f'deepAR-Oracle-laptime-all-indy-f1min-t{prediction_length}-e1000-r1_oracle_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # deepAR
        model_name = 'deepAR'
        model=f'deepAR-laptime-all-indy-f1min-t{prediction_length}-e1000-r1_deepar_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # naive
        model_name = 'naive'
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  NaivePredictor(freq= freq, prediction_length = prediction_length)
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # arima
        model_name = 'arima'
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  RForecastPredictor(method_name='arima',freq= freq, 
                                        prediction_length = prediction_length,trunc_length=60)
        #tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        return pred_ret    
    
def run_prediction_ex(test_ds, prediction_length, model_name,trainid):
    with mx.Context(mx.gpu(7)):    
        pred_ret = []

        rootdir = f'../models/remote/laptime-{trainid}/'
        # deepAR-Oracle
        if model_name == 'curtrack':
            model=f'deepAR-Oracle-laptime-curtrack-indy-f1min-t{prediction_length}-e1000-r1_curtrack_t{prediction_length}'
            modeldir = rootdir + model
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  Predictor.deserialize(Path(modeldir))
            print(f'loading model...done!, ctx:{predictor.ctx}')
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]

        elif model_name == 'zerotrack':
            model=f'deepAR-Oracle-laptime-nolap-zerotrack-indy-f1min-t{prediction_length}-e1000-r1_zerotrack_t{prediction_length}'
            modeldir = rootdir + model
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  Predictor.deserialize(Path(modeldir))
            print(f'loading model...done!, ctx:{predictor.ctx}')
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]
            
        # deepAR-Oracle
        elif model_name == 'oracle':
            model=f'deepAR-Oracle-laptime-all-indy-f1min-t{prediction_length}-e1000-r1_oracle_t{prediction_length}'
            modeldir = rootdir + model
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  Predictor.deserialize(Path(modeldir))
            print(f'loading model...done!, ctx:{predictor.ctx}')
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]

        # deepAR
        elif model_name == 'deepAR':
            model=f'deepAR-laptime-all-indy-f1min-t{prediction_length}-e1000-r1_deepar_t{prediction_length}'
            modeldir = rootdir + model
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  Predictor.deserialize(Path(modeldir))
            print(f'loading model...done!, ctx:{predictor.ctx}')
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]

        # naive
        elif model_name == 'naive':
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  NaivePredictor(freq= freq, prediction_length = prediction_length)
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]

        # arima
        elif model_name == 'arima':
            print(f'predicting model={model_name}, plen={prediction_length}')
            predictor =  RForecastPredictor(method_name='arima',freq= freq, 
                                            prediction_length = prediction_length,trunc_length=60)
            tss, forecasts = predict(test_ds,predictor)
            pred_ret = [tss, forecasts]
        else:
            print(f'error: model {model_name} not support yet!')

        return pred_ret     

In [6]:
#calc rank
def eval_rank_bytimediff(test_ds,tss,forecasts,prediction_length):
    """
    timediff models
    
    works for one event only
    
    """

    carlist = []

    # carno-lap# -> elapsed_time[] array
    forecasts_et = dict()

    ds_iter =  iter(test_ds)
    for idx in range(len(test_ds)):
        test_rec = next(ds_iter)
        #global carid
        carno = decode_carids[test_rec['feat_static_cat'][0]]
        #print('car no:', carno)

        if carno not in carlist:
            carlist.append(carno)

        # calc elapsed time
        prediction_len = forecasts[idx].samples.shape[1]
        if prediction_length != prediction_len:
            print('error: prediction_len does not match, {prediction_length}:{prediction_len}')
            return []
        
        #forecast_laptime_mean = np.mean(forecasts[idx].samples, axis=0).reshape((prediction_len,1))
        forecast_laptime_mean = np.median(forecasts[idx].samples, axis=0).reshape((prediction_len,1))
        
        timediff_array = tss[idx].values.copy()

        #save the prediction
        completed_laps = len(tss[idx]) - prediction_len + 1
        #print('car no:', carno, 'completed_laps:', completed_laps)
        #key = '%s-%s'%(carno, completed_laps)
        #forecasts_et[key] = elapsed_time[-prediction_len:].copy()
        if completed_laps not in forecasts_et:
            forecasts_et[completed_laps] = {}
        forecasts_et[completed_laps][carno] = [timediff_array[-prediction_len:].copy(),
                                                   forecast_laptime_mean.copy()]


    # calc rank
    rank_ret = []
    for lap in forecasts_et.keys():
        #get car list for this lap
        carlist = list(forecasts_et[lap].keys())
        #print('carlist:', carlist)

        caridmap={key:idx for idx, key in enumerate(carlist)}

        #fill in data
        time_diff = np.zeros((2, len(carlist), prediction_len))
        for carno in carlist:
            carid = caridmap[carno]
            time_diff[0, carid, :] = forecasts_et[lap][carno][0].reshape((prediction_len))
            time_diff[1, carid, :] = forecasts_et[lap][carno][1].reshape((prediction_len))

        #calculate rank    
        idx = np.argsort(time_diff[0], axis=0)
        true_rank = np.argsort(idx, axis=0)

        idx = np.argsort(time_diff[1], axis=0)
        pred_rank = np.argsort(idx, axis=0)

        rank_ret.append([lap, time_diff, true_rank, pred_rank])
        
    return rank_ret,forecasts_et
    
#calc rank
def eval_rank(test_ds,tss,forecasts,prediction_length):
    """
    dependency:
        start_offset[]; for one specific event
    """

    carlist = []

    # carno-lap# -> elapsed_time[] array
    forecasts_et = dict()

    ds_iter =  iter(test_ds)
    for idx in range(len(test_ds)):
        test_rec = next(ds_iter)
        #global carid
        carno = decode_carids[test_rec['feat_static_cat'][0]]
        #print('car no:', carno)

        if carno not in carlist:
            carlist.append(carno)

        #start_offset is global var
        offset = start_offset[(start_offset['car_number']==carno)].elapsed_time.values[0] 
        #print('start_offset:', offset)

        # calc elapsed time
        prediction_len = forecasts[idx].samples.shape[1]
        if prediction_length != prediction_len:
            print('error: prediction_len does not match, {prediction_length}:{prediction_len}')
            return []
        
        forecast_laptime_mean = np.mean(forecasts[idx].samples, axis=0).reshape((prediction_len,1))
        #forecast_laptime_mean = np.median(forecasts[idx].samples, axis=0).reshape((prediction_len,1))

        laptime_array = tss[idx].values.copy()
        elapsed_time = np.cumsum(laptime_array) + offset

        laptime_array = tss[idx].values.copy()
        laptime_array[-prediction_len:] = forecast_laptime_mean 
        elapsed_time_hat = np.cumsum(laptime_array) + offset

        #save the prediction
        completed_laps = len(tss[idx]) - prediction_len + 1
        #print('car no:', carno, 'completed_laps:', completed_laps)
        #key = '%s-%s'%(carno, completed_laps)
        #forecasts_et[key] = elapsed_time[-prediction_len:].copy()
        if completed_laps not in forecasts_et:
            forecasts_et[completed_laps] = {}
        #forecasts_et[completed_laps][carno] = [elapsed_time[-prediction_len-1:].copy(),
        #                                           elapsed_time_hat[-prediction_len-1:].copy()]
        forecasts_et[completed_laps][carno] = [elapsed_time[-prediction_len:].copy(),
                                                   elapsed_time_hat[-prediction_len:].copy()]


    # calc rank
    rank_ret = []
    for lap in forecasts_et.keys():
        #get car list for this lap
        carlist = list(forecasts_et[lap].keys())
        #print('carlist:', carlist)

        caridmap={key:idx for idx, key in enumerate(carlist)}

        #fill in data
        #elapsed_time = np.zeros((2, len(carlist), prediction_len+1))
        elapsed_time = np.zeros((2, len(carlist), prediction_len))
        for carno in carlist:
            carid = caridmap[carno]
            elapsed_time[0, carid, :] = forecasts_et[lap][carno][0]
            elapsed_time[1, carid, :] = forecasts_et[lap][carno][1]

        #calculate rank    
        idx = np.argsort(elapsed_time[0], axis=0)
        true_rank = np.argsort(idx, axis=0)

        idx = np.argsort(elapsed_time[1], axis=0)
        pred_rank = np.argsort(idx, axis=0)

        rank_ret.append([lap, elapsed_time, true_rank, pred_rank])
        
    return rank_ret,forecasts_et
   
def get_acc(rank_ret,prediction_length):    
    # evaluate
    #top1 accuracy
    top1acc = 0
    top1acc_farmost = 0
    top5acc = 0
    top5acc_farmost = 0
    tau = 0
    rmse = 0.
    
    for rec in rank_ret:
        trueRank = rec[2]
        predRank = rec[3]

        #top1 , rank = 0, first col is not prediction
        top1acc += np.sum((trueRank==0) & (predRank==0)) 
        
        top1acc_farmost += np.sum((trueRank[:,-1]==0) & (predRank[:,-1]==0))
        
        #top5
        top5acc += np.sum((trueRank<5) & (predRank<5)) 
        
        top5acc_farmost += np.sum((trueRank[:,-1]<5) & (predRank[:,-1]<5))
        
        # tau
        tao, _ = stats.kendalltau(trueRank, predRank)
        tau += tao
        
        #rmse
        rmse += mean_squared_error(predRank,trueRank)
        

    top1acc = top1acc *1.0/ (len(rank_ret)*prediction_length)
    top1acc_farmost = top1acc_farmost *1.0/ (len(rank_ret))
    top5acc = top5acc *1.0/ (5*len(rank_ret)*prediction_length)
    top5acc_farmost = top5acc_farmost *1.0/ (5*len(rank_ret))
    tau = tau/len(rank_ret)
    rmse = rmse/len(rank_ret)
    
        
    print(f'total:{len(rank_ret)}, prediction_length:{prediction_length}') 
    print('top1acc=', top1acc,
          'top1acc_farmost=', top1acc_farmost,
          'top5acc=', top5acc,
          'top5acc_farmost=', top5acc_farmost,
         )
    print('tau = ', tau,
         'rmse = ', rmse)
    
    return (top1acc,top1acc_farmost,top5acc,top5acc_farmost,tau,rmse)
    
def get_top1acc_farmost(rank_ret,prediction_len):    
    # evaluate
    #top1 accuracy
    hitcnt = 0
    for rec in rank_ret:
        trueRank = rec[2]
        predRank = rec[3]

        #top1 , rank = 0, first col is not prediction
        hitcnt += np.sum((trueRank[:,-1]==0) & (predRank[:,-1]==0)) 

    print('total:', hitcnt, 'top1acc_farmost=', hitcnt *1.0/ (len(rank_ret)*prediction_length))    



In [34]:
def run_exp_old(prediction_length, half_moving_win, train_ratio=0.8):
    ### create test dataset
    test_cars = []
    train_ds, test_ds,_,_ = make_dataset_byevent(events_id[test_event], prediction_length,freq, 
                                         oracle_mode=MODE_TESTCURTRACK,
                                         run_ts = COL_LAPTIME,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win,
                                         train_ratio=train_ratio)
    pred_ret = run_prediction(test_ds, prediction_length)

    models = ['deepAR-Oracle','deepAR-Oracle-curtrack','deepAR','naive','arima']
    retdf = []
    for model in models:
        print('model:', model)
        tss, forecasts = pred_ret[model]

        rank_ret,_ = eval_rank(test_ds,tss,forecasts,prediction_length)
        metrics = get_acc(rank_ret,prediction_length)
        ret = [model, prediction_length, half_moving_win]
        ret.extend(metrics)
        retdf.append(ret)
        
    return pred_ret, test_ds, retdf

def run_exp(prediction_length, half_moving_win, train_ratio=0.8, trainid="r0.8"):
    """
    dependency: test_event, test on one event only
    
    """
    
    test_cars = []
    models = ['oracle','deepAR','naive']
    #,'arima']
    retdf = []
    pred_ret = {}
    ds_ret = {}
    
    ### create test dataset
    train_ds, test_ds,_,_ = make_dataset_byevent(events_id[test_event], prediction_length,freq, 
                                         oracle_mode=MODE_ORACLE,
                                         run_ts = COL_LAPTIME,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win,
                                         train_ratio=train_ratio)
    
    for model in models:
        print('model:', model)
        tss, forecasts = run_prediction_ex(test_ds, prediction_length, model,trainid=trainid)
        pred_ret[model] = [tss, forecasts]
        ds_ret[model] = test_ds

        rank_ret,_ = eval_rank(test_ds,tss,forecasts,prediction_length)
        metrics = get_acc(rank_ret,prediction_length)
        ret = [model, prediction_length, half_moving_win,trainid]
        ret.extend(metrics)
        retdf.append(ret)
    
    # special model with test_ds
    models = ['curtrack']        
    train_ds, test_ds,_,_ = make_dataset_byevent(events_id[test_event], prediction_length,freq, 
                                         oracle_mode=MODE_TESTCURTRACK,
                                         run_ts = COL_LAPTIME,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win,
                                         train_ratio=train_ratio)
    
    for model in models:
        print('model:', model)
        tss, forecasts = run_prediction_ex(test_ds, prediction_length, model,trainid=trainid)
        pred_ret[model] = [tss, forecasts]
        ds_ret[model] = test_ds
        
        rank_ret,_ = eval_rank(test_ds,tss,forecasts,prediction_length)
        metrics = get_acc(rank_ret,prediction_length)
        ret = [model, prediction_length, half_moving_win,trainid]
        ret.extend(metrics)
        retdf.append(ret)

    # zerotrack
    models = ['zerotrack']        
    train_ds, test_ds,_,_ = make_dataset_byevent(events_id[test_event], prediction_length,freq, 
                                         oracle_mode=MODE_TESTZERO,
                                         run_ts = COL_LAPTIME,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win,
                                         train_ratio=train_ratio)
    
    for model in models:
        print('model:', model)
        tss, forecasts = run_prediction_ex(test_ds, prediction_length, model,trainid=trainid)
        pred_ret[model] = [tss, forecasts]
        ds_ret[model] = test_ds
        
        rank_ret,_ = eval_rank(test_ds,tss,forecasts,prediction_length)
        metrics = get_acc(rank_ret,prediction_length)
        ret = [model, prediction_length, half_moving_win,trainid]
        ret.extend(metrics)
        retdf.append(ret)
    
        
    return pred_ret, ds_ret, retdf

def run_exp_predtrack(prediction_length, half_moving_win, train_ratio=0.8, trainid="r0.8"):
    """
    dependency: test_event, test on one event only
    
    """
    
    test_cars = []
    models = ['oracle','curtrack','zerotrack']
    #,'arima']
    retdf = []
    pred_ret = {}
    ds_ret = {}
    
    ### create test dataset
    train_ds, test_ds,_,_ = make_dataset_byevent(events_id[test_event], prediction_length,freq, 
                                         oracle_mode=MODE_PREDTRACK,
                                         run_ts = COL_LAPTIME,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win,
                                         train_ratio=train_ratio)
    
    for model in models:
        print('model:', model)
        tss, forecasts = run_prediction_ex(test_ds, prediction_length, model,trainid=trainid)
        pred_ret[model] = [tss, forecasts]
        ds_ret[model] = test_ds

        rank_ret,_ = eval_rank(test_ds,tss,forecasts,prediction_length)
        metrics = get_acc(rank_ret,prediction_length)
        ret = [model, prediction_length, half_moving_win,trainid]
        ret.extend(metrics)
        retdf.append(ret)
    
    return pred_ret, ds_ret, retdf

### init

In [8]:
#
# parameters
#
#year = '2017'
year = '2018'
#event = 'Toronto'
#https://www.racing-reference.info/season-stats/2018/O/#
events_totalmiles=[256,500,372,268,500,310]
events_laplen = [1.022,2.5,1.5,0.894,2.5,1.25]
events = ['Phoenix','Indy500','Texas','Iowa','Pocono','Gateway']
#events = ['Gateway']

#events = ['Indy500']
#events = ['Phoenix']
events_id={key:idx for idx, key in enumerate(events)}
#works for only one event


In [9]:
stagedata = {}
global_carids = {}
traindata = None
cur_carid = 0
for event in events:
    #alldata, rankdata, acldata, flagdata
    stagedata[event] = load_data(event, year)
    
    alldata, rankdata, acldata = stagedata[event]
    carlist = set(acldata['car_number'])
    laplist = set(acldata['completed_laps'])
    print('%s: carno=%d, lapnum=%d'%(event, len(carlist), len(laplist)))

    #build the carid map
    for car in carlist:
        if car not in global_carids:
            global_carids[car] = cur_carid
            cur_carid += 1

count of completed cars: 11
completed cars: [ 1  6 27  9 28  5 20 14 15 22 30]
cars: {1, 5, 6, 9, 14, 15, 20, 22, 27, 28, 30}
#cars= 11
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 32, 59, 88, 98}
#cars= 23
Phoenix: carno=23, lapnum=251
count of completed cars: 18
completed cars: [12 20  9 27 28 22 29  1  6 15 66 98  4 88 25 60 64 23]
cars: {64, 1, 66, 98, 4, 6, 9, 12, 60, 15, 20, 22, 23, 88, 25, 27, 28, 29}
#cars= 18
cars: {1, 3, 4, 6, 7, 9, 10, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 59, 60, 64, 66, 88, 98}
#cars= 33
Indy500: carno=33, lapnum=201


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


count of completed cars: 9
completed cars: [ 9 22 27  5 28 15 30 18 10]
cars: {5, 9, 10, 15, 18, 22, 27, 28, 30}
#cars= 9
cars: {1, 3, 4, 5, 6, 7, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 47, 55, 57, 59, 60, 68, 73, 83, 88, 98}
#cars= 32
Texas: carno=32, lapnum=249
count of completed cars: 5
completed cars: [ 5 21 30  1  6]
cars: {1, 5, 6, 21, 30}
#cars= 5
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 59, 88, 98}
#cars= 22
Iowa: carno=22, lapnum=301
count of completed cars: 4
completed cars: [27 12  9 18]
cars: {9, 18, 27, 12}
#cars= 4
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 59, 88, 98}
#cars= 22
Pocono: carno=22, lapnum=201
count of completed cars: 8
completed cars: [12 27  9 22 26 21  1 10]
cars: {1, 9, 10, 12, 21, 22, 26, 27}
#cars= 8
cars: {1, 4, 5, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 59, 88, 98}
#cars= 21
Gateway: carno=21, lapnum=249


In [10]:
event='Indy500'
test_event = event
alldata, rankdata, acldata = stagedata[event]
final_lap = max(rankdata.completed_laps)
completed_car_numbers= rankdata[rankdata.completed_laps == final_lap].car_number.values

start_offset = rankdata[rankdata['completed_laps']==0][['car_number','elapsed_time']]

In [11]:
# start from here
import pickle
with open('laptime_rank_timediff-oracle-%s.pickle'%year, 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    global_carids, laptime_data = pickle.load(f, encoding='latin1')

In [12]:
freq = "1min"
#decode global_carids
decode_carids={carid:carno for carno, carid in global_carids.items()}
    
#useeid = False
#interpolate = False
#ipstr = '-ip' if interpolate else '-noip'
#ipstr = '%s-%s'%('ip' if interpolate else 'noip', 'eid' if useeid else 'noeid')
#if useeid:
#    cardinality = [len(global_carids), len(laptime_data)]
#else:
#    cardinality = [len(global_carids)]

### loop test

In [35]:
#half=[True, False]
half=[False]
#plens=[2,5,10,20,30]
plens=[2,5,10]
#trainids = ["r0.5","r0.6","r0.7"]
trainids = ["r0.7"]
#half=[True,False]
#plens=[2]

#exp_id='median-r0.8'
exp_id='mean-splitbystage-predtrack'

exp_data = []
exp_result = []

for halfmode in half:
    for plen in plens:
        for trainid in trainids:
            print('='*30)
            pred_ret, test_ds, metric_ret = run_exp_predtrack(plen, halfmode, trainid=trainid)

            #save 
            exp_data.append((pred_ret, test_ds))
            exp_result.extend(metric_ret)
            
            #
            print(_track_pred, _track_true)
        
#save result
result = pd.DataFrame(exp_result, columns = ['model' , 'prediction_length', 'halfmode',
                                   'trainid',
                                   'top1acc','top1acc_farmost','top5acc',
                                   'top5acc_farmost','tau','rmse'])
result.to_csv(f'laptime2rank-evaluate-indy500-{exp_id}-result.csv')

INFO:root:Using GPU


====event:Indy500, train_len=160, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:19
a short ts: carid=3，len=146
carno:4, totallen:200, nancount:0, test_reccnt:19
carno:6, totallen:200, nancount:0, test_reccnt:19
carno:7, totallen:193, nancount:7, test_reccnt:15
carno:9, totallen:200, nancount:0, test_reccnt:19
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:19
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:12
carno:15, totallen:200, nancount:0, test_reccnt:19
carno:17, totallen:199, nancount:1, test_reccnt:18
a short ts: carid=18，len=137
carno:19, totallen:199, nancount:1, test_reccnt:18
carno:20, totallen:200, nancount:0, test_reccnt:19
carno:21, totallen:199, nancount:1, test_reccnt:18
carno:22, totallen:200, nancount:0, test_reccnt:19
carno:23, totallen:200, nancount:0, test_reccnt:19
a short ts: carid=24，len=154
carno:25, totallen:200, nancount:0, test_reccnt:19
carno:26, totallen:198, nancount:2, te

INFO:root:Using GPU


total:19, prediction_length:2
top1acc= 0.631578947368421 top1acc_farmost= 0.5789473684210527 top5acc= 0.8315789473684211 top5acc_farmost= 0.7789473684210526
tau =  0.8403812896769554 rmse =  9.150508517670987
model: curtrack
predicting model=curtrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=459, forecasts len=459


INFO:root:Using GPU


total:19, prediction_length:2
top1acc= 0.631578947368421 top1acc_farmost= 0.5789473684210527 top5acc= 0.8157894736842105 top5acc_farmost= 0.7684210526315789
tau =  0.8391154817797029 rmse =  9.520894991100942
model: zerotrack
predicting model=zerotrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=459, forecasts len=459


INFO:root:Using GPU


total:19, prediction_length:2
top1acc= 0.6052631578947368 top1acc_farmost= 0.5263157894736842 top5acc= 0.8421052631578947 top5acc_farmost= 0.7789473684210526
tau =  0.8444542340894474 rmse =  8.523162979913554
{200: array([0, 0]), 198: array([0, 0]), 196: array([0, 0]), 194: array([0, 0]), 192: array([1, 1]), 190: array([1, 1]), 188: array([0, 0]), 186: array([0, 0]), 184: array([0, 0]), 182: array([0, 0]), 180: array([0, 0]), 178: array([0, 0]), 176: array([0, 0]), 174: array([0, 0]), 172: array([0, 0]), 170: array([0, 0]), 168: array([0, 0]), 166: array([0, 0]), 164: array([0, 0])} {200: array([0., 0.]), 198: array([0., 0.]), 196: array([0., 0.]), 194: array([0., 0.]), 192: array([1., 1.]), 190: array([1., 1.]), 188: array([0., 1.]), 186: array([0., 0.]), 184: array([0., 0.]), 182: array([0., 0.]), 180: array([0., 0.]), 178: array([0., 0.]), 176: array([0., 0.]), 174: array([0., 0.]), 172: array([0., 0.]), 170: array([0., 0.]), 168: array([0., 0.]), 166: array([0., 0.]), 164: array([

INFO:root:Using GPU


tss len=165, forecasts len=165
total:7, prediction_length:5
top1acc= 0.42857142857142855 top1acc_farmost= 0.42857142857142855 top5acc= 0.6114285714285714 top5acc_farmost= 0.5142857142857142
tau =  0.6208095260238482 rmse =  25.222005521049002
model: curtrack
predicting model=curtrack, plen=5
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=165, forecasts len=165
total:7, prediction_length:5
top1acc= 0.37142857142857144 top1acc_farmost= 0.2857142857142857 top5acc= 0.6228571428571429 top5acc_farmost= 0.4857142857142857
tau =  0.6473150459656853 rmse =  22.77290545203589
model: zerotrack
predicting model=zerotrack, plen=5
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=165, forecasts len=165
total:7, prediction_length:5
top1acc= 0.42857142857142855 top1acc_farmost= 0.42857142857142855 top5acc= 0.6114285714285714 top5acc_farmost= 0.5428571428571428
tau =  0.609568949266136 rmse =  25.579113871635606
{200: array([0, 0, 0, 0, 0]), 195: array([1, 1, 1, 0, 0]), 190: array([0, 0, 0, 0, 0]), 185: array([0, 0, 0, 0, 0]), 180: array([0, 0, 0, 0, 0]), 175: array([0, 0, 0, 0, 0]), 170: array([0, 0, 0, 0, 0])} {200: array([0., 0., 0., 0., 0.]), 195: array([1., 1., 0., 0., 0.]), 190: array([0., 0., 1., 1., 1.]), 185: array([0., 0., 0., 0., 0.]), 180: array([0., 0., 0., 0., 0.]), 175: array([0., 0., 0., 0., 0.]), 170: array([0., 0., 0., 0., 0.])}
====event:Indy500, train_len=160, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:3
a short ts: carid=3，len=146
carno:4, totallen:200, nancount:0, test_reccnt:3
carno:6, totallen:200, nancount:0, test_reccnt:3
carno:7, totallen:193, nancount:7, test_reccnt:2
carno:9, totallen:200, nancount:

INFO:root:Using GPU


tss len=67, forecasts len=67
total:3, prediction_length:10
top1acc= 0.23333333333333334 top1acc_farmost= 0.0 top5acc= 0.43333333333333335 top5acc_farmost= 0.4
tau =  0.38119232736572894 rmse =  38.03740740740741
model: curtrack
predicting model=curtrack, plen=10
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=67, forecasts len=67
total:3, prediction_length:10
top1acc= 0.1 top1acc_farmost= 0.0 top5acc= 0.36666666666666664 top5acc_farmost= 0.2
tau =  0.2993257364781661 rmse =  42.86866666666666
model: zerotrack
predicting model=zerotrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=67, forecasts len=67
total:3, prediction_length:10
top1acc= 0.3333333333333333 top1acc_farmost= 0.3333333333333333 top5acc= 0.44 top5acc_farmost= 0.3333333333333333
tau =  0.4000076157999432 rmse =  37.32255555555555
{200: array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]), 190: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 180: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])} {200: array([1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]), 190: array([0., 0., 0., 0., 0., 0., 0., 1., 1., 1.]), 180: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}


In [36]:
half=[True, False]
#plens=[2,5,10,20,30]
plens=[2,5,10]
half=[False]
trainids = ["indy500-r0.2","indy500-r0.4","indy500"]
#trainids = ["r0.5","r0.6"]
#half=[True,False]
#plens=[2]

#exp_id='median-r0.8'
exp_id='mean-splitbyevent-predtrack'

exp_data = []
exp_result = []

for halfmode in half:
    for plen in plens:
        for trainid in trainids:
            print('='*30)
            pred_ret, test_ds, metric_ret = run_exp_predtrack(plen, halfmode, 
                                                    train_ratio=0.4,
                                                    trainid=trainid)

            #save 
            exp_data.append((pred_ret, test_ds))
            exp_result.extend(metric_ret)
        
#save result
result = pd.DataFrame(exp_result, columns = ['model' , 'prediction_length', 'halfmode',
                                   'trainid',
                                   'top1acc','top1acc_farmost','top5acc',
                                   'top5acc_farmost','tau','rmse'])
result.to_csv(f'laptime2rank-evaluate-indy500-{exp_id}-result.csv')

INFO:root:Using GPU


====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:59
carno:3, totallen:146, nancount:54, test_reccnt:32
carno:4, totallen:200, nancount:0, test_reccnt:59
carno:6, totallen:200, nancount:0, test_reccnt:59
carno:7, totallen:193, nancount:7, test_reccnt:55
carno:9, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:52
carno:15, totallen:200, nancount:0, test_reccnt:59
carno:17, totallen:199, nancount:1, test_reccnt:58
carno:18, totallen:137, nancount:63, test_reccnt:27
carno:19, totallen:199, nancount:1, test_reccnt:58
carno:20, totallen:200, nancount:0, test_reccnt:59
carno:21, totallen:199, nancount:1, test_reccnt:58
carno:22, totallen:200, nancount:0, test_reccnt:59
carno:23, totallen:200, nancount:0, test_reccnt:59
carno:24, totallen:154, nancount:46, test_reccnt:36
carno:25, totallen:20

INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.6440677966101694 top1acc_farmost= 0.5932203389830508 top5acc= 0.7932203389830509 top5acc_farmost= 0.7389830508474576
tau =  0.8235096496581644 rmse =  12.496194860031538
model: curtrack
predicting model=curtrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.635593220338983 top1acc_farmost= 0.576271186440678 top5acc= 0.8135593220338984 top5acc_farmost= 0.7627118644067796
tau =  0.8352795482808256 rmse =  11.703664070988037
model: zerotrack
predicting model=zerotrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.6610169491525424 top1acc_farmost= 0.576271186440678 top5acc= 0.8220338983050848 top5acc_farmost= 0.7694915254237288
tau =  0.8277503291990825 rmse =  11.333107768873784
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:59
carno:3, totallen:146, nancount:54, test_reccnt:32
carno:4, totallen:200, nancount:0, test_reccnt:59
carno:6, totallen:200, nancount:0, test_reccnt:59
carno:7, totallen:193, nancount:7, test_reccnt:55
carno:9, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:52
carno:15, totallen:200, nancount:0, test_reccnt:59
carno:17, totallen:199, nancount:1, test_reccnt:58
carno:18, totallen:137, nancount:63, test_reccnt:27
carno:19, totallen:199, nancount:1, test_reccnt:58
carno:20, totallen:200, nancount:0, test_reccnt:59
carno:21, totall

INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.6101694915254238 top1acc_farmost= 0.5254237288135594 top5acc= 0.7983050847457627 top5acc_farmost= 0.7322033898305085
tau =  0.827641609796128 rmse =  12.222393380770756
model: curtrack
predicting model=curtrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.635593220338983 top1acc_farmost= 0.559322033898305 top5acc= 0.8254237288135593 top5acc_farmost= 0.7627118644067796
tau =  0.8373302073716012 rmse =  11.499896095597821
model: zerotrack
predicting model=zerotrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.5508474576271186 top1acc_farmost= 0.4406779661016949 top5acc= 0.8016949152542373 top5acc_farmost= 0.735593220338983
tau =  0.8325035521514413 rmse =  11.04832662904284
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:59
carno:3, totallen:146, nancount:54, test_reccnt:32
carno:4, totallen:200, nancount:0, test_reccnt:59
carno:6, totallen:200, nancount:0, test_reccnt:59
carno:7, totallen:193, nancount:7, test_reccnt:55
carno:9, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:59
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:52
carno:15, totallen:200, nancount:0, test_reccnt:59
carno:17, totallen:199, nancount:1, test_reccnt:58
carno:18, totallen:137, nancount:63, test_reccnt:27
carno:19, totallen:199, nancount:1, test_reccnt:58
carno:20, totallen:200, nancount:0, test_reccnt:59
carno:21, totalle

INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.6440677966101694 top1acc_farmost= 0.5932203389830508 top5acc= 0.8169491525423729 top5acc_farmost= 0.7762711864406779
tau =  0.8286163367059097 rmse =  11.799994406834164
model: curtrack
predicting model=curtrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.5677966101694916 top1acc_farmost= 0.4915254237288136 top5acc= 0.8152542372881356 top5acc_farmost= 0.7593220338983051
tau =  0.8197925093534516 rmse =  12.129067986184465
model: zerotrack
predicting model=zerotrack, plen=2
loading model...done!, ctx:gpu(0)
tss len=1568, forecasts len=1568


INFO:root:Using GPU


total:59, prediction_length:2
top1acc= 0.576271186440678 top1acc_farmost= 0.5084745762711864 top5acc= 0.8288135593220339 top5acc_farmost= 0.7627118644067796
tau =  0.8370114320143691 rmse =  10.633894081135587
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:23
carno:3, totallen:146, nancount:54, test_reccnt:12
carno:4, totallen:200, nancount:0, test_reccnt:23
carno:6, totallen:200, nancount:0, test_reccnt:23
carno:7, totallen:193, nancount:7, test_reccnt:21
carno:9, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:20
carno:15, totallen:200, nancount:0, test_reccnt:23
carno:17, totallen:199, nancount:1, test_reccnt:22
carno:18, totallen:137, nancount:63, test_reccnt:10
carno:19, totallen:199, nancount:1, test_reccnt:22
carno:20, totallen:200, nancount:0, test_reccnt:23
carno:21, totall

INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.5043478260869565 top1acc_farmost= 0.34782608695652173 top5acc= 0.648695652173913 top5acc_farmost= 0.5391304347826087
tau =  0.6919044251839813 rmse =  25.45737211284998
model: curtrack
predicting model=curtrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.5304347826086957 top1acc_farmost= 0.43478260869565216 top5acc= 0.648695652173913 top5acc_farmost= 0.5304347826086957
tau =  0.6896013683653525 rmse =  25.36454974413308
model: zerotrack
predicting model=zerotrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4434782608695652 top1acc_farmost= 0.34782608695652173 top5acc= 0.5947826086956521 top5acc_farmost= 0.48695652173913045
tau =  0.6428428996398322 rmse =  29.614726226577215
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:23
carno:3, totallen:146, nancount:54, test_reccnt:12
carno:4, totallen:200, nancount:0, test_reccnt:23
carno:6, totallen:200, nancount:0, test_reccnt:23
carno:7, totallen:193, nancount:7, test_reccnt:21
carno:9, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:20
carno:15, totallen:200, nancount:0, test_reccnt:23
carno:17, totallen:199, nancount:1, test_reccnt:22
carno:18, totallen:137, nancount:63, test_reccnt:10
carno:19, totallen:199, nancount:1, test_reccnt:22
carno:20, totallen:200, nancount:0, test_reccnt:23
carno:21, tot

INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4608695652173913 top1acc_farmost= 0.34782608695652173 top5acc= 0.6417391304347826 top5acc_farmost= 0.5304347826086957
tau =  0.6916929079033138 rmse =  24.901920250417874
model: curtrack
predicting model=curtrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4 top1acc_farmost= 0.30434782608695654 top5acc= 0.64 top5acc_farmost= 0.5304347826086957
tau =  0.6789276279362255 rmse =  26.13912340808945
model: zerotrack
predicting model=zerotrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4434782608695652 top1acc_farmost= 0.34782608695652173 top5acc= 0.6104347826086957 top5acc_farmost= 0.46956521739130436
tau =  0.6462941509910302 rmse =  29.051680606211736
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:23
carno:3, totallen:146, nancount:54, test_reccnt:12
carno:4, totallen:200, nancount:0, test_reccnt:23
carno:6, totallen:200, nancount:0, test_reccnt:23
carno:7, totallen:193, nancount:7, test_reccnt:21
carno:9, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:23
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:20
carno:15, totallen:200, nancount:0, test_reccnt:23
carno:17, totallen:199, nancount:1, test_reccnt:22
carno:18, totallen:137, nancount:63, test_reccnt:10
carno:19, totallen:199, nancount:1, test_reccnt:22
carno:20, totallen:200, nancount:0, test_reccnt:23
carno:21, tot

INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4434782608695652 top1acc_farmost= 0.30434782608695654 top5acc= 0.6382608695652174 top5acc_farmost= 0.5043478260869565
tau =  0.6783989895726397 rmse =  26.596998027980753
model: curtrack
predicting model=curtrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.4260869565217391 top1acc_farmost= 0.30434782608695654 top5acc= 0.6034782608695652 top5acc_farmost= 0.45217391304347826
tau =  0.6624399753243858 rmse =  28.023693773116634
model: zerotrack
predicting model=zerotrack, plen=5
loading model...done!, ctx:gpu(0)
tss len=605, forecasts len=605


INFO:root:Using GPU


total:23, prediction_length:5
top1acc= 0.46956521739130436 top1acc_farmost= 0.30434782608695654 top5acc= 0.6052173913043478 top5acc_farmost= 0.45217391304347826
tau =  0.6405089619167388 rmse =  29.95879925122113
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:11
carno:3, totallen:146, nancount:54, test_reccnt:5
carno:4, totallen:200, nancount:0, test_reccnt:11
carno:6, totallen:200, nancount:0, test_reccnt:11
carno:7, totallen:193, nancount:7, test_reccnt:10
carno:9, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:9
carno:15, totallen:200, nancount:0, test_reccnt:11
carno:17, totallen:199, nancount:1, test_reccnt:10
carno:18, totallen:137, nancount:63, test_reccnt:4
carno:19, totallen:199, nancount:1, test_reccnt:10
carno:20, totallen:200, nancount:0, test_reccnt:11
carno:21, totall

INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.4 top1acc_farmost= 0.18181818181818182 top5acc= 0.52 top5acc_farmost= 0.36363636363636365
tau =  0.5162021258379578 rmse =  39.53412120574189
model: curtrack
predicting model=curtrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284


INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.4090909090909091 top1acc_farmost= 0.2727272727272727 top5acc= 0.5109090909090909 top5acc_farmost= 0.36363636363636365
tau =  0.5044694864062357 rmse =  40.796390263631636
model: zerotrack
predicting model=zerotrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284


INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.3090909090909091 top1acc_farmost= 0.18181818181818182 top5acc= 0.5254545454545455 top5acc_farmost= 0.38181818181818183
tau =  0.5096126185410426 rmse =  40.47960893640204
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:11
carno:3, totallen:146, nancount:54, test_reccnt:5
carno:4, totallen:200, nancount:0, test_reccnt:11
carno:6, totallen:200, nancount:0, test_reccnt:11
carno:7, totallen:193, nancount:7, test_reccnt:10
carno:9, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:9
carno:15, totallen:200, nancount:0, test_reccnt:11
carno:17, totallen:199, nancount:1, test_reccnt:10
carno:18, totallen:137, nancount:63, test_reccnt:4
carno:19, totallen:199, nancount:1, test_reccnt:10
carno:20, totallen:200, nancount:0, test_reccnt:11
carno:21, totall

INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.3090909090909091 top1acc_farmost= 0.18181818181818182 top5acc= 0.54 top5acc_farmost= 0.34545454545454546
tau =  0.5142074321744927 rmse =  40.19380899049864
model: curtrack
predicting model=curtrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284


INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.32727272727272727 top1acc_farmost= 0.18181818181818182 top5acc= 0.5054545454545455 top5acc_farmost= 0.2909090909090909
tau =  0.5013490920608921 rmse =  41.15535397935398
model: zerotrack
predicting model=zerotrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284


INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.2909090909090909 top1acc_farmost= 0.09090909090909091 top5acc= 0.4490909090909091 top5acc_farmost= 0.3090909090909091
tau =  0.48638465962868477 rmse =  42.11722900088417
====event:Indy500, train_len=80, max_len=200, min_len=200
carno:1, totallen:200, nancount:0, test_reccnt:11
carno:3, totallen:146, nancount:54, test_reccnt:5
carno:4, totallen:200, nancount:0, test_reccnt:11
carno:6, totallen:200, nancount:0, test_reccnt:11
carno:7, totallen:193, nancount:7, test_reccnt:10
carno:9, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=10，len=57
carno:12, totallen:200, nancount:0, test_reccnt:11
a short ts: carid=13，len=67
carno:14, totallen:187, nancount:13, test_reccnt:9
carno:15, totallen:200, nancount:0, test_reccnt:11
carno:17, totallen:199, nancount:1, test_reccnt:10
carno:18, totallen:137, nancount:63, test_reccnt:4
carno:19, totallen:199, nancount:1, test_reccnt:10
carno:20, totallen:200, nancount:0, test_reccnt:11
carno:21, totall

INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.4 top1acc_farmost= 0.2727272727272727 top5acc= 0.5436363636363636 top5acc_farmost= 0.36363636363636365
tau =  0.5124403574824865 rmse =  40.520312692416134
model: curtrack
predicting model=curtrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284


INFO:root:Using GPU


total:11, prediction_length:10
top1acc= 0.37272727272727274 top1acc_farmost= 0.2727272727272727 top5acc= 0.5436363636363636 top5acc_farmost= 0.36363636363636365
tau =  0.5490978206772633 rmse =  37.0515596025941
model: zerotrack
predicting model=zerotrack, plen=10
loading model...done!, ctx:gpu(0)
tss len=284, forecasts len=284
total:11, prediction_length:10
top1acc= 0.32727272727272727 top1acc_farmost= 0.18181818181818182 top5acc= 0.5345454545454545 top5acc_farmost= 0.32727272727272727
tau =  0.5281452096028603 rmse =  38.67680725149691


### test

In [16]:
result[result["prediction_length"]==2]

Unnamed: 0,model,prediction_length,halfmode,trainid,top1acc,top1acc_farmost,top5acc,top5acc_farmost,tau,rmse
0,oracle,2,True,r0.5,0.697368,0.631579,0.810526,0.784211,0.856538,6.495403
1,deepAR,2,True,r0.5,0.381579,0.315789,0.681579,0.615789,0.713712,16.4469
2,naive,2,True,r0.5,0.618421,0.578947,0.813158,0.768421,0.795573,14.307786
3,curtrack,2,True,r0.5,0.473684,0.368421,0.789474,0.742105,0.81207,10.850776
4,zerotrack,2,True,r0.5,0.473684,0.447368,0.802632,0.752632,0.808004,10.236402
5,oracle,2,True,r0.6,0.789474,0.789474,0.868421,0.831579,0.887596,5.271319
6,deepAR,2,True,r0.6,0.473684,0.368421,0.739474,0.678947,0.782108,13.329318
7,naive,2,True,r0.6,0.618421,0.578947,0.813158,0.768421,0.795573,14.307786
8,curtrack,2,True,r0.6,0.605263,0.552632,0.807895,0.757895,0.837663,9.677535
9,zerotrack,2,True,r0.6,0.605263,0.526316,0.797368,0.742105,0.826675,9.016


In [None]:
tss, forecasts = exp_data[0][0]['naive']

In [None]:
forecasts[0].samples.shape

In [None]:
[x[3][:,:].shape for x in rank_ret]

In [None]:
rank_ret[x][2][:,:]

In [None]:
rank_ret[1][3][:,:]

In [None]:
pred_ret.keys()

In [None]:
forecasts[0].samples.shape

In [None]:
start_offset

In [None]:
result[result['prediction_length']==10]

In [27]:
_track_pred

{200: array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
 190: array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
 180: array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0])}

In [26]:
_track_true

{200: array([1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 190: array([1., 1., 1., 1., 1., 1., 0., 0., 0., 0.]),
 180: array([1., 1., 1., 1., 1., 1., 0., 0., 0., 0.])}