## QuickTest

    makedb laptime
    makedb gluonts
    train model
    evaluate model


In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os,sys
import random
import mxnet as mx
from mxnet import gluon
import pickle
import json
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from pathlib import Path

from gluonts.model.deepar import DeepAREstimator
from gluonts.model.deep_factor import DeepFactorEstimator
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.trainer import Trainer
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator, MultivariateEvaluator
from gluonts.model.predictor import Predictor
from gluonts.model.prophet import ProphetPredictor
from gluonts.model.r_forecast import RForecastPredictor
from gluonts.dataset.util import to_pandas

from gluonts.distribution.neg_binomial import NegativeBinomialOutput
from gluonts.distribution.student_t import StudentTOutput
from gluonts.distribution.multivariate_gaussian import MultivariateGaussianOutput

from indycar.model.NaivePredictor import NaivePredictor
from indycar.model.deeparw import DeepARWeightEstimator

import indycar.model.stint_simulator_shortterm_pitmodel as stint

INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU


In [2]:
# make indy car completed_laps dataset
# car_number, completed_laps, rank, elapsed_time, rank_diff, elapsed_time_diff 
def make_cl_data(dataset):

    # pick up data with valid rank
    rankdata = dataset.rename_axis('MyIdx').sort_values(by=['elapsed_time','MyIdx'], ascending=True)
    rankdata = rankdata.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')

    # resort by car_number, lap
    uni_ds = rankdata.sort_values(by=['car_number', 'completed_laps', 'elapsed_time'], ascending=True)    
    #uni_ds = uni_ds.drop(["unique_id", "best_lap", "current_status", "track_status", "lap_status",
    #                  "laps_behind_leade","laps_behind_prec","overall_rank","pit_stop_count",
    #                  "last_pitted_lap","start_position","laps_led"], axis=1)
    
    uni_ds = uni_ds.drop(["unique_id", "best_lap", 
                      "laps_behind_leade","laps_behind_prec","overall_rank","pit_stop_count",
                      "last_pitted_lap","start_position","laps_led"], axis=1)
        
    carnumber = set(uni_ds['car_number'])
    print('cars:', carnumber)
    print('#cars=', len(carnumber))
   
    # faster solution , uni_ds already sorted by car_number and lap
    uni_ds['rank_diff'] = uni_ds['rank'].diff()
    mask = uni_ds.car_number != uni_ds.car_number.shift(1)
    uni_ds['rank_diff'][mask] = 0
    
    uni_ds['time_diff'] = uni_ds['elapsed_time'].diff()
    mask = uni_ds.car_number != uni_ds.car_number.shift(1)
    uni_ds['time_diff'][mask] = 0
    
    #df = uni_ds[['car_number','completed_laps','rank','elapsed_time','rank_diff','time_diff']]
    #df = uni_ds[['car_number','completed_laps','rank',
    #             'rank_diff','time_diff',"current_status", "track_status", "lap_status",'elapsed_time']]
    
    df = uni_ds[['car_number','completed_laps','time_diff','rank','track_status', 'lap_status','elapsed_time']]
    
    return df

def make_lapstatus_data(dataset):
    final_lap = max(dataset.completed_laps)
    total_laps = final_lap + 1

    # get records for the cars that finish the race
    completed_car_numbers= dataset[dataset.completed_laps == final_lap].car_number.values
    completed_car_count = len(completed_car_numbers)

    print('count of completed cars:', completed_car_count)
    print('completed cars:', completed_car_numbers)
    
    #pick up one of them
    onecar = dataset[dataset['car_number']==completed_car_numbers[0]]
    onecar = onecar.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')
    return onecar[['completed_laps','track_status']]

def load_data(event, year=0):
    #inputfile = '../data/final/C_'+ event +'-' + year + '-final.csv'
    if year>0:
        inputfile = '../data/final/C_'+ event +'-' + year + '.csv'
    else:
        inputfile = '../data/final/C_'+ event +'.csv'
    
    #outputprefix = year +'-' + event + '-'
    dataset = pd.read_csv(inputfile)
    #dataset.info(verbose=True)    
    
    final_lap = max(dataset.completed_laps)
    total_laps = final_lap + 1

    # get records for the cars that finish the race
    completed_car_numbers= dataset[dataset.completed_laps == final_lap].car_number.values
    completed_car_count = len(completed_car_numbers)

    print('count of completed cars:', completed_car_count)
    print('completed cars:', completed_car_numbers)

    #make a copy
    alldata = dataset.copy()
    dataset = dataset[dataset['car_number'].isin(completed_car_numbers)]
    rankdata = alldata.rename_axis('MyIdx').sort_values(by=['elapsed_time','MyIdx'], ascending=True)
    rankdata = rankdata.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')
    
    cldata = make_cl_data(dataset)
    flagdata = make_lapstatus_data(dataset)
    acldata = make_cl_data(alldata)

    return alldata, rankdata, acldata, flagdata

def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    return np.isnan(y), lambda z: z.nonzero()[0]

def get_lap2nextpit(lap_status, maxlap=200):
    """
    input:
        lapstatus  ; array of 0/1 indicating pitstops for each lap, nan means incomplete race
        maxlap     ; the max lap number of the race
    output:
        lap2nextpit ; array of the lap gap to the next pit for each lap
    
    """
    
    #pitstops = np.where(lap_status==1)[0]
                    
    pitstops = list(np.where(lap_status==1)[0])
    #if not len(lap_status) < maxlap:
    nans, x= nan_helper(lap_status)
    nan_count = np.sum(nans)      
    if nan_count == 0:
        #complete cars
        # the last stint, to the end
        pitstops.append(maxlap)
    
    lap2nextpit = np.zeros_like(lap_status)
    lap2nextpit[:] = np.nan
    
    #guard
    if len(pitstops)==0:
        return lap2nextpit
    
    idx = 0
    for lap in range(len(lap_status)):
        if lap < pitstops[idx]:
            lap2nextpit[lap] = pitstops[idx] - lap
        else:
            idx += 1
            if idx < len(pitstops):
                lap2nextpit[lap] = pitstops[idx] - lap
            else:
                break
            
    return lap2nextpit

def get_lapdata(acldata):
    """
    input:
        acldata['car_number','completed_laps','time_diff','rank','track_status', 'lap_status','elapsed_time']
    
        timediff: [car_number, completed_laps] -> elapsed time diff to leader
    output:
        lapdata = acldata[['car_number','completed_laps',
                           'time_diff','rank','track_status', 'lap_status','time_behind']].to_numpy()
    """
    COL_COMPLETED_LAPS = 1
    COL_ELAPSED_TIME = 6
    
    maxlap = np.max(acldata['completed_laps'].values)
    #'car_number','completed_laps','time_diff','rank','track_status', 'lap_status','time_behind'
    time_behind = []
    
    for lap in range(1, maxlap+1):
        this_lap = acldata[acldata['completed_laps']==lap][
            ['car_number','completed_laps','time_diff','rank',
             'track_status', 'lap_status','elapsed_time']].values
        
        min_elapsed_time = np.nanmin(this_lap[:,COL_ELAPSED_TIME].astype(np.float))
        #print(f'lap:{lap}, min_elapsed_time:{min_elapsed_time}')
        
        for row in this_lap:
            car_number = int(row[0])
            time_diff = row[2]
            rank = row[3]
            track_status = row[4]
            lap_status = row[5]
            
            timebehind = float(row[COL_ELAPSED_TIME]) - min_elapsed_time
            #
            time_behind.append([car_number, lap, time_diff,rank,track_status, lap_status,
                                timebehind, float(row[COL_ELAPSED_TIME])])
    
    #return
    lapdata = np.array(time_behind)
    return lapdata



# features: laptime, rank, track_status, lap_status, timediff
LAPTIME = 0
RANK = 1
TRACK_STATUS = 2
LAP_STATUS = 3
TIME_BEHIND = 4
CAUTION_LAPS_INSTINT = 5 
LAPS_INSTINT = 6
ELAPSED_TIME = 7
LAP2NEXTPIT = 8

_featureCnt = 9
        
def get_laptime_dataset(stagedata, inlap_status = 0):
    """
    #add caution_laps_instint, laps_instint
    
    input: (alldata, rankdata, acldata, flagdata)
    output: laptime & rank data
    
    [(
    eventid,
    carids : rowid -> carno,
    datalist: #car_number x features x #totallaps (padded by Nan)
        entry: [[laptime, rank, track_status, lap_status,
                caution_laps_instint, laps_instint]]
    )]
    """
    laptime_data = []
    for event in stagedata.keys():
        
        print(f'start event: {event}')
        
        laptime_rec = []
        eventid = events_id[event]
        
        alldata, rankdata, acldata, flagdata = stagedata[event]
        carlist = set(acldata['car_number'])
        laplist = set(acldata['completed_laps'])
        totalcars = len(carlist)
        totallaps = len(laplist)
        


        #carnumber -> carid
        carids={key:idx for idx, key in enumerate(carlist)}
        decode_carids={idx:key for idx, key in enumerate(carlist)}

        #init
        lap_instint = {carids[x]:0 for x in carlist}
        caution_instint = {carids[x]:0 for x in carlist}        
        
        #array: car_number x lap
        #laptime = np.zeros((totalcars, totallaps-1))
        #rank = np.zeros((totalcars, totallaps-1))
        laptime = np.empty((totalcars, totallaps-1))
        rank = np.empty((totalcars, totallaps-1))
        laptime[:] = np.NaN
        rank[:] = np.NaN
        

        datalist = np.empty((totalcars, _featureCnt, totallaps-1))
        datalist[:] = np.NaN
        
        #lapdata = acldata[['car_number','completed_laps',
        #                   'time_diff','rank','track_status', 'lap_status','elapsed_time']].to_numpy()
        
        #'car_number','completed_laps','time_diff','rank','track_status', 'lap_status','time_behind'
        lapdata = get_lapdata(acldata)
        
        
        for row in lapdata:
            #completed_laps
            if int(row[1]) == 0:
                continue
                
            #add to data array
            car_number = carids[int(row[0])]
            completed_laps = int(row[1])-1
            time_diff = float(row[2])
            rank = int(row[3])
            track_status = 1 if row[4]=='Y' else 0
            lap_status = 1 if row[5]=='P' else 0
            time_behind = float(row[6])
            
            datalist[car_number, LAPTIME, completed_laps] = time_diff
            datalist[car_number, RANK, completed_laps] = rank
            datalist[car_number, TRACK_STATUS, completed_laps] = track_status
            datalist[car_number, LAP_STATUS, completed_laps] = lap_status
            datalist[car_number, TIME_BEHIND, completed_laps] = time_behind

            datalist[car_number, ELAPSED_TIME, completed_laps] = float(row[7])

            
            #stint status
            if track_status == 1:
                caution_instint[car_number] += 1
            lap_instint[car_number] += 1
            if lap_status == 1:
                #new stint
                lap_instint[car_number] = 0
                caution_instint[car_number] = 0
                
                # add inlap feature into lap_Status
                # set the previous lap to inlap status
                
                # what does it mean?
                
                if (inlap_status!=0):
                    if inlap_status == 1:
                        # set the previous lap of 'P'
                        if completed_laps > 0:
                            #datalist[car_number, LAP_STATUS, completed_laps-1] = INLAP_STATUS
                            datalist[car_number, LAP_STATUS, completed_laps-1] = 1
                    else:
                        # set the next lap of 'P'
                        if completed_laps +1 < totallaps:
                            #datalist[car_number, LAP_STATUS, completed_laps-1] = INLAP_STATUS
                            datalist[car_number, LAP_STATUS, completed_laps + 1] = 1
                
            
            datalist[car_number, LAPS_INSTINT, completed_laps] = lap_instint[car_number]
            datalist[car_number, CAUTION_LAPS_INSTINT, completed_laps] = caution_instint[car_number]
                

                
        #update lap2nextpit in datalist
        for caridx in range(datalist.shape[0]):
            lap_status = datalist[caridx, LAP_STATUS, :]
            #pit status
            lap2nextpit = get_lap2nextpit(lap_status)
            datalist[caridx, LAP2NEXTPIT, :] = lap2nextpit        
                
        #add one record
        laptime_data.append([eventid, decode_carids, datalist])
        # push this event into stage dataframe
        print('event=%s, records=%s'%(event, datalist.shape))
        
    
    return laptime_data

In [3]:
def test_flag(a, bitflag):
    return (a & bitflag) ==  bitflag

#
# remove NaN at the tail
# there should be no nans in the middle of the ts
COL_LAPTIME=0
COL_RANK=1
COL_TRACKSTATUS=2
COL_LAPSTATUS=3
COL_TIMEDIFF=4
COL_CAUTION_LAPS_INSTINT=5
COL_LAPS_INSTINT= 6
COL_ELAPSED_TIME= 7

FEATURE_STATUS = 2
FEATURE_PITAGE = 4

MODE_ORACLE = 0
MODE_NOLAP = 1
MODE_NOTRACK = 2
MODE_TESTZERO = 4
MODE_TESTCURTRACK = 8
#MODE_STR={MODE_ORACLE:'oracle', MODE_NOLAP:'nolap',MODE_NOTRACK:'notrack',MODE_TEST:'test'}

#_feature_mode = FEATURE_STATUS

def make_dataset_byevent(runs, prediction_length, freq, 
                       useeid = False,
                       run_ts=COL_LAPTIME, 
                       test_event = 'Indy500-2018',
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE,
                       half_moving_win = True,
                       train_ratio=0.8,
                       log_transform = False,
                       context_ratio = 0.,
                       dorerank = True
                ):
    """
    split the ts to train and test part by the ratio
    
    oracle_mode: false to simulate prediction in real by 
        set the covariates of track and lap status as nan in the testset
            
    
    """    
    #global setting
    feature_mode = _feature_mode
    
    
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
    totalTSCnt = 0
    totalTSLen = 0
    test_eventid = events_id[test_event]
    
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        #skip eid > test_eventid
        if _data[0] > test_eventid:
            print('skip this event:', events[_data[0]])
            break
        
        if events[_data[0]] == test_event:
            test_mode = True
        
        else:
            test_mode = False
            
        
        
        #statistics on the ts length
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        train_len = int(np.max(ts_len) * train_ratio)
        if train_len == 0:
            #use global train_len
            train_len = _train_len
        
        if context_ratio != 0.:
            # add this part to train set
            context_len = int(np.max(ts_len) * context_ratio)
        else:    
            context_len = prediction_length*2
        if context_len < 10:
            context_len = 10
        
        print(f'====event:{events[_data[0]]}, prediction_len={prediction_length},train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)},context_len={context_len}')

        #rerank due to short ts removed
        if run_ts == COL_RANK and dorerank == True:
            sel_rows = []
            for rowid in range(_data[2].shape[0]):
                # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
                rec = _data[2][rowid].copy()
                #remove nan(only tails)
                nans, x= nan_helper(rec[run_ts,:])
                nan_count = np.sum(nans)             
                rec = rec[:, ~np.isnan(rec[run_ts,:])]
                
                totallen = rec.shape[1]
                if ( totallen < train_len + prediction_length):
                    print(f'rerank a short ts: carid={_data[1][rowid]}，len={totallen}')
                    continue 
                else:
                    sel_rows.append(rowid)
                    
            #get selected matrix
            sel_idx = np.array(sel_rows)
            selmat = _data[2][sel_idx]
            
            mask = np.isnan(selmat[:,COL_RANK,:])
            
            idx = np.argsort(selmat[:,COL_RANK,:], axis=0)
            true_rank = np.argsort(idx, axis=0).astype(np.float)
            true_rank[mask] = np.nan
            
            #set it back
            #if _data[0]==0:
            #    print('raw:')
            #    print(_data[2][:,COL_RANK,0])
            #    print('true_rank:')
            #    print(true_rank[:,0])
            #_data[2][sel_idx][:,COL_RANK,:] = true_rank       
            _data[2][sel_idx,COL_RANK,:] = true_rank       
            #if _data[0]==0:
            #    _view = _data[2][sel_idx]
            #    _view[:,COL_RANK,:] = true_rank
            #    print('view:')
            #    print(_data[2][:,COL_RANK,0])
            #    print(_view[:,COL_RANK,0])
            #    print('rerank:')
            #    print(_data[2][sel_idx][:,COL_RANK,0])
        
        
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
            rec = _data[2][rowid].copy()
            
            #remove nan(only tails)
            nans, x= nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~np.isnan(rec[run_ts,:])]
            
            # remove short ts
            totallen = rec.shape[1]
            
            totalTSCnt += 1
            totalTSLen += totallen
            
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid
                
            
            if useeid:
                static_cat = [carid, _data[0]]    
            else:
                static_cat = [carid]    
                
            #first, get target a copy    
            # target can be COL_XXSTATUS
            target_val = rec[run_ts,:].copy().astype(np.float32)
            if log_transform:
                target_val = np.log(target_val + 1.0)
            
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0

            test_rec_cnt = 0
            if not test_mode:
                if feature_mode == FEATURE_PITAGE:  
                    # all go to train set
                    _train.append({'target': target_val, 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [rec[COL_TRACKSTATUS,:],
                                       rec[COL_LAPSTATUS,:],
                                       rec[COL_LAPS_INSTINT,:]]
                              }
                              )
                else:
                    # all go to train set
                    _train.append({'target': target_val, 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [rec[COL_TRACKSTATUS,:],
                                       rec[COL_LAPSTATUS,:]]
                              }
                              )
                    
            else:
                # reset train_len
                if context_ratio != 0.:
                    # all go to train set
                    #add [0, context_len] to train set 
                    if feature_mode == FEATURE_PITAGE:  
                        _train.append({'target': target_val[:context_len], 
                                    'start': start, 
                                    'feat_static_cat': static_cat,
                                    'feat_dynamic_real': [rec[COL_TRACKSTATUS,:context_len],
                                           rec[COL_LAPSTATUS,:context_len],
                                           rec[COL_LAPS_INSTINT,:context_len]               
                                                         ]
                                  }
                                  )                    
                    else:
                        _train.append({'target': target_val[:context_len], 
                                    'start': start, 
                                    'feat_static_cat': static_cat,
                                    'feat_dynamic_real': [rec[COL_TRACKSTATUS,:context_len],
                                           rec[COL_LAPSTATUS,:context_len]               
                                                         ]
                                  }
                                  )                    
                
                # testset
                # multiple test ts(rolling window as half of the prediction_length)

                step = -int(prediction_length/2) if half_moving_win else -prediction_length
                for endpos in range(totallen, context_len+prediction_length, 
                                    step):

                    track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                    lap_rec = rec[COL_LAPSTATUS, :endpos].copy()
                    pitage_rec = rec[COL_LAPS_INSTINT, :endpos].copy()

                    # test mode
                    if test_flag(oracle_mode, MODE_TESTCURTRACK):
                        # since nan does not work, use cur-val instead
                        track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                        #track_rec[-prediction_length:] = random.randint(0,1)
                        #lap_rec[-prediction_length:] = lap_rec[-prediction_length - 1]
                        lap_rec[-prediction_length:] = 0

                        #for pitage, just assume there is no pit
                        start_pitage = pitage_rec[-prediction_length - 1]
                        pitage_rec[-prediction_length:] = np.array([x+start_pitage+1 for x in range(prediction_length)])

                    elif test_flag(oracle_mode, MODE_TESTZERO):
                        #set prediction part as nan
                        #track_rec[-prediction_length:] = np.nan
                        #lap_rec[-prediction_length:] = np.nan
                        track_rec[-prediction_length:] = 0
                        lap_rec[-prediction_length:] = 0                    

                        #for pitage, just assume there is no pit
                        start_pitage = pitage_rec[-prediction_length - 1]
                        pitage_rec[-prediction_length:] = np.array([x+start_pitage+1 for x in range(prediction_length)])

                    if feature_mode == FEATURE_PITAGE:                          
                        _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [track_rec,lap_rec,pitage_rec]
                                #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                                #       rec[COL_LAPSTATUS,:endpos]] 
                                 }
                              )                     
                    else:
                        _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [track_rec,lap_rec]
                                #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                                #       rec[COL_LAPSTATUS,:endpos]] 
                                 }
                              )                     
                        
   
                    test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'train len:{len(train_set)}, test len:{len(test_set)}, totsl TsCnt:{totalTSCnt}, total ts len:{totalTSLen}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set

def makedbs():
    useeid = False
    interpolate = False
    #ipstr = '-ip' if interpolate else '-noip'
    ipstr = '%s-%s'%('ip' if interpolate else 'noip', 'eid' if useeid else 'noeid')
    if useeid:
        cardinality = [len(global_carids), len(laptime_data)]
    else:
        cardinality = [len(global_carids)]

    train_ds, test_ds,_,_ = make_dataset_byevent(-1, prediction_length,freq,
                                         useeid=useeid, run_ts=_run_ts,
                                        test_event=_test_event, log_transform =False,
                                        context_ratio=0, train_ratio = 0, dorerank =True)
    
    dbname = f'{_task_id}-oracle-{ipstr}-all-all-f{freq}-t{prediction_length}-r{_test_event}-gluonts-indy-2018.pickle'
    #save_dataset(dbname, freq, prediction_length, cardinality,train_ds, test_ds)  
    with open(dbname, 'wb') as f:
        #pack [global_carids, laptime_data]
        savedata = [freq, prediction_length, cardinality, train_ds, test_ds]
        #savedata = [freq, train_set, test_set]
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)
    
    
    return dbname, train_ds, test_ds
            


In [4]:
def init_estimator(model, gpuid, epochs=100, batch_size = 32, 
        target_dim = 3, distr_output = None, use_feat_static = True):
    
    if int(gpuid) < 0:
        ctx = "cpu"
    else:
        ctx = "gpu(%s)"%gpuid

    if model == 'deepAR':
        estimator = DeepAREstimator(
            prediction_length=prediction_length,
            context_length= context_length,
            use_feat_static_cat=True,
            cardinality=cardinality,
            distr_output = distr_output,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           )
        )
    elif model == 'deepARW':
        estimator = DeepARWEstimator(
            prediction_length=prediction_length,
            context_length= context_length,
            use_feat_static_cat=True,
            cardinality=cardinality,
            distr_output = distr_output,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           )
        )
        
    elif model == 'deepAR-Oracle':

        if use_feat_static:
            estimator = DeepAREstimator(
                prediction_length=prediction_length,
                context_length= context_length,
                use_feat_static_cat=use_feat_static,
                cardinality=cardinality,
                use_feat_dynamic_real=True,
                distr_output = distr_output,
                freq=freq,
                trainer=Trainer(ctx=ctx, 
                                batch_size = batch_size,
                                epochs=epochs, 
                                learning_rate=1e-3, 
                                num_batches_per_epoch=100
                               )
                )
        else:
            estimator = DeepAREstimator(
                prediction_length=prediction_length,
                context_length= context_length,
                use_feat_static_cat=use_feat_static,
                #cardinality=cardinality,
                use_feat_dynamic_real=True,
                distr_output = distr_output,
                freq=freq,
                trainer=Trainer(ctx=ctx, 
                                batch_size = batch_size,
                                epochs=epochs, 
                                learning_rate=1e-3, 
                                num_batches_per_epoch=100
                               )
                )
    elif model == 'deepARW-Oracle':

        if use_feat_static:
            estimator = DeepARWeightEstimator(
                prediction_length=prediction_length,
                context_length= context_length,
                use_feat_static_cat=use_feat_static,
                cardinality=cardinality,
                use_feat_dynamic_real=True,
                distr_output = distr_output,
                freq=freq,
                trainer=Trainer(ctx=ctx, 
                                batch_size = batch_size,
                                epochs=epochs, 
                                learning_rate=1e-3, 
                                #hybridize=False,
                                num_batches_per_epoch=100
                               )
                )
        else:
            estimator = DeepARWeightEstimator(
                prediction_length=prediction_length,
                context_length= context_length,
                use_feat_static_cat=use_feat_static,
                #cardinality=cardinality,
                use_feat_dynamic_real=True,
                distr_output = distr_output,
                freq=freq,
                trainer=Trainer(ctx=ctx, 
                                batch_size = batch_size,
                                epochs=epochs, 
                                learning_rate=1e-3, 
                                #hybridize=False,
                                num_batches_per_epoch=100
                               )
                )
            
    elif model == 'deepAR-nocarid':
        estimator = DeepAREstimator(
            prediction_length=prediction_length,
            context_length= context_length,
            use_feat_static_cat=use_feat_static,
            cardinality=cardinality,
            use_feat_dynamic_real=True,
            distr_output = distr_output,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           )
        )
    elif model == 'deepAR-multi':
        estimator = DeepAREstimator(
            prediction_length=prediction_length,
            context_length= context_length,
            use_feat_static_cat=use_feat_static,
            #cardinality=cardinality,
            use_feat_dynamic_real=True,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           ),
            distr_output=MultivariateGaussianOutput(dim=target_dim),
        )


    elif model == 'simpleFF':
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[10],
            prediction_length=prediction_length,
            context_length= context_length,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs,
                            learning_rate=1e-3,
                            hybridize=False,
                            num_batches_per_epoch=100
                           )
        )
    elif model == 'deepFactor':
        estimator = DeepFactorEstimator(
            prediction_length=prediction_length,
            context_length= context_length,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           )
        )
    elif model == 'deepState':
        estimator = DeepStateEstimator(
            prediction_length=prediction_length,
            use_feat_static_cat=True,
            cardinality=cardinality,
            freq=freq,
            trainer=Trainer(ctx=ctx, 
                            batch_size = batch_size,
                            epochs=epochs, 
                            learning_rate=1e-3, 
                            num_batches_per_epoch=100
                           )
        )
    elif model == 'ets':
        estimator = RForecastPredictor(method_name='ets',freq= freq, prediction_length = prediction_length)
    elif model == 'prophet':

        estimator = ProphetPredictor(freq= freq, prediction_length = prediction_length)
    elif model == 'arima':
        estimator = RForecastPredictor(method_name='arima',freq= freq, prediction_length = prediction_length, trunc_length = 200)
    elif model == 'naive':
        estimator = NaivePredictor(freq= freq, prediction_length = prediction_length)
    else:
        logger.error('model %s not support yet, quit', model)
        sys.exit(-1)


    return estimator


In [5]:
def simulation(datasetid, testevent, taskid, runts, expid, predictionlen, 
               datamode, loopcnt, featuremode = stint.FEATURE_STATUS,
              pitmodel = 0, model = 'oracle', inlapmode=0):
    #
    # configurataion
    #
    # model path:  <_dataset_id>/<_task_id>-<trainid>/
    #_dataset_id = 'indy2013-2018-nocarid'
    
    stint._inlap_status = inlapmode
    
    stint.init(pitmodel)
    stint._dataset_id = datasetid
    stint._test_event = testevent
    #_test_event = 'Indy500-2019'

    stint._feature_mode = featuremode
    stint._context_ratio = 0.

    stint._task_id = taskid  # rank,laptime, the trained model's task
    stint._run_ts = runts   #COL_LAPTIME,COL_RANK
    stint._exp_id=expid  #rank, laptime, laptim2rank, timediff2rank... 

    stint._use_mean = True
    
    stint._train_len = 40
    predictor = stint.load_model(predictionlen, model,trainid='indy500',epochs = epochs, exproot='./')

    ret2 = {}
    for i in range(loopcnt):
        #df, full_samples, full_tss
        ret2[i] = stint.run_simulation_shortterm(predictor, predictionlen, stint.freq, datamode=datamode)

    acc = []
    for i in ret2.keys():
        df = ret2[i][0]
        _x = stint.get_evalret_shortterm(df)
        acc.append(_x)

    b = np.array(acc)
    print(np.mean(b, axis=0))
    
    #save keys
    #stint._pitmodel.save_keys('pitmodel-keys.pickle')
    
    return b, ret2

def get_alldf(dfx, year=2018):
    

    #dfx = ret[f'{model}-RANK-{year}-inlap-nopitage']
    #dfx = ret[f'{model}-TIMEDIFF-{year}-noinlap-nopitage']
    
    samples = dfx.keys()
    retdfs = []
    for id in samples:
        df = dfx[id][0]
        retdfs.append(df)
        
    if len(retdfs) > 1:
        dfout = pd.concat(retdfs)
    else:
        dfout = retdfs[0]
        
    return dfout
    
def get_alldf_mode(dfx, year=2018,mode=0):
    """
    mode: 
        0; mode
        1; mean
        2; median
    """
    dfall = get_alldf(dfx, year=year)
    
    cars = set(dfall.carno.values)
    startlaps = {}
    for car in cars:
        startlaps[car] = set(dfall[dfall['carno']==car].startlap.values)
        
    retdf = []
    for car in cars:
        for startlap in startlaps[car]:
            dfrec = dfall[(dfall['carno']==car) & (dfall['startlap']==startlap)]
            
            #get mode
            if mode == 0:
                pred_endrank = stats.mode(dfrec.pred_endrank.values).mode[0]
                #pred_endlap =  stats.mode(dfrec.pred_endlap.values).mode[0]
            elif mode == 1:
                #use mean
                pred_endrank = np.mean(dfrec.pred_endrank.values)
                #pred_endlap =  np.mean(dfrec.pred_endlap.values)
            elif mode == 2:
                #use mean
                pred_endrank = np.median(dfrec.pred_endrank.values)
                #pred_endlap =  np.median(dfrec.pred_endlap.values)
            
            firstrec = dfrec.to_numpy()[0,:]
            firstrec[6] = pred_endrank
            firstrec[7] = pred_endrank - firstrec[2]
            if firstrec[7] == 0:
                firstrec[8] = 0
            elif firstrec[7] > 0:
                firstrec[8] = 1
            else:
                firstrec[8] = -1
                
            #endlap, pred_endlap
            
        
            retdf.append(firstrec)
        
    #dfout = pd.concat(retdf)
    dfout = pd.DataFrame(retdf, columns =['carno', 'startlap', 'startrank',    
                                         'endrank', 'diff', 'sign',
                                         'pred_endrank', 'pred_diff', 'pred_sign',
                                         #'endlap','pred_endlap'
                                        ])
    print('df size:', len(dfout))
    return dfout

def get_allsamples(dfx, year=2018):
    
    runs = list(dfx.keys())
    runcnt = len(runs)
    
    full_samples = {}
    full_tss = dfx[runs[0]][2]
    carlist = list(full_tss.keys())
    samplecnt, lapcnt = dfx[runs[0]][1][carlist[0]].shape
    
    print('sacmplecnt:', samplecnt, 'lapcnt:',lapcnt,'runcnt:', runcnt)
    
    #empty samples
    for carid, carno in enumerate(carlist):
        full_samples[carno] = np.zeros((runcnt, lapcnt))
    
    for runid in runs:
        #one run
        tss = dfx[runid][2]
        forecast = dfx[runid][1]
        
        for carid, carno in enumerate(carlist):
            #get mean for this run
            forecast_mean = np.nanmean(forecast[carno], axis=0)
            full_samples[carno][runid, :] = forecast_mean
            
            #if carno==3 and runid == 0:
            #    print('forecast:',forecast_mean)
            
    return full_samples, full_tss

#straight implementation of prisk
def quantile_loss(target, quantile_forecast, q):
    return 2.0 * np.nansum(
        np.abs(
            (quantile_forecast - target)
            * ((target <= quantile_forecast) - q)
        )
    )

def abs_target_sum(target): 
    return np.nansum(np.abs(target)) 

def prisk(full_samples, full_tss, verbose = False):
    carlist = full_tss.keys()
    tss = []
    forecasts = []
    forecasts_mean = []
    freq = '1min'
    start = pd.Timestamp("01-01-2019", freq=freq) 

    for car in carlist:
        testcar = car
        fc = SampleForecast(samples = full_samples[testcar][:, 12:], freq=freq, start_date=start + 12)

        samples = np.mean(full_samples[testcar][:, 12:], axis =0, keepdims=True)
        fc_mean = SampleForecast(samples = samples, freq=freq, start_date=start + 12)

        index = pd.date_range(start='2019-01-01 00:00:00', freq = 'T', periods = len(full_tss[testcar]))
        ts = pd.DataFrame(index = index, data = full_tss[testcar])    

        tss.append(ts)
        forecasts.append(fc)
        forecasts_mean.append(fc_mean)

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9]) 
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(tss))
    if verbose:
        print(json.dumps(agg_metrics, indent=4))  
    
    print(agg_metrics["wQuantileLoss[0.1]"], agg_metrics["wQuantileLoss[0.5]"],agg_metrics["wQuantileLoss[0.9]"])
    
    return agg_metrics


def prisk_direct_bysamples2(full_samples, full_tss, quantiles=[0.1,0.5,0.9], startid = 12, verbose=False):
    """
    target: endrank
    forecast: pred_endrank
    item_id: <carno, startlap>
    """
    
    carlist = full_tss.keys()
    
    prisk = np.zeros((len(carlist), len(quantiles)))
    target_sum = np.zeros((len(carlist)))
    aggrisk = np.zeros((len(quantiles)))
    
    for carid, carno in enumerate(carlist):

        # for this car
        forecast = full_samples[carno]
        target = full_tss[carno]
        
        #calc quantiles
        # len(quantiles) x 1
        quantile_forecasts = np.quantile(forecast, quantiles, axis=0)
        
        for idx, q in enumerate(quantiles):
            q_forecast = quantile_forecasts[idx]
            prisk[carid, idx] = quantile_loss(target[startid:], q_forecast[startid:], q)
            target_sum[carid] = abs_target_sum(target[startid:])
            
        if verbose==True and carno==3:
            print('target:', target[startid:])
            print('forecast:', q_forecast[startid:])
            print('target_sum:', target_sum[carid])
            
            print('quantile_forecasts:', quantile_forecasts[:,startid:])
        
    #agg
    #aggrisk = np.mean(prisk, axis=0)
    prisk_sum = np.nansum(prisk, axis=0)
    if verbose==True:
        print('prisk:',prisk)
        print('prisk_sum:',prisk_sum)
        print('target_sum:',target_sum)
    for idx, q in enumerate(quantiles):
        aggrisk[idx] = np.divide(prisk_sum[idx], np.sum(target_sum))
    
    agg_metrics = {}
    for idx, q in enumerate(quantiles):
        agg_metrics[f'wQuantileLoss[{q}]'] = aggrisk[idx]
        
    print(agg_metrics.values())
    
    return agg_metrics, aggrisk

In [6]:
def prisk_direct_bysamples(full_samples, full_tss, quantiles=[0.1,0.5,0.9], startid = 12, verbose=False):
    """
    calculate prisk by <samples, tss> directly (equal to gluonts implementation)
    
    target: endrank
    forecast: pred_endrank
    item_id: <carno, startlap>
    """
    
    carlist = full_tss.keys()
    
    prisk = np.zeros((len(carlist), len(quantiles)))
    target_sum = np.zeros((len(carlist)))
    aggrisk = np.zeros((len(quantiles)))
    
    for carid, carno in enumerate(carlist):

        # for this car
        forecast = full_samples[carno]
        target = full_tss[carno]
        
        #calc quantiles
        # len(quantiles) x 1
        quantile_forecasts = np.quantile(forecast, quantiles, axis=0)
        
        for idx, q in enumerate(quantiles):
            q_forecast = quantile_forecasts[idx]
            prisk[carid, idx] = quantile_loss(target[startid:], q_forecast[startid:], q)
            target_sum[carid] = abs_target_sum(target[startid:])
            
        if verbose==True and carno==3:
            print('target:', target[startid:])
            print('forecast:', q_forecast[startid:])
            print('target_sum:', target_sum[carid])
            
            print('quantile_forecasts:', quantile_forecasts[:,startid:])
        
    #agg
    #aggrisk = np.mean(prisk, axis=0)
    prisk_sum = np.nansum(prisk, axis=0)
    if verbose==True:
        print('prisk:',prisk)
        print('prisk_sum:',prisk_sum)
        print('target_sum:',target_sum)
    for idx, q in enumerate(quantiles):
        aggrisk[idx] = np.divide(prisk_sum[idx], np.sum(target_sum))
    
    agg_metrics = {}
    for idx, q in enumerate(quantiles):
        agg_metrics[f'wQuantileLoss[{q}]'] = aggrisk[idx]
        
    print(agg_metrics.values())
    
    return agg_metrics, aggrisk

def clear_samples(full_samples, full_tss, clearidx):
    """
    clear the laps in clearidx
    """
    import copy
    ret_samples = copy.deepcopy(full_samples)
    ret_tss = copy.deepcopy(full_tss)
    
    
    carlist = full_tss.keys()
    
    for carid, carno in enumerate(carlist):
        forecast = ret_samples[carno]
        target = ret_tss[carno]
        
        forecast[:, clearidx] = np.nan
        target[clearidx] = np.nan
        
        ret_samples[carno] = forecast
        ret_tss[carno] = target
        
    return ret_samples, ret_tss

def do_rerank(dfout, short=True):
    """
    carno','startlap','startrank','endrank','diff','sign','pred_endrank','pred_diff','pred_sign','endlap','pred_endlap
    
    output of prediction of target can be float
    
    resort the endrank globally
    
    """
    
    cols=['carno','startlap','startrank','endrank','diff','sign','pred_endrank','pred_diff','pred_sign','endlap','pred_endlap']
    colid={x:id for id,x in enumerate(cols)}
    
    #df = dfout.sort_values(by=['startlap','carno'])
    print('rerank...')
    laps = set(dfout.startlap.values)
    
    dfs = []
    for lap in laps:
        df = dfout[dfout['startlap']==lap].to_numpy()
        
        #print('in',df)
        
        idx = np.argsort(df[:,colid['pred_endrank']], axis=0)
        true_rank = np.argsort(idx, axis=0)
    
        df[:,colid['pred_endrank']] = true_rank
        
        #reset preds 
        df[:,colid['pred_diff']] = df[:,colid['pred_endrank']] - df[:,colid['endrank']]

        for rec in df:
            if rec[colid['pred_diff']] == 0:
                rec[colid['pred_sign']] = 0
            elif rec[colid['pred_diff']] > 0:
                rec[colid['pred_sign']] = 1
            else:
                rec[colid['pred_sign']] = -1        
        
        #print('out',df)
        if len(dfs) == 0:
            dfs = df
        else:
            dfs = np.vstack((dfs, df))
        #dfs.append(df)
        #np.vstack(df)
        
    #dfret = pd.concat(dfs)
    #data = np.array(dfs)
    if short:
        dfret = pd.DataFrame(dfs.astype(int), columns = cols[:-2])
    else:
        dfret = pd.DataFrame(dfs.astype(int), columns = cols)
    return dfret



## run

In [7]:

#
# global variables
#
stagedata = {}
global_carids = {}

#
# global settings
#
#_savedata = False
_savedata = True

#inlap status = 
# 0 , no inlap
# 1 , set previous lap
# 2 , set the next lap
_inlap_status = 0

#
# featuremode in [FEATURE_STATUS, FEATURE_PITAGE]:
#
_feature_mode = FEATURE_STATUS
_featureCnt = 9

#
# training parameters
#
freq = "1min"
context_ratio = 0.
_train_len = 40
prediction_length = 2

context_length =  40
dataset='rank'

#
#
#
epochs = 1000
#epochs = 10
contextlen = 40
gpuid = 5
#'deepAR-Oracle','deepARW-Oracle'
trainmodel = 'deepARW-Oracle'

#
#
#
distroutput = 'student'
batch_size = 32
use_feat_static = False
distr_outputs ={'student':StudentTOutput(),
                'negbin':NegativeBinomialOutput()
                }
distr_output = distr_outputs[distroutput]
#
# test
#
_test_event = 'Indy500-2018'

years = ['2013','2014','2015','2016','2017','2018','2019']
events = [f'Indy500-{x}' for x in years]
events_id={key:idx for idx, key in enumerate(events)}
dbid = f'Indy500_{years[0]}_{years[-1]}_v{_featureCnt}_p{_inlap_status}'

inlapstr = {0:'noinlap',1:'inlap',2:'outlap'}
featurestr = {FEATURE_STATUS:'nopitage',FEATURE_PITAGE:'pitage'}

#
#
#
experimentid = 'weighted-noinlap'

#
#
#
outputRoot = f"QuickTestOutput/{experimentid}/"
os.makedirs(outputRoot, exist_ok=True)

### 1. make laptime dataset

In [8]:
cur_carid = 0
for event in events:
    #dataid = f'{event}-{year}'
    #alldata, rankdata, acldata, flagdata
    stagedata[event] = load_data(event)

    alldata, rankdata, acldata, flagdata = stagedata[event]
    carlist = set(acldata['car_number'])
    laplist = set(acldata['completed_laps'])
    print('%s: carno=%d, lapnum=%d'%(event, len(carlist), len(laplist)))

    #build the carid map
    for car in carlist:
        if car not in global_carids:
            global_carids[car] = cur_carid
            cur_carid += 1
            
laptime_data = get_laptime_dataset(stagedata,inlap_status = _inlap_status)

if _savedata:
    import pickle
    #stintdf.to_csv('laptime-%s.csv'%year)
    savefile = outputRoot + f'laptime_rank_timediff_pit-oracle-{dbid}.pickle' 
    print(savefile)
    with open(savefile, 'wb') as f:
        #pack [global_carids, laptime_data]
        savedata = [global_carids, laptime_data]
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)

    savefile = outputRoot + f'stagedata-{dbid}.pickle' 
    print(savefile)
    with open(savefile, 'wb') as f:
        #pack [global_carids, laptime_data]
        savedata = stagedata
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)    

count of completed cars: 19
completed cars: [11 26  1 25 19  3  2 77 83 20 22  8 14  9 18 55 78  5 12]
cars: {1, 2, 3, 5, 8, 9, 11, 12, 77, 14, 78, 18, 19, 20, 83, 22, 55, 25, 26}
#cars= 19
count of completed cars: 19
completed cars: [11 26  1 25 19  3  2 77 83 20 22  8 14  9 18 55 78  5 12]
cars: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 25, 26, 27, 41, 55, 60, 63, 77, 78, 81, 83, 91, 98}
#cars= 33
Indy500-2013: carno=33, lapnum=201
count of completed cars: 20
completed cars: [28  3 25 34  2 26 11 12 22 21 16 77 68  5 17 33 18  8 14 98]
cars: {33, 2, 3, 34, 5, 68, 98, 8, 11, 12, 77, 14, 16, 17, 18, 21, 22, 25, 26, 28}
#cars= 20
count of completed cars: 20
completed cars: [28  3 25 34  2 26 11 12 22 21 16 77 68  5 17 33 18  8 14 98]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


cars: {2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 25, 26, 27, 28, 33, 34, 41, 63, 67, 68, 77, 83, 91, 98}
#cars= 33
Indy500-2014: carno=33, lapnum=201
count of completed cars: 20
completed cars: [ 2  1 83  9 15 27  3  6 21 22 11  5 14 24 28 98 48  7 29 26]
cars: {1, 2, 3, 98, 5, 6, 7, 9, 11, 14, 15, 48, 83, 21, 22, 24, 26, 27, 28, 29}
#cars= 20
count of completed cars: 20
completed cars: [ 2  1 83  9 15 27  3  6 21 22 11  5 14 24 28 98 48  7 29 26]
cars: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 32, 41, 43, 48, 63, 83, 88, 98}
#cars= 33
Indy500-2015: carno=33, lapnum=201
count of completed cars: 17
completed cars: [98 26 21 10 42  6  5  9 11 12  3 77 27 15  8 41 35]
cars: {98, 3, 35, 5, 6, 8, 9, 10, 11, 12, 41, 42, 15, 77, 21, 26, 27}
#cars= 17
count of completed cars: 17
completed cars: [98 26 21 10 42  6  5  9 11 12  3 77 27 15  8 41 35]
cars: {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 2

### 2. make gluonts db

In [9]:
#if _savedata:        
#    with open(outputRoot + f'laptime_rank_timediff_pit-oracle-{dbid}.pickle', 'rb') as f:
#        global_carids, laptime_data = pickle.load(f, encoding='latin1') 
            
_train_len = 40
_dataset_id = '%s-%s'%(inlapstr[_inlap_status], featurestr[_feature_mode])

outdir = outputRoot + _dataset_id
os.makedirs(outdir, exist_ok=True)

if dataset == 'laptime':
    subdir = 'laptime-indy500'
    os.makedirs(f'{outdir}/{subdir}', exist_ok=True)
    _run_ts = COL_LAPTIME
elif dataset == 'timediff':
    subdir = 'timediff-indy500'
    os.makedirs(f'{outdir}/{subdir}', exist_ok=True)
    _run_ts = COL_TIMEDIFF
elif dataset == 'rank':
    subdir = 'rank-indy500'
    os.makedirs(f'{outdir}/{subdir}', exist_ok=True)
    _run_ts = COL_RANK
else:
    print('error, dataset not support: ', dataset)
    
_task_id = f'{outdir}/{subdir}/'
dbname, train_ds, test_ds = makedbs()        
    

====event:Indy500-2013, prediction_len=2,train_len=40, max_len=200, min_len=200,context_len=10
rerank a short ts: carid=4，len=3
rerank a short ts: carid=6，len=34
carno:1, totallen:200, nancount:0, test_reccnt:0
carno:2, totallen:200, nancount:0, test_reccnt:0
carno:3, totallen:200, nancount:0, test_reccnt:0
a short ts: carid=4，len=3
carno:5, totallen:200, nancount:0, test_reccnt:0
a short ts: carid=6，len=34
carno:7, totallen:178, nancount:22, test_reccnt:0
carno:8, totallen:200, nancount:0, test_reccnt:0
carno:9, totallen:200, nancount:0, test_reccnt:0
carno:10, totallen:197, nancount:3, test_reccnt:0
carno:11, totallen:200, nancount:0, test_reccnt:0
carno:12, totallen:200, nancount:0, test_reccnt:0
carno:14, totallen:200, nancount:0, test_reccnt:0
carno:15, totallen:193, nancount:7, test_reccnt:0
carno:16, totallen:199, nancount:1, test_reccnt:0
carno:18, totallen:200, nancount:0, test_reccnt:0
carno:19, totallen:200, nancount:0, test_reccnt:0
carno:20, totallen:200, nancount:0, test_

carno:33, totallen:46, nancount:154, test_reccnt:34
carno:59, totallen:198, nancount:2, test_reccnt:186
carno:60, totallen:200, nancount:0, test_reccnt:188
carno:64, totallen:200, nancount:0, test_reccnt:188
carno:66, totallen:200, nancount:0, test_reccnt:188
carno:88, totallen:200, nancount:0, test_reccnt:188
carno:98, totallen:200, nancount:0, test_reccnt:188
skip this event: Indy500-2019
train len:161, test len:5340, totsl TsCnt:198, total ts len:34777


### 3. train the model

In [10]:
id='oracle'
run=1


#
#
#
runid=f'{trainmodel}-{dataset}-all-indy-f1min-t{prediction_length}-e{epochs}-r{run}_{id}_t{prediction_length}'
db=f'{dataset}-oracle-noip-noeid-all-all-f1min-t{prediction_length}-rIndy500-2018-gluonts-indy-2018.pickle'
outputfile = _task_id + runid
print("runid=", outputfile) 

#if _savedata:
#    #$root/data/indy2013-2018-inlap-nopitage/rank-indy500/$db
#    train_ds, test_ds = load_dataset(inputfile)

#get target dim
entry = next(iter(train_ds))
target_dim = entry['target'].shape
target_dim = target_dim[0] if len(target_dim) > 1 else 1
print('target_dim:%s', target_dim)

#runid = f'-i{outputfile}-e{epochs}-m{model}-p{prediction_length}-c{contextlen}-f{freq}-dim{target_dim}-dstr{distroutput}'
#print("runid=%s", runid)


estimator = init_estimator(trainmodel, gpuid, 
        epochs, batch_size,target_dim, distr_output = distr_output,use_feat_static = use_feat_static)

predictor = estimator.train(train_ds)

if _savedata:
    os.makedirs(outputfile, exist_ok=True)

    print('Start to save the model to %s', outputfile)
    predictor.serialize(Path(outputfile))
    print('End of saving the model.')



INFO:root:Start model training


runid= QuickTestOutput/weighted-noinlap/noinlap-nopitage/rank-indy500/deepARW-Oracle-rank-all-indy-f1min-t2-e1000-r1_oracle_t2
target_dim:%s 1


INFO:root:Epoch[0] Learning rate is 0.001
  0%|          | 0/100 [00:00<?, ?it/s]INFO:root:Number of parameters in DeepARWeightTrainingNetwork: 25244
100%|██████████| 100/100 [00:04<00:00, 22.78it/s, avg_epoch_loss=2.66]
INFO:root:Epoch[0] Elapsed time 4.393 seconds
INFO:root:Epoch[0] Evaluation metric 'epoch_loss'=2.656086
INFO:root:Epoch[1] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.26it/s, avg_epoch_loss=2.22]
INFO:root:Epoch[1] Elapsed time 4.126 seconds
INFO:root:Epoch[1] Evaluation metric 'epoch_loss'=2.224133
INFO:root:Epoch[2] Learning rate is 0.001
100%|██████████| 100/100 [00:03<00:00, 25.58it/s, avg_epoch_loss=2.05]
INFO:root:Epoch[2] Elapsed time 3.911 seconds
INFO:root:Epoch[2] Evaluation metric 'epoch_loss'=2.051012
INFO:root:Epoch[3] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.05it/s, avg_epoch_loss=1.56]
INFO:root:Epoch[20] Elapsed time 4.160 seconds
INFO:root:Epoch[20] Evaluation metric 'epoch_loss'=1.561064
INFO:root:Epoch[21

INFO:root:Epoch[53] Evaluation metric 'epoch_loss'=1.425124
INFO:root:Epoch[54] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.50it/s, avg_epoch_loss=1.42]
INFO:root:Epoch[54] Elapsed time 4.083 seconds
INFO:root:Epoch[54] Evaluation metric 'epoch_loss'=1.422892
INFO:root:Epoch[55] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.29it/s, avg_epoch_loss=1.44]
INFO:root:Epoch[55] Elapsed time 4.118 seconds
INFO:root:Epoch[55] Evaluation metric 'epoch_loss'=1.436995
INFO:root:Epoch[56] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.97it/s, avg_epoch_loss=1.42]
INFO:root:Epoch[56] Elapsed time 4.006 seconds
INFO:root:Epoch[56] Evaluation metric 'epoch_loss'=1.421006
INFO:root:Epoch[57] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.29it/s, avg_epoch_loss=1.44]
INFO:root:Epoch[57] Elapsed time 4.118 seconds
INFO:root:Epoch[57] Evaluation metric 'epoch_loss'=1.436055
INFO:root:Epoch[58] Learning rate is 0.001
100%|████████

INFO:root:Epoch[91] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.41it/s, avg_epoch_loss=1.35]
INFO:root:Epoch[91] Elapsed time 4.100 seconds
INFO:root:Epoch[91] Evaluation metric 'epoch_loss'=1.352095
INFO:root:Epoch[92] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.86it/s, avg_epoch_loss=1.35]
INFO:root:Epoch[92] Elapsed time 4.025 seconds
INFO:root:Epoch[92] Evaluation metric 'epoch_loss'=1.352343
INFO:root:Epoch[93] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 23.60it/s, avg_epoch_loss=1.35]
INFO:root:Epoch[93] Elapsed time 4.240 seconds
INFO:root:Epoch[93] Evaluation metric 'epoch_loss'=1.349496
INFO:root:Epoch[94] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.87it/s, avg_epoch_loss=1.36]
INFO:root:Epoch[94] Elapsed time 4.023 seconds
INFO:root:Epoch[94] Evaluation metric 'epoch_loss'=1.355789
INFO:root:Epoch[95] Learning rate is 0.001
100%|██████████| 100/100 [00:04<00:00, 24.72it/s, avg_epoch_loss=1.37]
IN

100%|██████████| 100/100 [00:04<00:00, 23.75it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[127] Elapsed time 4.212 seconds
INFO:root:Epoch[127] Evaluation metric 'epoch_loss'=1.296757
INFO:root:Epoch[128] Learning rate is 0.00025
100%|██████████| 100/100 [00:03<00:00, 25.04it/s, avg_epoch_loss=1.31]
INFO:root:Epoch[128] Elapsed time 3.996 seconds
INFO:root:Epoch[128] Evaluation metric 'epoch_loss'=1.312681
INFO:root:Epoch[129] Learning rate is 0.00025
100%|██████████| 100/100 [00:04<00:00, 24.94it/s, avg_epoch_loss=1.32]
INFO:root:Epoch[129] Elapsed time 4.012 seconds
INFO:root:Epoch[129] Evaluation metric 'epoch_loss'=1.318018
INFO:root:Epoch[130] Learning rate is 0.00025
100%|██████████| 100/100 [00:04<00:00, 24.85it/s, avg_epoch_loss=1.32]
INFO:root:Epoch[130] Elapsed time 4.026 seconds
INFO:root:Epoch[130] Evaluation metric 'epoch_loss'=1.315125
INFO:root:Epoch[131] Learning rate is 0.00025
100%|██████████| 100/100 [00:04<00:00, 24.43it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[131] Elapse

INFO:root:Epoch[163] Learning rate is 6.25e-05
100%|██████████| 100/100 [00:04<00:00, 24.41it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[163] Elapsed time 4.098 seconds
INFO:root:Epoch[163] Evaluation metric 'epoch_loss'=1.284676
INFO:root:Epoch[164] Learning rate is 6.25e-05
100%|██████████| 100/100 [00:04<00:00, 24.25it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[164] Elapsed time 4.126 seconds
INFO:root:Epoch[164] Evaluation metric 'epoch_loss'=1.303323
INFO:root:Epoch[165] Learning rate is 6.25e-05
100%|██████████| 100/100 [00:04<00:00, 24.33it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[165] Elapsed time 4.111 seconds
INFO:root:Epoch[165] Evaluation metric 'epoch_loss'=1.303256
INFO:root:Epoch[166] Learning rate is 6.25e-05
100%|██████████| 100/100 [00:04<00:00, 24.44it/s, avg_epoch_loss=1.29]
INFO:root:Epoch[166] Elapsed time 4.094 seconds
INFO:root:Epoch[166] Evaluation metric 'epoch_loss'=1.293288
INFO:root:Epoch[167] Learning rate is 6.25e-05
100%|██████████| 100/100 [00:03<00:00, 25.01it/

100%|██████████| 100/100 [00:03<00:00, 25.33it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[199] Elapsed time 3.949 seconds
INFO:root:Epoch[199] Evaluation metric 'epoch_loss'=1.304974
INFO:root:Epoch[200] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.39it/s, avg_epoch_loss=1.29]
INFO:root:Epoch[200] Elapsed time 3.940 seconds
INFO:root:Epoch[200] Evaluation metric 'epoch_loss'=1.294806
INFO:root:Epoch[201] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.15it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[201] Elapsed time 4.143 seconds
INFO:root:Epoch[201] Evaluation metric 'epoch_loss'=1.275892
INFO:root:Epoch[202] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.53it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[202] Elapsed time 4.078 seconds
INFO:root:Epoch[202] Evaluation metric 'epoch_loss'=1.299686
INFO:root:Epoch[203] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.72it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[203] Elapsed time 3

INFO:root:Epoch[236] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.15it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[236] Elapsed time 4.142 seconds
INFO:root:Epoch[236] Evaluation metric 'epoch_loss'=1.281669
INFO:root:Epoch[237] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.59it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[237] Elapsed time 4.069 seconds
INFO:root:Epoch[237] Evaluation metric 'epoch_loss'=1.273635
INFO:root:Epoch[238] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.93it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[238] Elapsed time 4.013 seconds
INFO:root:Epoch[238] Evaluation metric 'epoch_loss'=1.301487
INFO:root:Epoch[239] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 23.84it/s, avg_epoch_loss=1.29]
INFO:root:Epoch[239] Elapsed time 4.196 seconds
INFO:root:Epoch[239] Evaluation metric 'epoch_loss'=1.290440
INFO:root:Epoch[240] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.03it/s, avg_epoch_l

INFO:root:Epoch[272] Evaluation metric 'epoch_loss'=1.291819
INFO:root:Epoch[273] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.62it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[273] Elapsed time 4.064 seconds
INFO:root:Epoch[273] Evaluation metric 'epoch_loss'=1.272925
INFO:root:Epoch[274] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 23.94it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[274] Elapsed time 4.179 seconds
INFO:root:Epoch[274] Evaluation metric 'epoch_loss'=1.276847
INFO:root:Epoch[275] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.69it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[275] Elapsed time 4.051 seconds
INFO:root:Epoch[275] Evaluation metric 'epoch_loss'=1.281482
INFO:root:Epoch[276] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.44it/s, avg_epoch_loss=1.29]
INFO:root:Epoch[276] Elapsed time 4.093 seconds
INFO:root:Epoch[276] Evaluation metric 'epoch_loss'=1.285204
INFO:root:Epoch[277] Learning rate is 5e-05

100%|██████████| 100/100 [00:03<00:00, 25.04it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[309] Elapsed time 3.995 seconds
INFO:root:Epoch[309] Evaluation metric 'epoch_loss'=1.284273
INFO:root:Epoch[310] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.39it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[310] Elapsed time 4.102 seconds
INFO:root:Epoch[310] Evaluation metric 'epoch_loss'=1.262270
INFO:root:Epoch[311] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.84it/s, avg_epoch_loss=1.3]
INFO:root:Epoch[311] Elapsed time 4.028 seconds
INFO:root:Epoch[311] Evaluation metric 'epoch_loss'=1.298308
INFO:root:Epoch[312] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.37it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[312] Elapsed time 3.943 seconds
INFO:root:Epoch[312] Evaluation metric 'epoch_loss'=1.282048
INFO:root:Epoch[313] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.12it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[313] Elapsed time 

INFO:root:Epoch[346] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.67it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[346] Elapsed time 3.897 seconds
INFO:root:Epoch[346] Evaluation metric 'epoch_loss'=1.278152
INFO:root:Epoch[347] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.12it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[347] Elapsed time 4.148 seconds
INFO:root:Epoch[347] Evaluation metric 'epoch_loss'=1.269942
INFO:root:Epoch[348] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.20it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[348] Elapsed time 4.134 seconds
INFO:root:Epoch[348] Evaluation metric 'epoch_loss'=1.282087
INFO:root:Epoch[349] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 26.26it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[349] Elapsed time 3.810 seconds
INFO:root:Epoch[349] Evaluation metric 'epoch_loss'=1.273638
INFO:root:Epoch[350] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.42it/s, avg_epoch_

INFO:root:Epoch[382] Evaluation metric 'epoch_loss'=1.258606
INFO:root:Epoch[383] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.61it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[383] Elapsed time 4.066 seconds
INFO:root:Epoch[383] Evaluation metric 'epoch_loss'=1.268582
INFO:root:Epoch[384] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.73it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[384] Elapsed time 4.046 seconds
INFO:root:Epoch[384] Evaluation metric 'epoch_loss'=1.281223
INFO:root:Epoch[385] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.86it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[385] Elapsed time 4.025 seconds
INFO:root:Epoch[385] Evaluation metric 'epoch_loss'=1.280586
INFO:root:Epoch[386] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.07it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[386] Elapsed time 4.157 seconds
INFO:root:Epoch[386] Evaluation metric 'epoch_loss'=1.275939
INFO:root:Epoch[387] Learning rate is 5e-05

100%|██████████| 100/100 [00:04<00:00, 24.04it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[419] Elapsed time 4.163 seconds
INFO:root:Epoch[419] Evaluation metric 'epoch_loss'=1.273237
INFO:root:Epoch[420] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.69it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[420] Elapsed time 3.895 seconds
INFO:root:Epoch[420] Evaluation metric 'epoch_loss'=1.268060
INFO:root:Epoch[421] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.33it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[421] Elapsed time 3.950 seconds
INFO:root:Epoch[421] Evaluation metric 'epoch_loss'=1.276491
INFO:root:Epoch[422] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.50it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[422] Elapsed time 4.083 seconds
INFO:root:Epoch[422] Evaluation metric 'epoch_loss'=1.282681
INFO:root:Epoch[423] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.73it/s, avg_epoch_loss=1.29]
INFO:root:Epoch[423] Elapsed time

INFO:root:Epoch[456] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.13it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[456] Elapsed time 4.146 seconds
INFO:root:Epoch[456] Evaluation metric 'epoch_loss'=1.284043
INFO:root:Epoch[457] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.44it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[457] Elapsed time 3.933 seconds
INFO:root:Epoch[457] Evaluation metric 'epoch_loss'=1.258533
INFO:root:Epoch[458] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.92it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[458] Elapsed time 4.015 seconds
INFO:root:Epoch[458] Evaluation metric 'epoch_loss'=1.276964
INFO:root:Epoch[459] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.58it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[459] Elapsed time 4.071 seconds
INFO:root:Epoch[459] Evaluation metric 'epoch_loss'=1.275262
INFO:root:Epoch[460] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 26.05it/s, avg_epoch_

INFO:root:Epoch[492] Evaluation metric 'epoch_loss'=1.247640
INFO:root:Epoch[493] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.94it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[493] Elapsed time 3.857 seconds
INFO:root:Epoch[493] Evaluation metric 'epoch_loss'=1.281211
INFO:root:Epoch[494] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.04it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[494] Elapsed time 3.995 seconds
INFO:root:Epoch[494] Evaluation metric 'epoch_loss'=1.257846
INFO:root:Epoch[495] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 23.93it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[495] Elapsed time 4.181 seconds
INFO:root:Epoch[495] Evaluation metric 'epoch_loss'=1.255639
INFO:root:Epoch[496] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.50it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[496] Elapsed time 3.930 seconds
INFO:root:Epoch[496] Evaluation metric 'epoch_loss'=1.263590
INFO:root:Epoch[497] Learning rate is 5e-05

100%|██████████| 100/100 [00:03<00:00, 25.34it/s, avg_epoch_loss=1.28]
INFO:root:Epoch[529] Elapsed time 3.948 seconds
INFO:root:Epoch[529] Evaluation metric 'epoch_loss'=1.279945
INFO:root:Epoch[530] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.79it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[530] Elapsed time 4.036 seconds
INFO:root:Epoch[530] Evaluation metric 'epoch_loss'=1.272017
INFO:root:Epoch[531] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.95it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[531] Elapsed time 4.010 seconds
INFO:root:Epoch[531] Evaluation metric 'epoch_loss'=1.265689
INFO:root:Epoch[532] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.70it/s, avg_epoch_loss=1.25]
INFO:root:Epoch[532] Elapsed time 3.893 seconds
INFO:root:Epoch[532] Evaluation metric 'epoch_loss'=1.252818
INFO:root:Epoch[533] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.61it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[533] Elapsed time

INFO:root:Epoch[566] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.23it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[566] Elapsed time 4.129 seconds
INFO:root:Epoch[566] Evaluation metric 'epoch_loss'=1.257011
INFO:root:Epoch[567] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.82it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[567] Elapsed time 4.032 seconds
INFO:root:Epoch[567] Evaluation metric 'epoch_loss'=1.255491
INFO:root:Epoch[568] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.37it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[568] Elapsed time 3.944 seconds
INFO:root:Epoch[568] Evaluation metric 'epoch_loss'=1.273371
INFO:root:Epoch[569] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.23it/s, avg_epoch_loss=1.25]
INFO:root:Epoch[569] Elapsed time 4.130 seconds
INFO:root:Epoch[569] Evaluation metric 'epoch_loss'=1.248688
INFO:root:Epoch[570] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.79it/s, avg_epoch_

INFO:root:Epoch[602] Evaluation metric 'epoch_loss'=1.254737
INFO:root:Epoch[603] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.00it/s, avg_epoch_loss=1.25]
INFO:root:Epoch[603] Elapsed time 4.168 seconds
INFO:root:Epoch[603] Evaluation metric 'epoch_loss'=1.246743
INFO:root:Epoch[604] Learning rate is 5e-05
100%|██████████| 100/100 [00:03<00:00, 25.00it/s, avg_epoch_loss=1.27]
INFO:root:Epoch[604] Elapsed time 4.002 seconds
INFO:root:Epoch[604] Evaluation metric 'epoch_loss'=1.268618
INFO:root:Epoch[605] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 23.87it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[605] Elapsed time 4.192 seconds
INFO:root:Epoch[605] Evaluation metric 'epoch_loss'=1.259102
INFO:root:Epoch[606] Learning rate is 5e-05
100%|██████████| 100/100 [00:04<00:00, 24.16it/s, avg_epoch_loss=1.26]
INFO:root:Epoch[606] Elapsed time 4.142 seconds
INFO:root:Epoch[606] Evaluation metric 'epoch_loss'=1.258471
INFO:root:Epoch[607] Learning rate is 5e-05

### 4. evaluate the model

In [11]:
acc, ret, pret = {}, {}, {}

loopcnt=2

year = '2018'
lapmode = _inlap_status
fmode = _feature_mode
runts = dataset

testmodel = 'oracle'
modelfile = 'oracle'

mid = f'{testmodel}-%s-%s-%s-%s'%(runts, year, inlapstr[lapmode], featurestr[fmode])
datasetid = outputRoot + _dataset_id

if runts == 'rank':
    acc[mid], ret[mid] = simulation(datasetid, _test_event, 
                'rank',stint.COL_RANK,'rank',
               prediction_length, stint.MODE_ORACLE,loopcnt, 
                  pitmodel=modelfile, model=testmodel, inlapmode=lapmode,featuremode =fmode)        
else:
    acc[mid], ret[mid] = simulation(datasetid, _test_event, 
                    'timediff',stint.COL_TIMEDIFF,'timediff2rank',
                   prediction_length, stint.MODE_ORACLE,loopcnt, 
                      pitmodel=modelfile, model=testmodel, inlapmode=lapmode,featuremode =fmode)

allsamples, alltss = get_allsamples(ret[mid], year=year)
_, pret[mid]= prisk_direct_bysamples(allsamples, alltss)
print(pret[mid])
    
dfs={}
       
mode=1
df = get_alldf_mode(ret[mid], year=year,mode=mode)
name = '%s_%s'%(testmodel, 'mean' if mode==1 else ('mode' if mode==0 else 'median'))
if year not in dfs:
    dfs[year] = {}
dfs[year][name] = df

_trim = 0
_include_final = True
_include_stintlen = True
include_str = '1' if _include_final else '0'
stint_str = '1' if _include_stintlen else ''            
simulation_outfile=outputRoot + f'shortterm-dfout-oracle-indy500-{dataset}-{inlapstr[_inlap_status]}-{featurestr[_feature_mode]}-2018-oracle-l{loopcnt}-alldata-weighted.pickle'

with open(simulation_outfile, 'wb') as f:
    savedata = [dfs,acc,ret,pret]
    pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uni_ds['rank_diff'][mask] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uni_ds['time_diff'][mask] = 0
INFO:root:Using GPU


init: load dataset laptime_rank_timediff_pit-oracle-Indy500_2013_2019_v9_p0.pickle with 7 races, 58 cars
predicting model=oracle, plen=2
loading model...deepARW-Oracle-rank-all-indy-f1min-t2-e1000-r1_oracle_t2...done!, ctx:gpu(0)
evalbyrank: True
evalbyrank: True
model: acc={0.87}, mae={1.02}, rmse={2.26},r2={0.93}, {188}
            naive: acc={0.72}, mae={1.34}, rmse={3.25},r2={0.85}
model: acc={0.89}, mae={1.02}, rmse={2.26},r2={0.93}, {188}
            naive: acc={0.72}, mae={1.34}, rmse={3.25},r2={0.85}
[[0.88031915 1.02359551 2.26134025 0.92860477]
 [0.71808511 1.33932584 3.24896987 0.85262365]]
sacmplecnt: 100 lapcnt: 200 runcnt: 2
dict_values([0.07605958608019021, 0.080683291905328, 0.07673012536474658])
[0.07605959 0.08068329 0.07673013]




df size: 5340


### 5. final evaluation

In [12]:
with open(simulation_outfile, 'rb') as f:
    dfs,acc,ret,pret = pickle.load(f, encoding='latin1') 

# get pit laps, pit-covered-laps
# pitdata[year] = [pitlaps, pitcoveredlaps]
with open('pitcoveredlaps-g1.pickle', 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    pitdata = pickle.load(f, encoding='latin1') 

In [13]:
#
# Model,SignAcc,MAE,50-Risk,90-Risk
# 
cols = ['Year','Model','laptype','SignAcc','MAE','50-Risk','90-Risk']
plen = prediction_length
usemeanstr='mean'

#load data
# dfs,acc,ret,pret
ranknetdf = dfs

In [14]:
retdata = []
            
#oracle
dfx = ret[mid]
allsamples, alltss = get_allsamples(dfx, year=year)
#_, pret[mid]= prisk_direct_bysamples(ret[mid][0][1], ret[mid][0][2])
_, prisk_vals = prisk_direct_bysamples(allsamples, alltss)

dfout = do_rerank(ranknetdf[year]['oracle_mean'])
accret = stint.get_evalret_shortterm(dfout)[0]
#fsamples, ftss = runs2samples_ex(ranknet_ret[f'oracle-RANK-{year}-inlap-nopitage'],[])
#_, prisk_vals = prisk_direct_bysamples(fsamples, ftss)
retdata.append([year,'Oracle','all', accret[0], accret[1], prisk_vals[1], prisk_vals[2]])
        
for laptype in ['normal','pit']:
    # select the set
    pitcoveredlaps = pitdata[year][1]
    normallaps = set([x for x in range(1,201)]) - pitcoveredlaps

    if laptype == 'normal':
        sellaps = normallaps
        clearlaps = pitcoveredlaps
    else:
        sellaps = pitcoveredlaps
        clearlaps = normallaps


    # pitcoveredlaps start idx = 1
    startlaps = [x-plen-1 for x in sellaps]
    #sellapidx = np.array([x-1 for x in sellaps])
    clearidx = np.array([x-1 for x in clearlaps])
    print('sellaps:', len(sellaps), 'clearlaps:',len(clearlaps))

    #oracle
    #outfile=f'shortterm-dfout-ranknet-indy500-rank-inlap-nopitage-20182019-oracle-l10-alldata-weighted.pickle'
    #_all = load_dfout_all(outfile)[0]
    #ranknetdf, acc, ret, pret = _all[0],_all[1],_all[2],_all[3]

    dfout = do_rerank(ranknetdf[year]['oracle_mean'])

    allsamples, alltss = get_allsamples(dfx, year=year)


    allsamples, alltss = clear_samples(allsamples, alltss,clearidx)

    _, prisk_vals = prisk_direct_bysamples(allsamples, alltss)

    dfout = dfout[dfout['startlap'].isin(startlaps)]
    accret = stint.get_evalret_shortterm(dfout)[0]

    print(year, laptype,'RankNet-Oracle',accret[0], accret[1], prisk_vals[1], prisk_vals[2])
    retdata.append([year, 'Oracle',laptype, accret[0], accret[1], prisk_vals[1], prisk_vals[2]])
        
oracle_eval_result = pd.DataFrame(data=retdata, columns=cols)

sacmplecnt: 100 lapcnt: 200 runcnt: 2
dict_values([0.07605958608019021, 0.080683291905328, 0.07673012536474658])
rerank...
model: acc={0.88}, mae={1.02}, rmse={2.25},r2={0.93}, {188}
            naive: acc={0.72}, mae={1.34}, rmse={3.25},r2={0.85}
sellaps: 78 clearlaps: 122
rerank...




sacmplecnt: 100 lapcnt: 200 runcnt: 2
dict_values([0.03592620986381254, 0.03916154037620452, 0.03628548874586637])
model: acc={0.89}, mae={0.44}, rmse={1.06},r2={0.99}, {66}
            naive: acc={0.88}, mae={0.29}, rmse={0.92},r2={0.99}
2018 normal RankNet-Oracle 0.8939393939380395 0.44353182751540043 0.03916154037620452 0.03628548874586637
sellaps: 122 clearlaps: 78
rerank...
sacmplecnt: 100 lapcnt: 200 runcnt: 2
dict_values([0.1006488311801486, 0.10612317814426703, 0.10151007603320189])
model: acc={0.87}, mae={1.35}, rmse={2.71},r2={0.89}, {122}
            naive: acc={0.63}, mae={1.94}, rmse={4.02},r2={0.76}
2018 pit RankNet-Oracle 0.8688524590156813 1.3508254716981132 0.10612317814426703 0.10151007603320189




In [15]:
oracle_eval_result

Unnamed: 0,Year,Model,laptype,SignAcc,MAE,50-Risk,90-Risk
0,2018,Oracle,all,0.87766,1.01985,0.080683,0.07673
1,2018,Oracle,normal,0.893939,0.443532,0.039162,0.036285
2,2018,Oracle,pit,0.868852,1.350825,0.106123,0.10151


In [16]:
oracle_eval_result.to_csv(f'{experimentid}-oracle_eval_result.csv')