### laptime & rank dataset + oracle features

base: 9.DeepModels/laptime_rank_dataset.ipynb

Build a time series dataset across all the oval races, including laptime and rank. When this dataset is used for forecasting, covariates that describe the racing status, such as track_status and lap_status, cannot be included, because they are not known in advance. However, they can be used in an oracle test to establish the upper bound of the predictor's performance.

Changed to a new dataset format that follows the telemetry dataset.

raw:
+  [(eventid, carids: rowid -> carno, datalist)]

datalist := [datalist_entry] in shape of #car_number

datalist_entry := [[laptime, rank, track_status, lap_status]], in shape of #featureCnt x #totallaps (padded by nan)

gluonts:


In [77]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import mxnet as mx
from mxnet import gluon
import pickle
import json
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

## Load Data

In [2]:
import os
os.getcwd()

'/scratch/hpda/indycar/notebook/10.OracleModel'

In [3]:
#
# parameters
#
# Season to process; passed to load_data() and used in the raw-dataset pickle
# file name.
#year = '2017'
year = '2018'
#event = 'Toronto'
# Source of the per-event figures below:
#https://www.racing-reference.info/season-stats/2018/O/#
# NOTE(review): the three lists below look index-aligned (one entry per event,
# in the order of `events`); presumably total race distance and lap length in
# miles -- confirm against the source page.
events_totalmiles=[256,500,372,268,500,310]
events_laplen = [1.022,2.5,1.5,0.894,2.5,1.25]
events = ['Phoenix','Indy500','Texas','Iowa','Pocono','Gateway']
# event name -> position in `events`; used as the numeric event id
events_id={key:idx for idx, key in enumerate(events)}
# Uncomment one of these to restrict the run to a single event.
#events = ['Indy500']
#events = ['Phoenix']

In [4]:
# make indy car completed_laps dataset
# car_number, completed_laps, rank, elapsed_time, rank_diff, time_diff
def make_cl_data(dataset):
    """Build the per-car, per-lap table with lap-to-lap rank and time deltas.

    The input may hold several records for the same (car_number,
    completed_laps) pair; only the one with the smallest elapsed_time (ties
    broken by original row order) is kept.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw timing records. Must contain the columns dropped and selected
        below (car_number, completed_laps, rank, elapsed_time, current_status,
        track_status, lap_status, plus the bookkeeping columns that are
        dropped).

    Returns
    -------
    pandas.DataFrame
        One row per (car_number, completed_laps), sorted by car then lap, with
        columns car_number, completed_laps, rank, elapsed_time, rank_diff,
        time_diff, current_status, track_status, lap_status. rank_diff and
        time_diff are the differences to the same car's previous lap and are 0
        on each car's first row.
    """
    # pick up data with valid rank: earliest record of each (car, lap) pair
    rankdata = dataset.rename_axis('MyIdx').sort_values(by=['elapsed_time','MyIdx'], ascending=True)
    rankdata = rankdata.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')

    # resort by car_number, lap
    uni_ds = rankdata.sort_values(by=['car_number', 'completed_laps', 'elapsed_time'], ascending=True)
    uni_ds = uni_ds.drop(["unique_id", "best_lap",
                      "laps_behind_leade","laps_behind_prec","overall_rank","pit_stop_count",
                      "last_pitted_lap","start_position","laps_led"], axis=1)

    carnumber = set(uni_ds['car_number'])
    print('cars:', carnumber)
    print('#cars=', len(carnumber))

    # uni_ds is already sorted by car_number and lap, so a plain diff() gives
    # the per-lap deltas; only the first row of each car (where diff() would
    # subtract the previous car's last lap) has to be reset to 0.
    first_row_of_car = uni_ds['car_number'] != uni_ds['car_number'].shift(1)

    # Use .loc for the reset: the chained form df[col][mask] = 0 raises
    # SettingWithCopyWarning and does nothing under pandas copy-on-write.
    uni_ds['rank_diff'] = uni_ds['rank'].diff()
    uni_ds.loc[first_row_of_car, 'rank_diff'] = 0

    uni_ds['time_diff'] = uni_ds['elapsed_time'].diff()
    uni_ds.loc[first_row_of_car, 'time_diff'] = 0

    df = uni_ds[['car_number','completed_laps','rank','elapsed_time',
                 'rank_diff','time_diff',"current_status", "track_status", "lap_status"]]

    return df

def make_lapstatus_data(dataset):
    """Return the lap-by-lap track status as seen by one car that finished.

    A car counts as finished when it has a record at the highest
    completed_laps value found in `dataset`. The first such car is used as the
    reference; its records are de-duplicated per lap (first occurrence wins).

    Returns a DataFrame with the columns completed_laps and track_status.
    """
    last_lap = max(dataset.completed_laps)

    # cars that have a record on the final lap
    finishers = dataset[dataset.completed_laps == last_lap].car_number.values
    print('count of completed cars:', len(finishers))
    print('completed cars:', finishers)

    # any finisher covers every lap, so take the first one as reference
    reference = dataset[dataset['car_number'] == finishers[0]]
    reference = reference.drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')
    return reference[['completed_laps', 'track_status']]
    

In [5]:
def load_data(event, year):
    """Load one race's final CSV and derive the frames used by the notebook.

    Reads '../data/final/C_<event>-<year>-final.csv'.

    Returns a 4-tuple (alldata, rankdata, acldata, flagdata):
      alldata  - copy of the raw records of every car
      rankdata - alldata reduced to the earliest record per (car, lap)
      acldata  - make_cl_data() applied to all cars
      flagdata - make_lapstatus_data() applied to the cars that finished
    """
    csv_path = '../data/final/C_'+ event +'-' + year + '-final.csv'
    raw = pd.read_csv(csv_path)

    # cars that have a record on the last lap of the race
    last_lap = max(raw.completed_laps)
    finishers = raw[raw.completed_laps == last_lap].car_number.values
    print('count of completed cars:', len(finishers))
    print('completed cars:', finishers)

    alldata = raw.copy()
    finished_only = raw[raw['car_number'].isin(finishers)]

    # earliest record of every (car, lap) pair across all cars
    rankdata = (
        alldata.rename_axis('MyIdx')
        .sort_values(by=['elapsed_time', 'MyIdx'], ascending=True)
        .drop_duplicates(subset=['car_number', 'completed_laps'], keep='first')
    )

    # The finishers-only table is not returned; the call is kept because
    # make_cl_data prints the car list of the frame it processes.
    make_cl_data(finished_only)
    flagdata = make_lapstatus_data(finished_only)
    acldata = make_cl_data(alldata)

    return alldata, rankdata, acldata, flagdata

### overall view of laptime scatter plots



In [6]:
def get_cardata(curcarno, ycol='time_diff'):
    """Collect the plotting data of one car from the notebook-level frames.

    Reads the module-level `acldata` and `rankdata` of the currently selected
    event.

    Returns (car, x, y, pits, yellowflags):
      car         - the car's rows of acldata
      x, y        - completed_laps and `ycol` values, first row skipped
      pits        - array of laps whose lap_status is 'P'
      yellowflags - array of [start_lap, end_lap] pairs of yellow-flag
                    periods, or an empty list when the number of flag
                    transitions is odd (a message is printed in that case)
    """
    status_cols = ['completed_laps','rank','car_number','lap_status','track_status',
                   'pit_stop_count','current_status','start_position']

    car = acldata[acldata['car_number']==curcarno]
    status_rows = rankdata[rankdata['car_number'] == curcarno][status_cols]

    x = car['completed_laps'][1:].values
    y = car[ycol][1:].values

    pits = []
    flag_edges = []
    prev_flag = 'x'
    for lap_value, lap_status, track_status in status_rows[
            ['completed_laps', 'lap_status', 'track_status']].values:
        lap = int(lap_value)

        if lap_status == 'P':
            pits.append(lap)

        # record the lap on every green->yellow and yellow->green transition
        is_yellow = track_status == 'Y'
        was_yellow = prev_flag == 'Y'
        if is_yellow != was_yellow:
            flag_edges.append(lap)
        prev_flag = track_status

    pits = np.array(pits)

    flag_edges = np.array(flag_edges)
    if flag_edges.shape[0] % 2 == 1:
        # an unmatched start means the car's records end under yellow
        print('crash?:carno=', curcarno)
        yellowflags = []
    else:
        # consecutive edges form (start, end) pairs
        yellowflags = flag_edges.reshape((-1,2))

    return car, x, y, pits, yellowflags

### build the dataset

In [12]:
def get_laptime_dataset(stagedata):
    """
    Convert the per-event frames into dense per-car feature arrays.

    input: {event: (alldata, rankdata, acldata, flagdata)}; only acldata is
           used. The event id is looked up in the module-level `events_id`.
    output: laptime & rank data

    [[
    eventid,
    carids : rowid -> carno,
    datalist: #car_number x features x #totallaps (padded by NaN)
        entry: [[laptime, rank, track_status, lap_status]]
    ]]

    Lap 0 records are skipped, so column k of datalist holds lap k+1.
    track_status is encoded as 1 for 'Y' and 0 otherwise; lap_status as 1 for
    'P' and 0 otherwise. Laps a car has no record for stay NaN.
    """
    # feature row indices: laptime, rank, track_status, lap_status
    LAPTIME = 0
    RANK = 1
    TRACK_STATUS = 2
    LAP_STATUS = 3
    featureCnt = 4

    laptime_data = []
    for event in stagedata.keys():
        eventid = events_id[event]

        alldata, rankdata, acldata, flagdata = stagedata[event]

        # Sort the car numbers so the row order does not depend on set
        # iteration order.
        carlist = sorted(set(acldata['car_number']))
        totalcars = len(carlist)
        # Size the lap axis by the highest lap number rather than the count of
        # distinct laps, so a gap in the lap numbering cannot overflow it.
        totallaps = int(acldata['completed_laps'].max())

        #carnumber -> rowid and its inverse
        carids = {carno: rowid for rowid, carno in enumerate(carlist)}
        decode_carids = dict(enumerate(carlist))

        # np.nan instead of the np.NaN alias, which NumPy 2.0 removed
        datalist = np.full((totalcars, featureCnt, totallaps), np.nan)

        lapdata = acldata[['car_number','completed_laps',
                           'time_diff','rank','track_status', 'lap_status']].to_numpy()

        for row in lapdata:
            lap = int(row[1])
            # lap 0 is the record before the first lap is completed
            if lap == 0:
                continue

            rowid = carids[row[0]]
            col = lap - 1

            datalist[rowid, LAPTIME, col] = float(row[2])
            datalist[rowid, RANK, col] = int(row[3])
            datalist[rowid, TRACK_STATUS, col] = 1 if row[4]=='Y' else 0
            datalist[rowid, LAP_STATUS, col] = 1 if row[5]=='P' else 0

        #add one record
        laptime_data.append([eventid, decode_carids, datalist])
        print('event=%s, records=%s'%(event, datalist.shape))

    return laptime_data

### load data

In [8]:
# Load every configured event and assign each car number a global id in
# first-seen order.
stagedata, global_carids = {}, {}
traindata = None
cur_carid = 0
for event in events:
    # each entry is (alldata, rankdata, acldata, flagdata)
    stagedata[event] = load_data(event, year)
    alldata, rankdata, acldata, flagdata = stagedata[event]

    carlist = set(acldata['car_number'])
    laplist = set(acldata['completed_laps'])
    print('%s: carno=%d, lapnum=%d'%(event, len(carlist), len(laplist)))

    # extend the car number -> global id map with cars not seen before
    for car in carlist:
        if car in global_carids:
            continue
        global_carids[car] = cur_carid
        cur_carid += 1
    

count of completed cars: 11
completed cars: [ 1  6 27  9 28  5 20 14 15 22 30]
cars: {1, 5, 6, 9, 14, 15, 20, 22, 27, 28, 30}
#cars= 11
count of completed cars: 11
completed cars: [ 1  6 27  9 28  5 20 14 15 22 30]
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 32, 59, 88, 98}
#cars= 23
Phoenix: carno=23, lapnum=251
count of completed cars: 18
completed cars: [12 20  9 27 28 22 29  1  6 15 66 98  4 88 25 60 64 23]
cars: {64, 1, 66, 98, 4, 6, 9, 12, 60, 15, 20, 22, 23, 88, 25, 27, 28, 29}
#cars= 18
count of completed cars: 18
completed cars: [12 20  9 27 28 22 29  1  6 15 66 98  4 88 25 60 64 23]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


cars: {1, 3, 4, 6, 7, 9, 10, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 59, 60, 64, 66, 88, 98}
#cars= 33
Indy500: carno=33, lapnum=201
count of completed cars: 9
completed cars: [ 9 22 27  5 28 15 30 18 10]
cars: {5, 9, 10, 15, 18, 22, 27, 28, 30}
#cars= 9
count of completed cars: 9
completed cars: [ 9 22 27  5 28 15 30 18 10]
cars: {1, 3, 4, 5, 6, 7, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 47, 55, 57, 59, 60, 68, 73, 83, 88, 98}
#cars= 32
Texas: carno=32, lapnum=249
count of completed cars: 5
completed cars: [ 5 21 30  1  6]
cars: {1, 5, 6, 21, 30}
#cars= 5
count of completed cars: 5
completed cars: [ 5 21 30  1  6]
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 19, 20, 21, 22, 23, 26, 27, 28, 30, 59, 88, 98}
#cars= 22
Iowa: carno=22, lapnum=301
count of completed cars: 4
completed cars: [27 12  9 18]
cars: {9, 18, 27, 12}
#cars= 4
count of completed cars: 4
completed cars: [27 12  9 18]
cars: {1, 4, 5, 6, 9, 10, 12, 14, 15, 18, 

In [13]:
# One [eventid, rowid -> carno map, feature array] entry per loaded event;
# make_dataset() below reads this module-level list.
laptime_data = get_laptime_dataset(stagedata)

event=Phoenix, records=(23, 4, 250)
event=Indy500, records=(33, 4, 200)
event=Texas, records=(32, 4, 248)
event=Iowa, records=(22, 4, 300)
event=Pocono, records=(22, 4, 200)
event=Gateway, records=(21, 4, 248)


In [16]:
#check Car12 Indy500
# laptime_data[1] is the Indy500 entry (index 1 in `events`), [2] is its
# feature array; show all features of row 7 for the first 50 laps.
laptime_data[1][2][7,:,:50]

array([[ 41.9238,  41.036 ,  41.3339,  41.0918,  40.7903,  41.1153,
         41.2998,  41.1854,  41.0853,  41.0486,  41.2261,  41.3214,
         41.2019,  41.1745,  41.3155,  41.2843,  41.4283,  41.407 ,
         41.4171,  41.4934,  41.3869,  41.2333,  41.2477,  41.3136,
         41.1053,  41.398 ,  41.9177,  41.9383,  42.1843,  44.0387,
         45.6879,  56.1652,  67.5397,  41.6366,  40.9116,  40.359 ,
         40.5654,  40.9596,  41.2025,  41.2059,  41.3058,  41.0387,
         41.124 ,  41.4863,  41.3339,  41.3737,  41.7481,  62.8487,
         97.1485, 116.1604],
       [  2.    ,   2.    ,   3.    ,   3.    ,   3.    ,   3.    ,
          3.    ,   3.    ,   3.    ,   3.    ,   3.    ,   3.    ,
          3.    ,   3.    ,   3.    ,   3.    ,   3.    ,   3.    ,
          3.    ,   3.    ,   3.    ,   3.    ,   3.    ,   3.    ,
          3.    ,   3.    ,   3.    ,   3.    ,   3.    ,   4.    ,
          2.    ,   4.    ,  18.    ,  10.    ,   6.    ,   6.    ,
          6.    ,  

In [19]:
#pocono crash
# laptime_data[4] is the Pocono entry (index 4 in `events`); row 7's series
# is NaN-padded after the car's last recorded lap.
laptime_data[4][2][7,:,:50]

array([[ 92.5507, 101.063 , 104.9568, 100.7178,  91.1552,  80.7788,
         63.8658, 157.409 , 162.3938, 105.6318, 110.3061,  93.895 ,
         45.941 ,  43.3921,  43.4406, 448.2418,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan],
       [ 14.    ,  14.    ,  14.    ,  16.    ,  16.    ,  16.    ,
         11.    ,  11.    ,  11.    ,  11.    ,   8.    ,   8.    ,
          8.    ,   8.    ,   8.    ,  16.    ,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,      nan,      nan,      nan,      nan,      nan,
             nan,  

In [9]:
# Rebind the notebook-level frames to the Indy500 event and show the first
# ten per-lap rows of car 12.
alldata, rankdata, acldata, flagdata = stagedata['Indy500']
acldata[acldata['car_number']==12].head(10)

Unnamed: 0_level_0,car_number,completed_laps,rank,elapsed_time,rank_diff,time_diff,current_status,track_status,lap_status
MyIdx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,12,0,3,0.2247,0.0,0.0,Active,G,T
37,12,1,2,42.1485,-1.0,41.9238,Active,G,T
148,12,2,2,83.1845,0.0,41.036,Active,G,T
231,12,3,3,124.5184,1.0,41.3339,Active,G,T
311,12,4,3,165.6102,0.0,41.0918,Active,G,T
393,12,5,3,206.4005,0.0,40.7903,Active,G,T
474,12,6,3,247.5158,0.0,41.1153,Active,G,T
558,12,7,3,288.8156,0.0,41.2998,Active,G,T
644,12,8,3,330.001,0.0,41.1854,Active,G,T
729,12,9,3,371.0863,0.0,41.0853,Active,G,T


In [11]:
# Preview the object array that get_laptime_dataset iterates over (here
# without the lap_status column).
lapdata = acldata[['car_number','completed_laps','time_diff','rank','track_status']].to_numpy()
lapdata[:10]

array([[1, 0, 0.0, 4, 'G'],
       [1, 1, 42.3679, 4, 'G'],
       [1, 2, 41.0984, 4, 'G'],
       [1, 3, 41.094300000000004, 4, 'G'],
       [1, 4, 41.11800000000001, 4, 'G'],
       [1, 5, 41.07169999999999, 4, 'G'],
       [1, 6, 41.18700000000001, 4, 'G'],
       [1, 7, 41.26490000000001, 4, 'G'],
       [1, 8, 41.34119999999996, 4, 'G'],
       [1, 9, 41.36439999999999, 4, 'G']], dtype=object)

In [12]:
import pickle
#stintdf.to_csv('laptime-%s.csv'%year)
# Persist the raw dataset for the configured year.
with open(f'laptime_rank-oracle-{year}.pickle', 'wb') as f:
    # payload is a two-element list: [global_carids, laptime_data]
    savedata = [global_carids, laptime_data]
    # Pickle the payload using the highest protocol available.
    pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)

### convert to gluonts dataset

In [80]:

def nan_helper(y):
    """Locate the NaNs of an array and provide a mask-to-index converter.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, boolean mask that is True where y is NaN
        - index, a function that turns a boolean mask into the integer
          indices of its True entries
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """
    def to_indices(mask):
        # indices along the first axis of the True entries
        return mask.nonzero()[0]

    nan_mask = np.isnan(y)
    return nan_mask, to_indices

def test_flag(a, bitflag):
    return (a & bitflag) ==  bitflag

#
# Constants shared by make_dataset().
#
# Row index of each feature inside a car's [features x laps] array.
COL_LAPTIME=0
COL_RANK=1
COL_TRACKSTATUS=2
COL_LAPSTATUS=3
# oracle_mode values. make_dataset() checks them with test_flag(), i.e. as
# bit flags, so the non-zero ones can be OR-ed together.
#   MODE_ORACLE  - no flag set: both covariates are kept as recorded
#   MODE_NOLAP   - lap_status covariate is zeroed
#   MODE_NOTRACK - track_status covariate is zeroed
#   MODE_TEST    - covariates inside the forecast window of each test series
#                  are replaced by values known before the window
MODE_ORACLE = 0
MODE_NOLAP = 1
MODE_NOTRACK = 2
MODE_TEST = 4
# human-readable name of each single mode
MODE_STR={MODE_ORACLE:'oracle', MODE_NOLAP:'nolap',MODE_NOTRACK:'notrack',MODE_TEST:'test'}

def make_dataset(runs, prediction_length, freq, 
                       run_ts=COL_LAPTIME, 
                       train_ratio = 0.8,
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE
                ):
    """
    Build gluonts train/test datasets from the module-level ``laptime_data``.

    For every car of every selected event the series is reduced to the laps
    where the target feature is not NaN, cut at ``train_len`` laps for the
    training set, and expanded into rolling test series that end at
    different laps (newest first, stepping back by half the horizon).
    Cars with fewer than ``train_len + prediction_length`` valid laps are
    skipped.

    Parameters
    ----------
    runs : int
        Index into ``laptime_data`` of the single event to use; a negative
        value selects all events.
    prediction_length : int
        Forecast horizon in laps; must be >= 1.
    freq : str
        Frequency string used for the synthetic start timestamp and passed
        to ListDataset.
    run_ts : int
        Feature row used as the target (COL_LAPTIME or COL_RANK).
    train_ratio : float
        Fraction of the event's (padded) lap count used as training length.
    use_global_dict : bool
        True: the static categorical id is looked up in ``global_carids``.
        False: the row index is used as both car number and id.
    oracle_mode : int
        Bit mask of MODE_* flags.
        MODE_NOTRACK / MODE_NOLAP zero the track / lap status covariate.
        MODE_TEST simulates a real forecast in the test set: inside the
        last ``prediction_length`` laps the track status is held at its
        last value before the window and the lap status is set to 0.

    Returns
    -------
    (train_ds, test_ds, train_set, test_set)
        The two ListDataset objects and the plain lists of record dicts
        they were built from.

    Raises
    ------
    ValueError
        If ``prediction_length`` is smaller than 1.
    """    
    if prediction_length < 1:
        raise ValueError(f'prediction_length must be >= 1, got {prediction_length}')

    # NOTE(review): newer pandas releases no longer accept the `freq`
    # argument of Timestamp -- confirm the pinned pandas version before
    # upgrading.
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    # Stride of the rolling test windows: half the horizon, but at least 1.
    # int(prediction_length/2) is 0 for a horizon of 1 and range() rejects a
    # zero step.
    window_step = max(1, int(prediction_length/2))

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
   
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        #statistics on the ts length (padded length, before NaN removal)
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        train_len = int(np.max(ts_len) * train_ratio)
        
        print(f'====event:{events[_data[0]]}, train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)}')
                
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status]]
            rec = _data[2][rowid].copy()
            
            # Drop every lap whose target is NaN. NaNs are expected only as
            # tail padding; a NaN in the middle would be removed as well.
            nans, _ = nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~nans]
            
            # remove short ts
            totallen = rec.shape[1]
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid
                
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0
                
            # split and add to dataset record
            _train.append({'target': rec[run_ts,:train_len].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': [carid],
                            'feat_dynamic_real': [rec[COL_TRACKSTATUS,:train_len],
                                   rec[COL_LAPSTATUS,:train_len]]
                          }
                          )
            
            # multiple test ts(rolling window as half of the prediction_length)
            test_rec_cnt = 0
            for endpos in range(totallen, train_len+prediction_length, 
                                -window_step):
                
                track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                lap_rec = rec[COL_LAPSTATUS, :endpos].copy()
                
                # test mode
                if test_flag(oracle_mode, MODE_TEST):
                    # NaN covariates do not work, so the forecast window
                    # gets the last track status seen before it and no pit
                    # stops.
                    track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                    lap_rec[-prediction_length:] = 0
                    
                
                _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': [carid],
                            'feat_dynamic_real': [track_rec,lap_rec]
                             }
                          )   
                test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'train len:{len(train_set)}, test len:{len(test_set)}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set

def save_dataset(datafile,freq, prediction_length, cardinality, train_ds, test_ds):
    """Pickle one prepared dataset to `datafile`.

    The file holds a single list:
    [freq, prediction_length, cardinality, train_ds, test_ds].
    """
    payload = [freq, prediction_length, cardinality, train_ds, test_ds]
    with open(datafile, 'wb') as out:
        # highest protocol available in the running interpreter
        pickle.dump(payload, out, protocol=pickle.HIGHEST_PROTOCOL)

### build dataset

In [88]:
# global configuration
# forecast horizon in laps (alternatives tried: 5, 10)
#prediction_length = 5
#prediction_length = 10
prediction_length = 20
# fraction of each event's lap count used for training
train_ratio = 0.8

freq = "1min"
# number of distinct car ids, i.e. the cardinality of feat_static_cat
cardinality = [len(global_carids)]

#run on indy dataset
# Forward train_ratio explicitly so that editing the setting above actually
# takes effect (make_dataset would otherwise use its own default).
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, train_ratio=train_ratio)
save_dataset(f'laptime-oracle-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:3
carno:4, totallen:241, nancount:9, test_reccnt:3
carno:5, totallen:250, nancount:0, test_reccnt:3
carno:6, totallen:250, nancount:0, test_reccnt:3
carno:9, totallen:250, nancount:0, test_reccnt:3
carno:10, totallen:229, nancount:21, test_reccnt:1
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:3
carno:15, totallen:250, nancount:0, test_reccnt:3
carno:18, totallen:249, nancount:1, test_reccnt:3
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:3
carno:21, totallen:249, nancount:1, test_reccnt:3
carno:22, totallen:250, nancount:0, test_reccnt:3
carno:23, totallen:248, nancount:2, test_reccnt:3
carno:26, totallen:249, nancount:1, test_reccnt:3
carno:27, totallen:250, nancount:0, test_reccnt:3
carno:28, totallen:250, nancount:0, test_reccnt:3
carno:30, totallen:250, nancount:0, test_reccnt:3
a short ts: carid=32，len=174
carno:59,

In [89]:
# laptime dataset with the track_status covariate zeroed; train_ratio is
# forwarded so the configured split is honoured
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, train_ratio=train_ratio,
                                     oracle_mode=MODE_NOTRACK)
save_dataset(f'laptime-oracle-notrack-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:3
carno:4, totallen:241, nancount:9, test_reccnt:3
carno:5, totallen:250, nancount:0, test_reccnt:3
carno:6, totallen:250, nancount:0, test_reccnt:3
carno:9, totallen:250, nancount:0, test_reccnt:3
carno:10, totallen:229, nancount:21, test_reccnt:1
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:3
carno:15, totallen:250, nancount:0, test_reccnt:3
carno:18, totallen:249, nancount:1, test_reccnt:3
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:3
carno:21, totallen:249, nancount:1, test_reccnt:3
carno:22, totallen:250, nancount:0, test_reccnt:3
carno:23, totallen:248, nancount:2, test_reccnt:3
carno:26, totallen:249, nancount:1, test_reccnt:3
carno:27, totallen:250, nancount:0, test_reccnt:3
carno:28, totallen:250, nancount:0, test_reccnt:3
carno:30, totallen:250, nancount:0, test_reccnt:3
a short ts: carid=32，len=174
carno:59,

In [90]:
# laptime dataset with the lap_status covariate zeroed; train_ratio is
# forwarded so the configured split is honoured
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, train_ratio=train_ratio,
                                     oracle_mode=MODE_NOLAP)
save_dataset(f'laptime-oracle-nolap-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:3
carno:4, totallen:241, nancount:9, test_reccnt:3
carno:5, totallen:250, nancount:0, test_reccnt:3
carno:6, totallen:250, nancount:0, test_reccnt:3
carno:9, totallen:250, nancount:0, test_reccnt:3
carno:10, totallen:229, nancount:21, test_reccnt:1
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:3
carno:15, totallen:250, nancount:0, test_reccnt:3
carno:18, totallen:249, nancount:1, test_reccnt:3
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:3
carno:21, totallen:249, nancount:1, test_reccnt:3
carno:22, totallen:250, nancount:0, test_reccnt:3
carno:23, totallen:248, nancount:2, test_reccnt:3
carno:26, totallen:249, nancount:1, test_reccnt:3
carno:27, totallen:250, nancount:0, test_reccnt:3
carno:28, totallen:250, nancount:0, test_reccnt:3
carno:30, totallen:250, nancount:0, test_reccnt:3
a short ts: carid=32，len=174
carno:59,

In [91]:
# laptime dataset in test mode (covariates inside the forecast window are
# replaced by values known beforehand); train_ratio is forwarded so the
# configured split is honoured
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, train_ratio=train_ratio,
                                     oracle_mode=MODE_TEST)
save_dataset(f'laptime-oracle-testcurtrack-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:3
carno:4, totallen:241, nancount:9, test_reccnt:3
carno:5, totallen:250, nancount:0, test_reccnt:3
carno:6, totallen:250, nancount:0, test_reccnt:3
carno:9, totallen:250, nancount:0, test_reccnt:3
carno:10, totallen:229, nancount:21, test_reccnt:1
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:3
carno:15, totallen:250, nancount:0, test_reccnt:3
carno:18, totallen:249, nancount:1, test_reccnt:3
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:3
carno:21, totallen:249, nancount:1, test_reccnt:3
carno:22, totallen:250, nancount:0, test_reccnt:3
carno:23, totallen:248, nancount:2, test_reccnt:3
carno:26, totallen:249, nancount:1, test_reccnt:3
carno:27, totallen:250, nancount:0, test_reccnt:3
carno:28, totallen:250, nancount:0, test_reccnt:3
carno:30, totallen:250, nancount:0, test_reccnt:3
a short ts: carid=32，len=174
carno:59,

#### rank dataset

In [92]:
#run on indy dataset
# Same four variants as for laptime, with rank as the target series.
# train_ratio is forwarded so the configured split is honoured.
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq,run_ts=COL_RANK,
                                     train_ratio=train_ratio)
save_dataset(f'rank-oracle-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, oracle_mode=MODE_NOLAP,run_ts=COL_RANK,
                                     train_ratio=train_ratio)
save_dataset(f'rank-oracle-nolap-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, oracle_mode=MODE_NOTRACK,run_ts=COL_RANK,
                                     train_ratio=train_ratio)
save_dataset(f'rank-oracle-notrack-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)
train_ds, test_ds,_,_ = make_dataset(-1, prediction_length,freq, oracle_mode=MODE_TEST,run_ts=COL_RANK,
                                     train_ratio=train_ratio)
save_dataset(f'rank-oracle-testcurtrack-f{freq}-t{prediction_length}-gluonts-indy-2018.pickle'
             , freq, prediction_length, cardinality,train_ds, test_ds)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:3
carno:4, totallen:241, nancount:9, test_reccnt:3
carno:5, totallen:250, nancount:0, test_reccnt:3
carno:6, totallen:250, nancount:0, test_reccnt:3
carno:9, totallen:250, nancount:0, test_reccnt:3
carno:10, totallen:229, nancount:21, test_reccnt:1
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:3
carno:15, totallen:250, nancount:0, test_reccnt:3
carno:18, totallen:249, nancount:1, test_reccnt:3
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:3
carno:21, totallen:249, nancount:1, test_reccnt:3
carno:22, totallen:250, nancount:0, test_reccnt:3
carno:23, totallen:248, nancount:2, test_reccnt:3
carno:26, totallen:249, nancount:1, test_reccnt:3
carno:27, totallen:250, nancount:0, test_reccnt:3
carno:28, totallen:250, nancount:0, test_reccnt:3
carno:30, totallen:250, nancount:0, test_reccnt:3
a short ts: carid=32，len=174
carno:59,

### test

In [42]:
def test_a(a, bitflag):
    return (a & bitflag) ==  bitflag

test_a(5, 4)


True

In [45]:
test_a(3, 2)

True