## timediff2Rank for event model

basedon: 11.OracleRank/laptime_rank_timediff_dataset-oracle.ipynb

rank prediction by timediff forecasting models

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import mxnet as mx
from mxnet import gluon
import pickle
import json
from scipy import stats
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from pathlib import Path
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.deep_factor import DeepFactorEstimator
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.trainer import Trainer
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator, MultivariateEvaluator
from gluonts.distribution.multivariate_gaussian import MultivariateGaussianOutput
from gluonts.model.predictor import Predictor
from gluonts.model.prophet import ProphetPredictor
from gluonts.model.r_forecast import RForecastPredictor
from indycar.model.NaivePredictor import NaivePredictor
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU
INFO:root:Using GPU
ERROR:fbprophet:Importing plotly failed. Interactive plots will not work.


In [2]:
import os
os.getcwd()
GPUID = 1

In [3]:
def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    return np.isnan(y), lambda z: z.nonzero()[0]

def test_flag(a, bitflag):
    return (a & bitflag) ==  bitflag

#
# remove NaN at the tail
# there should be no nans in the middle of the ts
COL_LAPTIME=0
COL_RANK=1
COL_TRACKSTATUS=2
COL_LAPSTATUS=3
COL_TIMEDIFF=4
MODE_ORACLE = 0
MODE_NOLAP = 1
MODE_NOTRACK = 2
MODE_TESTZERO = 4
MODE_TESTCURTRACK = 8
#MODE_STR={MODE_ORACLE:'oracle', MODE_NOLAP:'nolap',MODE_NOTRACK:'notrack',MODE_TEST:'test'}

def make_dataset(runs, prediction_length, freq, 
                       useeid = False,
                       run_ts=COL_LAPTIME, 
                       train_ratio = 0.8,
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE,
                       test_cars = [],
                       half_moving_win = True 
                ):
    """
    split the ts to train and test part by the ratio
    
    oracle_mode: false to simulate prediction in real by 
        set the covariates of track and lap status as nan in the testset
            
    
    """    
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
   
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        #statistics on the ts length
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        train_len = int(np.max(ts_len) * train_ratio)
        
        print(f'====event:{events[_data[0]]}, train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)}')
                
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
            rec = _data[2][rowid].copy()
            
            #remove nan(only tails)
            nans, x= nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~np.isnan(rec[run_ts,:])]
            
            # remove short ts
            totallen = rec.shape[1]
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid

            #eval on carids
            if test_cars and (carno not in test_cars):
                continue                
            
            if useeid:
                static_cat = [carid, _data[0]]    
            else:
                static_cat = [carid]    
                
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0
                
            # split and add to dataset record
            _train.append({'target': rec[run_ts,:train_len].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': static_cat,
                            'feat_dynamic_real': [rec[COL_TRACKSTATUS,:train_len],
                                   rec[COL_LAPSTATUS,:train_len]]
                          }
                          )
            
            # multiple test ts(rolling window as half of the prediction_length)
            test_rec_cnt = 0
            step = -int(prediction_length/2) if half_moving_win else -prediction_length
            for endpos in range(totallen, train_len+prediction_length, step):
                
                track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                lap_rec = rec[COL_LAPSTATUS, :endpos].copy()
                
                # test mode
                if test_flag(oracle_mode, MODE_TESTCURTRACK):
                    # since nan does not work, use cur-val instead
                    track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                    #track_rec[-prediction_length:] = random.randint(0,1)
                    #lap_rec[-prediction_length:] = lap_rec[-prediction_length - 1]
                    lap_rec[-prediction_length:] = 0
                elif test_flag(oracle_mode, MODE_TESTZERO):
                    #set prediction part as nan
                    #track_rec[-prediction_length:] = np.nan
                    #lap_rec[-prediction_length:] = np.nan
                    track_rec[-prediction_length:] = 0
                    lap_rec[-prediction_length:] = 0                    
                
                _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                            'start': start, 
                            'feat_static_cat': static_cat,
                            'feat_dynamic_real': [track_rec,lap_rec]
                            #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                            #       rec[COL_LAPSTATUS,:endpos]] 
                             }
                          )   
                test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'prediction_length:{prediction_length},train len:{len(train_set)}, test len:{len(test_set)}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set


def make_dataset_byevent(runs, prediction_length, freq, 
                       useeid = False,
                       run_ts=COL_LAPTIME, 
                       test_event = 'Indy500',
                       test_cars = [],  
                       use_global_dict = True,
                       oracle_mode = MODE_ORACLE,
                       half_moving_win = True,
                       train_ratio=0.8
                ):
    """
    split the ts to train and test part by the ratio
    
    oracle_mode: false to simulate prediction in real by 
        set the covariates of track and lap status as nan in the testset
            
    
    """    
    start = pd.Timestamp("01-01-2019", freq=freq)  # can be different for each time series

    train_set = []
    test_set = []
    
    #select run
    if runs>=0:
        _laptime_data = [laptime_data[runs].copy()]
    else:
        _laptime_data = laptime_data.copy()
    
   
    #_data: eventid, carids, datalist[carnumbers, features, lapnumber]->[laptime, rank, track, lap]]
    for _data in _laptime_data:
        _train = []
        _test = []
        
        if events[_data[0]] == test_event:
            test_mode = True
        
        else:
            test_mode = False
            
            
        #statistics on the ts length
        ts_len = [ _entry.shape[1] for _entry in _data[2]]
        max_len = int(np.max(ts_len))
        train_len = int(np.max(ts_len) * train_ratio)
        
        
        print(f'====event:{events[_data[0]]}, train_len={train_len}, max_len={np.max(ts_len)}, min_len={np.min(ts_len)}')
                
        # process for each ts
        for rowid in range(_data[2].shape[0]):
            # rec[features, lapnumber] -> [laptime, rank, track_status, lap_status,timediff]]
            rec = _data[2][rowid].copy()
            
            #remove nan(only tails)
            nans, x= nan_helper(rec[run_ts,:])
            nan_count = np.sum(nans)             
            rec = rec[:, ~np.isnan(rec[run_ts,:])]
            
            # remove short ts
            totallen = rec.shape[1]
            if ( totallen < train_len + prediction_length):
                print(f'a short ts: carid={_data[1][rowid]}，len={totallen}')
                continue                
            
            if use_global_dict:
                carno = _data[1][rowid]
                carid = global_carids[_data[1][rowid]]
            else:
                #simulation dataset, todo, fix the carids as decoder
                carno = rowid
                carid = rowid
                
            
            if useeid:
                static_cat = [carid, _data[0]]    
            else:
                static_cat = [carid]    
                
            # selection of features
            if test_flag(oracle_mode, MODE_NOTRACK):                
                rec[COL_TRACKSTATUS, :] = 0
            if test_flag(oracle_mode, MODE_NOLAP):                
                rec[COL_LAPSTATUS, :] = 0

            test_rec_cnt = 0
            if not test_mode:
                
                # all go to train set
                _train.append({'target': rec[run_ts,:].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [rec[COL_TRACKSTATUS,:],
                                       rec[COL_LAPSTATUS,:]]
                              }
                              )
            else:
                # reset train_len
                context_len = prediction_length*2
                if context_len < 10:
                    context_len = 10
                
                
                # multiple test ts(rolling window as half of the prediction_length)

                step = -int(prediction_length/2) if half_moving_win else -prediction_length
                
                #bug fix, fixed the split point for all cars/ts
                for endpos in range(max_len, context_len+prediction_length, 
                                    step):

                    #check if enough for this ts
                    if endpos > totallen:
                        continue
                    
                    track_rec = rec[COL_TRACKSTATUS, :endpos].copy()
                    lap_rec = rec[COL_LAPSTATUS, :endpos].copy()

                    # test mode
                    if test_flag(oracle_mode, MODE_TESTCURTRACK):
                        # since nan does not work, use cur-val instead
                        track_rec[-prediction_length:] = track_rec[-prediction_length - 1]
                        #track_rec[-prediction_length:] = random.randint(0,1)
                        #lap_rec[-prediction_length:] = lap_rec[-prediction_length - 1]
                        lap_rec[-prediction_length:] = 0
                    elif test_flag(oracle_mode, MODE_TESTZERO):
                        #set prediction part as nan
                        #track_rec[-prediction_length:] = np.nan
                        #lap_rec[-prediction_length:] = np.nan
                        track_rec[-prediction_length:] = 0
                        lap_rec[-prediction_length:] = 0                    

                    _test.append({'target': rec[run_ts,:endpos].astype(np.float32), 
                                'start': start, 
                                'feat_static_cat': static_cat,
                                'feat_dynamic_real': [track_rec,lap_rec]
                                #'feat_dynamic_real': [rec[COL_TRACKSTATUS,:endpos],
                                #       rec[COL_LAPSTATUS,:endpos]] 
                                 }
                              )   
                    test_rec_cnt += 1
            
            #add one ts
            print(f'carno:{carno}, totallen:{totallen}, nancount:{nan_count}, test_reccnt:{test_rec_cnt}')

        train_set.extend(_train)
        test_set.extend(_test)

    print(f'train len:{len(train_set)}, test len:{len(test_set)}')
    
    train_ds = ListDataset(train_set, freq=freq)
    test_ds = ListDataset(test_set, freq=freq)    
    
    return train_ds, test_ds, train_set, test_set

def save_dataset(datafile,freq, prediction_length, cardinality, train_ds, test_ds):
    with open(datafile, 'wb') as f:
        #pack [global_carids, laptime_data]
        savedata = [freq, prediction_length, cardinality, train_ds, test_ds]
        #savedata = [freq, train_set, test_set]
        # Pickle the 'data' dictionary using the highest protocol available.
        pickle.dump(savedata, f, pickle.HIGHEST_PROTOCOL)        

### test for Indy500

In [4]:
def predict(test_ds,predictor):
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )

    forecasts = list(forecast_it)
    tss = list(ts_it)
    print(f'tss len={len(tss)}, forecasts len={len(forecasts)}')
    
    return tss, forecasts

def run_prediction(test_ds, prediction_length):
    with mx.Context(mx.gpu(1)):    
        pred_ret = {}

        rootdir = '../models/remote/timediff-indy500/'
        # deepAR-Oracle
        model_name = 'deepAR-Oracle-curtrack'
        model=f'deepAR-Oracle-timediff-curtrack-indy-f1min-t{prediction_length}-e1000-r1_curtrack_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # deepAR-Oracle
        model_name = 'deepAR-Oracle'
        model=f'deepAR-Oracle-timediff-all-indy-f1min-t{prediction_length}-e1000-r1_oracle_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # deepAR
        model_name = 'deepAR'
        model=f'deepAR-timediff-all-indy-f1min-t{prediction_length}-e1000-r1_deepar_t{prediction_length}'
        modeldir = rootdir + model
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  Predictor.deserialize(Path(modeldir))
        print(f'loading model...done!, ctx:{predictor.ctx}')
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # naive
        model_name = 'naive'
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  NaivePredictor(freq= freq, prediction_length = prediction_length)
        tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        # arima
        model_name = 'arima'
        print(f'predicting model={model_name}, plen={prediction_length}')
        predictor =  RForecastPredictor(method_name='arima',freq= freq, 
                                        prediction_length = prediction_length,trunc_length=60)
        #tss, forecasts = predict(test_ds,predictor)
        pred_ret[model_name] = [tss, forecasts]

        return pred_ret

In [5]:
#calc rank
def eval_rank_bytimediff(test_ds,tss,forecasts,prediction_length):
    """
    timediff models
    
    works for one event only
    
    """

    carlist = []

    # carno-lap# -> elapsed_time[] array
    forecasts_et = dict()

    ds_iter =  iter(test_ds)
    for idx in range(len(test_ds)):
        test_rec = next(ds_iter)
        #global carid
        carno = decode_carids[test_rec['feat_static_cat'][0]]
        #print('car no:', carno)

        if carno not in carlist:
            carlist.append(carno)

        # calc elapsed time
        prediction_len = forecasts[idx].samples.shape[1]
        if prediction_length != prediction_len:
            print('error: prediction_len does not match, {prediction_length}:{prediction_len}')
            return []
        
        #forecast_laptime_mean = np.mean(forecasts[idx].samples, axis=0).reshape((prediction_len,1))
        forecast_laptime_mean = np.median(forecasts[idx].samples, axis=0).reshape((prediction_len,1))
        
        timediff_array = tss[idx].values.copy()

        #save the prediction
        completed_laps = len(tss[idx]) - prediction_len + 1
        #print('car no:', carno, 'completed_laps:', completed_laps)
        #key = '%s-%s'%(carno, completed_laps)
        #forecasts_et[key] = elapsed_time[-prediction_len:].copy()
        if completed_laps not in forecasts_et:
            forecasts_et[completed_laps] = {}
        forecasts_et[completed_laps][carno] = [timediff_array[-prediction_len:].copy(),
                                                   forecast_laptime_mean.copy()]


    # calc rank
    rank_ret = []
    for lap in forecasts_et.keys():
        #get car list for this lap
        carlist = list(forecasts_et[lap].keys())
        #print('carlist:', carlist)

        caridmap={key:idx for idx, key in enumerate(carlist)}

        #fill in data
        time_diff = np.zeros((2, len(carlist), prediction_len))
        for carno in carlist:
            carid = caridmap[carno]
            time_diff[0, carid, :] = forecasts_et[lap][carno][0].reshape((prediction_len))
            time_diff[1, carid, :] = forecasts_et[lap][carno][1].reshape((prediction_len))

        #calculate rank    
        idx = np.argsort(time_diff[0], axis=0)
        true_rank = np.argsort(idx, axis=0)

        idx = np.argsort(time_diff[1], axis=0)
        pred_rank = np.argsort(idx, axis=0)

        rank_ret.append([lap, time_diff, true_rank, pred_rank])
        
    return rank_ret,forecasts_et
    
   
   
def get_acc(rank_ret,prediction_length):    
    # evaluate
    #top1 accuracy
    top1acc = 0
    top1acc_farmost = 0
    top5acc = 0
    top5acc_farmost = 0
    tau = 0
    rmse = 0.
    
    for rec in rank_ret:
        trueRank = rec[2]
        predRank = rec[3]

        #top1 , rank = 0, first col is not prediction
        top1acc += np.sum((trueRank==0) & (predRank==0)) 
        
        top1acc_farmost += np.sum((trueRank[:,-1]==0) & (predRank[:,-1]==0))
        
        #top5
        top5acc += np.sum((trueRank<5) & (predRank<5)) 
        
        top5acc_farmost += np.sum((trueRank[:,-1]<5) & (predRank[:,-1]<5))
        
        # tau
        tao, _ = stats.kendalltau(trueRank, predRank)
        tau += tao
        
        #rmse
        rmse += mean_squared_error(predRank,trueRank)
        

    print(f'total:{len(rank_ret)}, prediction_length:{prediction_length}') 
    print('top1acc=', top1acc *1.0/ (len(rank_ret)*prediction_length),
          'top1acc_farmost=', top1acc_farmost *1.0/ (len(rank_ret)),
          'top5acc=', top5acc *1.0/ (5*len(rank_ret)*prediction_length),
          'top5acc_farmost=', top5acc_farmost *1.0/ (5*len(rank_ret)),
         )
    print('tau = ', tau/len(rank_ret),
         'rmse = ', rmse/len(rank_ret))
    
def get_top1acc_farmost(rank_ret,prediction_len):    
    # evaluate
    #top1 accuracy
    hitcnt = 0
    for rec in rank_ret:
        trueRank = rec[2]
        predRank = rec[3]

        #top1 , rank = 0, first col is not prediction
        hitcnt += np.sum((trueRank[:,-1]==0) & (predRank[:,-1]==0)) 

    print('total:', hitcnt, 'top1acc_farmost=', hitcnt *1.0/ (len(rank_ret)*prediction_length))    



In [6]:
def run_exp(prediction_length, half_moving_win):
    ### create test dataset
    test_cars = []
    train_ds, test_ds,_,_ = make_dataset_byevent(-1, prediction_length,freq, 
                                         oracle_mode=MODE_TESTCURTRACK,
                                         run_ts = COL_TIMEDIFF,
                                         test_cars=test_cars,
                                         half_moving_win= half_moving_win)
    pred_ret = run_prediction(test_ds, prediction_length)

    models = ['deepAR-Oracle','deepAR-Oracle-curtrack','deepAR','naive','arima']
    for model in models:
        print('model:', model)
        tss, forecasts = pred_ret[model]

        rank_ret,_ = eval_rank_bytimediff(test_ds,tss,forecasts,prediction_length)
        get_acc(rank_ret,prediction_length)
        
    return pred_ret, test_ds

### init

In [7]:
#
# parameters
#
#year = '2017'
year = '2018'
#event = 'Toronto'
#https://www.racing-reference.info/season-stats/2018/O/#
events_totalmiles=[256,500,372,268,500,310]
events_laplen = [1.022,2.5,1.5,0.894,2.5,1.25]
events = ['Phoenix','Indy500','Texas','Iowa','Pocono','Gateway']
#events = ['Gateway']

#events = ['Indy500']
#events = ['Phoenix']
events_id={key:idx for idx, key in enumerate(events)}

In [8]:
# start from here
import pickle
with open('laptime_rank_timediff-oracle-%s.pickle'%year, 'rb') as f:
    # The protocol version used is detected automatically, so we do not
    # have to specify it.
    global_carids, laptime_data = pickle.load(f, encoding='latin1')

In [9]:
freq = "1min"
#decode global_carids
decode_carids={carid:carno for carno, carid in global_carids.items()}
    
#useeid = False
#interpolate = False
#ipstr = '-ip' if interpolate else '-noip'
#ipstr = '%s-%s'%('ip' if interpolate else 'noip', 'eid' if useeid else 'noeid')
#if useeid:
#    cardinality = [len(global_carids), len(laptime_data)]
#else:
#    cardinality = [len(global_carids)]

#### test 2

In [10]:
# test half moving win
pred_ret2_nohalfwin, test_ds2 = run_exp(2, False)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


predicting model=deepAR-Oracle-curtrack, plen=2
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=2334, forecasts len=2334
predicting model=deepAR-Oracle, plen=2
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=2334, forecasts len=2334
predicting model=deepAR, plen=2
loading model...done!, ctx:gpu(0)
tss len=2334, forecasts len=2334
predicting model=naive, plen=2


  start_date=start + target_len,


tss len=2334, forecasts len=2334
predicting model=arima, plen=2


  method            from
  as.zoo.data.frame zoo 



model: deepAR-Oracle
total:94, prediction_length:2
top1acc= 0.7180851063829787 top1acc_farmost= 0.32978723404255317 top5acc= 0.8840425531914894 top5acc_farmost= 0.4223404255319149
tau =  0.8889987333200321
model: deepAR-Oracle-curtrack
total:94, prediction_length:2
top1acc= 0.75 top1acc_farmost= 0.3351063829787234 top5acc= 0.8840425531914894 top5acc_farmost= 0.41914893617021276
tau =  0.8904824899152046
model: deepAR
total:94, prediction_length:2
top1acc= 0.7446808510638298 top1acc_farmost= 0.3404255319148936 top5acc= 0.8957446808510638 top5acc_farmost= 0.4276595744680851
tau =  0.893190827714485
model: naive
total:94, prediction_length:2
top1acc= 0.7925531914893617 top1acc_farmost= 0.3723404255319149 top5acc= 0.9106382978723404 top5acc_farmost= 0.43829787234042555
tau =  0.9140170086620133
model: arima
total:94, prediction_length:2
top1acc= 0.7925531914893617 top1acc_farmost= 0.3723404255319149 top5acc= 0.9106382978723404 top5acc_farmost= 0.43829787234042555
tau =  0.9140170086620133


In [11]:
pred_ret2_halfwin, test_ds2h = run_exp(2, True)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


predicting model=deepAR-Oracle-curtrack, plen=2
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=4673, forecasts len=4673
predicting model=deepAR-Oracle, plen=2
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=4673, forecasts len=4673
predicting model=deepAR, plen=2
loading model...done!, ctx:gpu(0)
tss len=4673, forecasts len=4673
predicting model=naive, plen=2


  start_date=start + target_len,


tss len=4673, forecasts len=4673
predicting model=arima, plen=2
model: deepAR-Oracle
total:188, prediction_length:2
top1acc= 0.723404255319149 top1acc_farmost= 0.324468085106383 top5acc= 0.875 top5acc_farmost= 0.42021276595744683
tau =  0.8802275774913447
model: deepAR-Oracle-curtrack
total:188, prediction_length:2
top1acc= 0.7526595744680851 top1acc_farmost= 0.34308510638297873 top5acc= 0.8797872340425532 top5acc_farmost= 0.4207446808510638
tau =  0.8835978227105048
model: deepAR
total:188, prediction_length:2
top1acc= 0.7446808510638298 top1acc_farmost= 0.34308510638297873 top5acc= 0.8888297872340426 top5acc_farmost= 0.42925531914893617
tau =  0.8898004845562255
model: naive
total:188, prediction_length:2
top1acc= 0.7872340425531915 top1acc_farmost= 0.36436170212765956 top5acc= 0.9005319148936171 top5acc_farmost= 0.43563829787234043
tau =  0.9117318845585499
model: arima
total:188, prediction_length:2
top1acc= 0.7872340425531915 top1acc_farmost= 0.36436170212765956 top5acc= 0.9005319

In [12]:
# test half moving win
pred_ret5_halfwin, test_ds5h = run_exp(5, True)

====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


predicting model=deepAR-Oracle-curtrack, plen=5
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=2309, forecasts len=2309
predicting model=deepAR-Oracle, plen=5
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=2309, forecasts len=2309
predicting model=deepAR, plen=5
loading model...done!, ctx:gpu(0)
tss len=2309, forecasts len=2309
predicting model=naive, plen=5
tss len=2309, forecasts len=2309
predicting model=arima, plen=5
model: deepAR-Oracle
total:93, prediction_length:5
top1acc= 0.6 top1acc_farmost= 0.09247311827956989 top5acc= 0.813763440860215 top5acc_farmost= 0.1449462365591398
tau =  0.8081554513870658
model: deepAR-Oracle-curtrack
total:93, prediction_length:5
top1acc= 0.6043010752688172 top1acc_farmost= 0.09247311827956989 top5acc= 0.8103225806451613 top5acc_farmost= 0.1432258064516129
tau =  0.8074000835014611
model: deepAR
total:93, prediction_length:5
top1acc= 0.5978494623655914 top1acc_farmost= 0.1010752688172043 top5acc= 0.8055913978494623 top5acc_farmost= 0.14451612903225808
tau =  0.8043145925053686
model: naive
total:93, prediction_length:5
top1acc= 0.6752688172043011 top1acc_farmost= 0.11182795698924732 top5acc= 0.821505376344086 top5acc_farmost= 0.146666666666666

In [13]:
# test half moving win
pred_ret5_nohalfwin, test_ds5 = run_exp(5, False)

INFO:root:Using GPU


====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


tss len=915, forecasts len=915
predicting model=deepAR-Oracle, plen=5
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=915, forecasts len=915
predicting model=deepAR, plen=5
loading model...done!, ctx:gpu(0)
tss len=915, forecasts len=915
predicting model=naive, plen=5
tss len=915, forecasts len=915
predicting model=arima, plen=5
model: deepAR-Oracle
total:37, prediction_length:5
top1acc= 0.6054054054054054 top1acc_farmost= 0.0972972972972973 top5acc= 0.8108108108108109 top5acc_farmost= 0.14486486486486486
tau =  0.801372488694228
model: deepAR-Oracle-curtrack
total:37, prediction_length:5
top1acc= 0.5567567567567567 top1acc_farmost= 0.08648648648648649 top5acc= 0.7978378378378378 top5acc_farmost= 0.14054054054054055
tau =  0.8000436144119011
model: deepAR
total:37, prediction_length:5
top1acc= 0.5945945945945946 top1acc_farmost= 0.10270270270270271 top5acc= 0.8054054054054054 top5acc_farmost= 0.14702702702702702
tau =  0.8047120849750005
model: naive
total:37, prediction_length:5
top1acc= 0.6378378378378379 top1acc_farmost= 0.11891891891891893 top5acc= 0.8140540540540541 top5acc_farmost= 0.148

In [14]:
pred_ret10_halfwin, test_ds10 = run_exp(10, True)

INFO:root:Using GPU


====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


tss len=840, forecasts len=840
predicting model=deepAR-Oracle, plen=10
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=840, forecasts len=840
predicting model=deepAR, plen=10
loading model...done!, ctx:gpu(0)
tss len=840, forecasts len=840
predicting model=naive, plen=10
tss len=840, forecasts len=840
predicting model=arima, plen=10
model: deepAR-Oracle
total:34, prediction_length:10
top1acc= 0.538235294117647 top1acc_farmost= 0.041176470588235294 top5acc= 0.7182352941176471 top5acc_farmost= 0.06
tau =  0.6870592548016649
model: deepAR-Oracle-curtrack
total:34, prediction_length:10
top1acc= 0.48823529411764705 top1acc_farmost= 0.03529411764705882 top5acc= 0.7064705882352941 top5acc_farmost= 0.06
tau =  0.6922372704782136
model: deepAR
total:34, prediction_length:10
top1acc= 0.5058823529411764 top1acc_farmost= 0.03529411764705882 top5acc= 0.7094117647058824 top5acc_farmost= 0.0611764705882353
tau =  0.6965878244609476
model: naive
total:34, prediction_length:10
top1acc= 0.5441176470588235 top1acc_farmost= 0.041176470588235294 top5acc= 0.7229411764705882 top5acc_farmost= 0.05941176470588235
tau =

In [15]:
pred_ret10_nohalfwin, test_ds10h = run_exp(10, False)

INFO:root:Using GPU


====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


tss len=417, forecasts len=417
predicting model=deepAR-Oracle, plen=10
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=417, forecasts len=417
predicting model=deepAR, plen=10
loading model...done!, ctx:gpu(0)
tss len=417, forecasts len=417
predicting model=naive, plen=10
tss len=417, forecasts len=417
predicting model=arima, plen=10
model: deepAR-Oracle
total:17, prediction_length:10
top1acc= 0.5470588235294118 top1acc_farmost= 0.047058823529411764 top5acc= 0.7141176470588235 top5acc_farmost= 0.058823529411764705
tau =  0.6745912040519533
model: deepAR-Oracle-curtrack
total:17, prediction_length:10
top1acc= 0.48823529411764705 top1acc_farmost= 0.041176470588235294 top5acc= 0.7152941176470589 top5acc_farmost= 0.05647058823529412
tau =  0.677916635742106
model: deepAR
total:17, prediction_length:10
top1acc= 0.5 top1acc_farmost= 0.03529411764705882 top5acc= 0.7047058823529412 top5acc_farmost= 0.06
tau =  0.6889694933386825
model: naive
total:17, prediction_length:10
top1acc= 0.5647058823529412 top1acc_farmost= 0.047058823529411764 top5acc= 0.7094117647058824 top5acc_farmost= 0.05764705882352941
ta

### test

In [16]:
plen = 10
pred_ret10_halfwin, test_ds10 = run_exp(10, True)



====event:Phoenix, train_len=200, max_len=250, min_len=250
carno:1, totallen:250, nancount:0, test_reccnt:0
carno:4, totallen:241, nancount:9, test_reccnt:0
carno:5, totallen:250, nancount:0, test_reccnt:0
carno:6, totallen:250, nancount:0, test_reccnt:0
carno:9, totallen:250, nancount:0, test_reccnt:0
carno:10, totallen:229, nancount:21, test_reccnt:0
a short ts: carid=12，len=154
carno:14, totallen:250, nancount:0, test_reccnt:0
carno:15, totallen:250, nancount:0, test_reccnt:0
carno:18, totallen:249, nancount:1, test_reccnt:0
a short ts: carid=19，len=40
carno:20, totallen:250, nancount:0, test_reccnt:0
carno:21, totallen:249, nancount:1, test_reccnt:0
carno:22, totallen:250, nancount:0, test_reccnt:0
carno:23, totallen:248, nancount:2, test_reccnt:0
carno:26, totallen:249, nancount:1, test_reccnt:0
carno:27, totallen:250, nancount:0, test_reccnt:0
carno:28, totallen:250, nancount:0, test_reccnt:0
carno:30, totallen:250, nancount:0, test_reccnt:0
a short ts: carid=32，len=174
carno:59,

INFO:root:Using GPU


loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=840, forecasts len=840
predicting model=deepAR-Oracle, plen=10
loading model...done!, ctx:gpu(0)


INFO:root:Using GPU


tss len=840, forecasts len=840
predicting model=deepAR, plen=10
loading model...done!, ctx:gpu(0)
tss len=840, forecasts len=840
predicting model=naive, plen=10
tss len=840, forecasts len=840
predicting model=arima, plen=10
model: deepAR-Oracle
total:34, prediction_length:10
top1acc= 0.5441176470588235 top1acc_farmost= 0.03823529411764706 top5acc= 0.7147058823529412 top5acc_farmost= 0.060588235294117644
tau =  0.68994002656321
model: deepAR-Oracle-curtrack
total:34, prediction_length:10
top1acc= 0.48823529411764705 top1acc_farmost= 0.03529411764705882 top5acc= 0.7129411764705882 top5acc_farmost= 0.06
tau =  0.6914667990268262
model: deepAR
total:34, prediction_length:10
top1acc= 0.5264705882352941 top1acc_farmost= 0.03823529411764706 top5acc= 0.7041176470588235 top5acc_farmost= 0.05941176470588235
tau =  0.6969266701162369
model: naive
total:34, prediction_length:10
top1acc= 0.5441176470588235 top1acc_farmost= 0.041176470588235294 top5acc= 0.7229411764705882 top5acc_farmost= 0.05941176

In [17]:
model = 'deepAR-Oracle'
tss10, forecasts10 = pred_ret10_halfwin[model]
rank_ret10, fet10 = eval_rank_bytimediff(test_ds10,tss10,forecasts10,plen)

In [18]:
sorted(fet10.keys())

[26,
 31,
 36,
 41,
 46,
 51,
 56,
 61,
 66,
 71,
 76,
 81,
 86,
 91,
 96,
 101,
 106,
 111,
 116,
 121,
 126,
 131,
 136,
 141,
 146,
 151,
 156,
 161,
 166,
 171,
 176,
 181,
 186,
 191]

In [19]:
[rank_ret10[x][1].shape for x in range(0,len(rank_ret10))]

[(2, 18, 10),
 (2, 23, 10),
 (2, 24, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10)]

In [20]:
print(len(rank_ret10))

34


In [21]:
from scipy import stats


In [22]:
model = 'naive'
ntss10, nforecasts10 = pred_ret10_halfwin[model]
rank_ret10_naive,_ = eval_rank_bytimediff(test_ds10,ntss10,nforecasts10,plen)



In [23]:
rank_ret10_naive[0][2].shape

(18, 10)

In [24]:
tau = 0
for idx, rr in enumerate(rank_ret10_naive):
    x1 = rr[2][:,:]
    x2 = rr[3][:,:]
    
    tao, _ = stats.kendalltau(x1, x2)
    print(idx, tao)
    tau += tao

print(len(rank_ret10))
print(tau/len(rank_ret10))

0 0.510718954248366
1 0.9490909090909092
2 0.8902898550724639
3 0.7062666666666667
4 0.46303333333333335
5 0.6076666666666667
6 0.8134
7 0.8136666666666668
8 0.8842333333333333
9 0.8917666666666667
10 0.6408333333333334
11 0.5338333333333334
12 0.44843333333333335
13 0.4428
14 0.6515666666666667
15 0.9184
16 1.0
17 0.5808333333333333
18 0.4141
19 0.45103333333333334
20 0.1024
21 0.5658666666666666
22 0.9646333333333335
23 0.9156333333333333
24 0.7707666666666666
25 0.7121666666666667
26 0.9128333333333334
27 0.8568000000000001
28 0.8597666666666668
29 0.9051333333333335
30 0.9928
31 0.8747333333333334
32 0.6895666666666668
33 0.6854
34
0.7182490113258355


In [25]:
len(test_ds10), len(rank_ret10)

(840, 34)

In [26]:
[rank_ret10[x][1].shape for x in range(0,len(rank_ret10))]

[(2, 18, 10),
 (2, 23, 10),
 (2, 24, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10),
 (2, 25, 10)]

In [27]:
rank_ret2[15][3]

NameError: name 'rank_ret2' is not defined

In [None]:
tau = 0
for rr in rank_ret:
    x1 = rr[2][:,:]
    x2 = rr[3][:,:]
    
    tao, _ = stats.kendalltau(x1, x2)
    print(tao)
    tau += tao

print(len(rank_ret))
print(tau/len(rank_ret))

In [None]:
forecasts[0].samples.shape

In [None]:
next(iter(test_ds))