### stage_model_regressor

Prediction target: end_rank = start_rank + change (the old target)

base: 14./stage_model_regressor_withneighbor-newfeatures

prediction models of chg_of_rank_in_stage on stage dataset

data format:
    target , eventid ,    car_number,    stageid,     features...

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import math


In [2]:
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble.forest import RandomForestRegressor
from sklearn.linear_model.ridge import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model.stochastic_gradient import SGDRegressor
from sklearn.svm.classes import SVR
from sklearn.utils import shuffle
from sklearn import metrics
import xgboost as xgb



In [3]:
# build regression model
# Short names of every model/baseline to evaluate; the same list is appended
# to the report columns, so its order is the column order of the result tables.
regressors = ['currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
# Current train/test split, shared as module-level state between the test_*
# drivers (which assign it) and regressor_model (which reads it).
train_x = train_y = test_x = test_y = None

def get_regressor(regressor = 'lr'):
    """
    Create a fresh, unfitted model for a short model name.

    Known names are 'lasso', 'ridge', 'rf', 'svr', 'xgb' and the three
    baselines 'dice', 'currank' and 'avgrank'.  Any other name, including
    the default 'lr', yields None.
    """
    # learned models
    if regressor == "lasso":
        return LassoCV(cv=5, random_state=0)
    if regressor == "ridge":
        return RidgeCV(alphas=np.logspace(-6, 6, 13))
    if regressor == "rf":
        return RandomForestRegressor(n_estimators=100)
    if regressor == 'svr':
        return SVR(kernel='rbf')
    if regressor == 'xgb':
        return xgb.XGBRegressor(objective="reg:linear", random_state=42, max_depth=3)

    # naive baselines defined in this file
    if regressor == 'dice':
        return RandomDice('1234')
    if regressor == 'currank':
        return CurRank()
    if regressor == 'avgrank':
        return AverageRank()

    # unknown name: no model
    return None


class CurRank():
    """
    Baseline that always predicts 0.

    With a rank-change target this amounts to "the rank stays what it
    currently is".  Nothing is learned from the training data.
    """
    def __init__(self):
        pass

    def fit(self, x, y):
        # stateless baseline: training data is ignored
        pass

    def predict(self, test_x):
        # one zero per row of test_x
        n_rows = test_x.shape[0]
        return np.array([0] * n_rows)
    
class AverageRank():
    """
    Baseline that returns one feature column, unchanged, as the prediction.

    The value at position 13 of every feature row is used; the original
    author labelled that position 'change_in_rank_all'.  Nothing is learned
    from the training data.
    """
    def __init__(self):
        pass

    def fit(self, x, y):
        # stateless baseline: training data is ignored
        pass

    def predict(self, test_x):
        # NOTE(review): index 13 is hard-coded; verify it still points at
        # change_in_rank_all for the dataset that is actually loaded.
        picked = [row[13] for row in test_x]
        return np.array(picked)

class RandomDice():
    """
    Random baseline: draws predictions from the empirical distribution of
    the training targets.

    fit() stores every distinct target value together with the cumulative
    share of samples seen up to and including that value; predict() rolls a
    uniform number per test row and returns the first value whose
    cumulative share covers the roll.
    """
    def __init__(self, seed='1234'):
        self.dist = []   # cumulative frequencies, aligned with self.val
        self.val = []    # distinct target values
        # NOTE: this seeds Python's module-level RNG, not a private one
        random.seed(seed)

    def fit(self, x, y):
        """
        Build the cumulative distribution of the values in y (x is ignored).

        y must be an array supporting .shape and element-wise ==.
        """
        # start from scratch: without this a second fit() would append a
        # second table after the first and push the cumulative sum past 1
        self.val = []
        self.dist = []

        total = y.shape[0]
        ratio = 0.
        for val in set(y):
            self.val.append(val)
            ratio += np.sum(y==val)*1.0 / total
            self.dist.append(ratio)

    def predict(self, test_x):
        """
        Return one randomly drawn training value per row of test_x.
        """
        pred_y = []
        for _ in test_x:
            dice = random.random()
            # first bucket whose cumulative share covers the roll; -1 selects
            # the last value when rounding left the final share below 1
            find_idx = next(
                (idx for idx, ratio in enumerate(self.dist) if dice <= ratio),
                -1)
            pred_y.append(self.val[find_idx])

        return np.array(pred_y)

def evaluate(test_y, pred_y):
    """
    Score predictions against the ground truth.

    Prints RMSE, MAE and R2 on one line and returns them as the tuple
    (mae, rmse, r2) -- note that MAE comes first in the returned tuple.
    """
    mean_sq_err = metrics.mean_squared_error(test_y, pred_y)
    scores = (
        metrics.mean_absolute_error(test_y, pred_y),
        math.sqrt(mean_sq_err),
        metrics.r2_score(test_y, pred_y),
    )
    mae, rmse, r2 = scores
    print('rmse=%.2f, mae=%.2f, r2=%.2f'%(rmse, mae, r2))
    return scores
    
#
#features
#    cols=[Myidx, 'target','eventid','car_number','stageid',
#             'firststage','pit_in_caution','start_position',
#             'start_rank','start_rank_ratio','top_pack','bottom_pack',
#             'average_rank','average_rank_all',
#             'change_in_rank','change_in_rank_all','rate_of_change','rate_of_change_all']    
def split_by_eventid(stagedata, eventid):
    """
    Hold out one event as the test set.

    Rows whose 'eventid' equals eventid form the test set, all other rows
    the training set.  Returns
    (train_df, test_df, train_x, train_y, test_x, test_y), where the first
    two are DataFrames and the rest are numpy arrays: y is column 1 of the
    frame and x is every column from position 2 onwards.
    """
    is_test = stagedata['eventid'] == eventid
    train = stagedata[~is_test]
    test = stagedata[is_test]

    train_values = train.to_numpy()
    test_values = test.to_numpy()

    # column 1 is the target, columns 2.. are handed to the models as features
    train_x, train_y = train_values[:, 2:], train_values[:, 1]
    test_x, test_y = test_values[:, 2:], test_values[:, 1]

    return train, test, train_x, train_y, test_x, test_y


def split_by_stageid(stagedata, stageid):
    """
    Split on the stage number.

    Rows with 'stageid' <= stageid go to the training set, rows with
    'stageid' > stageid to the test set.  Returns
    (train, test, train_x, train_y, test_x, test_y), all numpy arrays:
    y is column 1 and x is every column from position 2 onwards.
    """
    early = stagedata['stageid'] <= stageid
    late = stagedata['stageid'] > stageid

    train = stagedata[early].to_numpy()
    test = stagedata[late].to_numpy()

    # column 1 is the target, columns 2.. are handed to the models as features
    train_x, train_y = train[:, 2:], train[:, 1]
    test_x, test_y = test[:, 2:], test[:, 1]

    return train, test, train_x, train_y, test_x, test_y


def regressor_model(name='svr'):
    """
    Fit the named model on the current split and score it.

    Reads the module-level train_x/train_y/test_x/test_y, so a split must
    have been stored there beforehand.  Returns (score, pred_y) where score
    is the tuple produced by evaluate() and pred_y the raw predictions.
    """
    print('[*] predict with %s model'%name)

    model = get_regressor(name)
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    # predictions are scored as returned by the model (no rounding to int)
    return evaluate(test_y, predictions), predictions

In [4]:
def do_rerank(dfout, col=4):
    """
    Turn the (possibly fractional) values of one column into ranks,
    separately for every start lap.

    dfout must have exactly the five columns
    ['carno','startlap', 'startrank','endrank','pred_endrank'] in that
    order; col is the position of the column to re-rank (4 = pred_endrank).
    Within each group of rows sharing a 'startlap' the smallest value
    becomes 0, the next 1, and so on; equal values keep their row order.

    Returns a new DataFrame with the same five column names.  Rows come
    back grouped by lap, so their order may differ from the input; dfout
    itself is not modified.
    """
    print('rerank...')
    laps = set(dfout.startlap.values)

    per_lap = []
    for lap in laps:
        # explicit copy: the ranks are written into this array in place
        block = dfout[dfout['startlap']==lap].to_numpy(copy=True)

        # stable sort so tied predictions are ranked in row order rather
        # than in an implementation-dependent order
        order = np.argsort(block[:,col], axis=0, kind='stable')
        # argsort of a permutation gives every row its position, i.e. its rank
        block[:,col] = np.argsort(order, axis=0)

        per_lap.append(block)

    # stack once at the end instead of re-copying the result for every lap
    stacked = np.vstack(per_lap) if per_lap else []
    dfret = pd.DataFrame(stacked, columns =['carno','startlap', 'startrank','endrank','pred_endrank'])

    return dfret

def build_df(testdf, pred_y, dorerank=True):
    """
    Assemble the standard prediction table for one test event.

    testdf must provide 'car_number', 'start_lap', 'start_rank' and
    'target'; pred_y holds one predicted rank change per row.  Start lap
    and start rank are shifted down by one, the true end rank is the
    shifted start rank plus the target, and the predicted end rank is the
    shifted start rank plus pred_y.  With dorerank=True the predicted end
    ranks are passed through do_rerank(col=4) before the differences are
    computed.

    Returned columns:
    carno, startlap, startrank, endrank, pred_endrank, diff, sign,
    pred_diff, pred_sign
    """
    def _sign_column(series):
        # +1 / -1 / 0 for every element, kept in the column's own dtype
        vals = series.values
        signs = np.where(vals > 0, 1, np.where(vals < 0, -1, 0))
        return signs.astype(vals.dtype)

    print('build_df: len testdf=%d, len of pred_y=%d'%(len(testdf), len(pred_y)))

    raw = testdf[['car_number','start_lap','start_rank','target']].values
    startlap = raw[:,1] - 1
    startrank = raw[:,2] - 1
    endrank = startrank + raw[:,3]
    table = np.column_stack((raw[:,0], startlap, startrank, endrank))
    dfout = pd.DataFrame(table, columns =['carno','startlap', 'startrank','endrank'])

    # add predictions
    dfout['pred_endrank'] = pred_y +  dfout['startrank']

    if dorerank:
        dfout = do_rerank(dfout,col=4)

    # actual movement and its direction
    dfout['diff'] = dfout['endrank'] - dfout['startrank']
    dfout['sign'] = _sign_column(dfout['diff'])

    # predicted movement and its direction
    dfout['pred_diff'] = dfout['pred_endrank'] - dfout['startrank']
    dfout['pred_sign'] = _sign_column(dfout['pred_diff'])
    return dfout
    

def test_cv():
    """
    Leave-one-event-out evaluation of every model in `regressors`.

    Each event id found in the module-level `stagedata` is held out in
    turn, every model is trained on the remaining events and scored on the
    held-out one.  The split is stored in the module-level
    train_x/train_y/test_x/test_y because regressor_model() reads it from
    there.

    The result has one row per held-out event with the columns
    runid, trainsize, testsize, testdistribution and one score column per
    model.  The score is element 0 of evaluate()'s result, i.e. the MAE.
    The table is written to 'crossvalid_stagedata_regressor_<suffix>.csv'
    and returned.
    """
    global train_x, train_y, test_x, test_y

    cols = ['runid','trainsize','testsize','testdistribution']
    cols.extend(regressors)
    print('cols:%s'%cols)
    mae_table = pd.DataFrame([],columns=cols)

    # event ids are used as positions in this list of event names
    years = ['2013','2014','2015','2016','2017','2018','2019']
    eventsname = [f'Indy500-{x}' for x in years]

    # sorted so the rows of the report come out in a fixed order
    for eventid in sorted(set(stagedata['eventid'])):
        print('Testset = %s'%eventsname[eventid])

        _, _, train_x, train_y, test_x, test_y = split_by_eventid(stagedata, eventid)
        # how many test targets are positive / zero / negative
        test_distribution = '+:%d,0:%d,-:%d'%(np.sum(test_y>0),np.sum(test_y==0),np.sum(test_y<0))

        rec = [eventsname[eventid],train_x.shape[0],test_x.shape[0],test_distribution]
        for clf in regressors:
            acc, _ = regressor_model(clf)
            # acc is (mae, rmse, r2); only the MAE is tabulated
            rec.append(acc[0])

        mae_table = pd.concat([mae_table, pd.DataFrame([rec],columns=cols)])

    mae_table.to_csv('crossvalid_stagedata_regressor_%s.csv'%suffix, float_format='%.3f')

    return mae_table
    
def test_20182019(dorerank=True):
    """
    Evaluate every model in `regressors` on the 2018 and on the 2019 event.

    For each of the two test years the other test year is removed from the
    module-level `stagedata`, the test year is held out with
    split_by_eventid() and every model is trained on what remains.  The
    split is stored in the module-level train_x/train_y/test_x/test_y
    because regressor_model() reads it from there.

    dorerank is forwarded to build_df() and decides whether predicted end
    ranks are re-ranked per lap.

    Returns (retdf0, pred_df):
      retdf0  - one row per test year with the columns runid, trainsize,
                testsize, testdistribution and one score column per model;
                the score is element 0 of evaluate()'s result, i.e. the MAE.
      pred_df - {'2018': {model: DataFrame}, '2019': {model: DataFrame}}
                holding the build_df() output for every model.
    """
    global train_x, train_y, test_x, test_y

    cols = ['runid','trainsize','testsize','testdistribution']
    cols.extend(regressors)
    print('cols:%s'%cols)

    # event ids are positions in this list of event names
    years = ['2013','2014','2015','2016','2017','2018','2019']
    eventsname = [f'Indy500-{x}' for x in years]
    events_id={key:idx for idx, key in enumerate(eventsname)}

    pred_df = {'2018':{}, '2019':{}}
    retdf0 = pd.DataFrame([],columns=cols)

    # (year to test on, year to leave out of the data entirely)
    for test_year, ignore_year in (('2018', '2019'), ('2019', '2018')):
        eventid = events_id[f'Indy500-{test_year}']
        ignore_eventid = events_id[f'Indy500-{ignore_year}']
        subset = stagedata[stagedata['eventid']!=ignore_eventid]

        print('Testset = %s'%eventsname[eventid])

        _, testdf, train_x, train_y, test_x, test_y = split_by_eventid(subset, eventid)
        # how many test targets are positive / zero / negative
        test_distribution = '+:%d,0:%d,-:%d'%(np.sum(test_y>0),np.sum(test_y==0),np.sum(test_y<0))

        rec = [eventsname[eventid],train_x.shape[0],test_x.shape[0],test_distribution]
        for clf in regressors:
            acc, pred_y = regressor_model(clf)
            # acc is (mae, rmse, r2); only the MAE is tabulated
            rec.append(acc[0])

            #build pred df
            pred_df[test_year][clf] = build_df(testdf, pred_y,dorerank=dorerank)

        retdf0 = pd.concat([retdf0, pd.DataFrame([rec],columns=cols)])

    return retdf0, pred_df

In [5]:
import pickle
def save_result(dfs, datafile):
    """
    Pickle dfs to the file datafile.

    The object is wrapped in a one-element list before it is written, so
    loading the file gives back [dfs].  The highest available pickle
    protocol is used.
    """
    payload = [dfs]
    with open(datafile, 'wb') as sink:
        pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL).dump(payload)

### test oracle with stint_len

In [6]:
# dataset-variant switches; they only feed the file-name pieces built below
_trim = 0
_include_final = True
_include_stintlen = True
#_include_stintlen = False

# the switches are encoded as short strings that are embedded in file names
if _include_final:
    include_str = '1'
else:
    include_str = '0'

if _include_stintlen:
    stint_str = '1'
else:
    stint_str = ''

suffix = f'indy500-2013-2019-end{include_str}{stint_str}-t{_trim}'



In [7]:

# Driver: for each value of plen, load the matching short-term dataset,
# evaluate all models on 2018/2019 with and without re-ranking, and pickle
# the per-model prediction tables.
for plen in [2,4,6,8]:

    # despite its name this is the INPUT csv; the name is built from the
    # flag strings of the configuration cell plus plen
    output_file = f'shortterm-indy500-2013-2019-end{include_str}{stint_str}-t{plen}-.csv'
    # stagedata is the module-level frame that test_20182019() reads
    stagedata = pd.read_csv(output_file)
    # missing values are replaced by 0 in place
    stagedata.fillna(0, inplace=True)
    #stagedata.info()
    # same evaluation twice; only the dorerank flag passed on to build_df differs
    retdf_rerank, preddf_rerank = test_20182019(dorerank=True)
    retdf_norank, preddf_norank = test_20182019(dorerank=False)

    #output_file = f'stage-indy500-2013-2019-end{include_str}{stint_str}-t{_trim}.csv'
    #outfile=f'shortterm-dfout-mlmodels-indy500-tr2013_2017-te2018_2019-end{include_str}-normal-t{_trim}.pickle'
    #save_result(preddf, outfile)
    # only the prediction tables are saved; the score tables
    # (retdf_rerank / retdf_norank) are not written anywhere in this cell
    outfile=f'shortterm-dfout-mlmodels-indy500-tr2013_2017-te2018_2019-end{include_str}-rerank-t{plen}.pickle'
    save_result(preddf_rerank, outfile)
    outfile=f'shortterm-dfout-mlmodels-indy500-tr2013_2017-te2018_2019-end{include_str}-norerank-t{plen}.pickle'
    save_result(preddf_norank, outfile)


cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=3.25, mae=1.36, r2=-0.00
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with avgrank model
rmse=17.57, mae=15.58, r2=-28.14
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with dice model
rmse=4.37, mae=2.25, r2=-0.81
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  positive)


rmse=3.10, mae=1.61, r2=0.09
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with ridge model
rmse=3.10, mae=1.61, r2=0.09
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with rf model
rmse=3.34, mae=1.90, r2=-0.05
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with svr model
rmse=3.25, mae=1.39, r2=0.00
build_df: len testdf=5340, len of pred_y=5340
rerank...
[*] predict with xgb model
rmse=3.62, mae=1.94, r2=-0.23
build_df: len testdf=5340, len of pred_y=5340
rerank...
Testset = Indy500-2019
[*] predict with currank model
rmse=3.07, mae=1.18, r2=-0.00
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with avgrank model
rmse=18.16, mae=16.11, r2=-33.95
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with dice model
rmse=4.15, mae=2.08, r2=-0.82
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  positive)


rmse=2.93, mae=1.45, r2=0.09
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with ridge model
rmse=2.92, mae=1.43, r2=0.10
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with rf model
rmse=2.88, mae=1.58, r2=0.12
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with svr model
rmse=3.07, mae=1.23, r2=0.00
build_df: len testdf=5629, len of pred_y=5629
rerank...
[*] predict with xgb model
rmse=3.04, mae=1.51, r2=0.02
build_df: len testdf=5629, len of pred_y=5629
rerank...
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=3.25, mae=1.36, r2=-0.00
build_df: len testdf=5340, len of pred_y=5340
[*] predict with avgrank model
rmse=17.57, mae=15.58, r2=-28.14
build_df: len testdf=5340, len of pred_y=5340
[*] predict with dice model
rmse=4.37, mae=2.25, r2=-0.81
build_df: len testdf=5340, len of pred_y

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  positive)


rmse=3.10, mae=1.61, r2=0.09
build_df: len testdf=5340, len of pred_y=5340
[*] predict with ridge model
rmse=3.10, mae=1.61, r2=0.09
build_df: len testdf=5340, len of pred_y=5340
[*] predict with rf model
rmse=3.44, mae=1.92, r2=-0.11
build_df: len testdf=5340, len of pred_y=5340
[*] predict with svr model
rmse=3.25, mae=1.39, r2=0.00
build_df: len testdf=5340, len of pred_y=5340
[*] predict with xgb model
rmse=3.62, mae=1.94, r2=-0.23
build_df: len testdf=5340, len of pred_y=5340
Testset = Indy500-2019
[*] predict with currank model
rmse=3.07, mae=1.18, r2=-0.00
build_df: len testdf=5629, len of pred_y=5629
[*] predict with avgrank model
rmse=18.16, mae=16.11, r2=-33.95
build_df: len testdf=5629, len of pred_y=5629
[*] predict with dice model
rmse=4.15, mae=2.08, r2=-0.82
build_df: len testdf=5629, len of pred_y=5629
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  positive)


rmse=2.93, mae=1.45, r2=0.09
build_df: len testdf=5629, len of pred_y=5629
[*] predict with ridge model
rmse=2.92, mae=1.43, r2=0.10
build_df: len testdf=5629, len of pred_y=5629
[*] predict with rf model
rmse=2.93, mae=1.60, r2=0.09
build_df: len testdf=5629, len of pred_y=5629
[*] predict with svr model
rmse=3.07, mae=1.23, r2=0.00
build_df: len testdf=5629, len of pred_y=5629
[*] predict with xgb model
rmse=3.04, mae=1.51, r2=0.02
build_df: len testdf=5629, len of pred_y=5629
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=4.45, mae=2.32, r2=-0.00
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with avgrank model
rmse=17.95, mae=15.80, r2=-15.29
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with dice model
rmse=5.68, mae=3.40, r2=-0.63
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict wi

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.03, mae=2.51, r2=0.18
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with ridge model
rmse=4.02, mae=2.50, r2=0.18
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with rf model
rmse=3.99, mae=2.62, r2=0.20
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with svr model
rmse=4.36, mae=2.29, r2=0.04
build_df: len testdf=5274, len of pred_y=5274
rerank...
[*] predict with xgb model
rmse=3.97, mae=2.45, r2=0.20
build_df: len testdf=5274, len of pred_y=5274
rerank...
Testset = Indy500-2019
[*] predict with currank model
rmse=4.15, mae=1.94, r2=-0.00
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with avgrank model
rmse=18.46, mae=16.30, r2=-18.81
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with dice model
rmse=5.41, mae=3.13, r2=-0.70
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=3.78, mae=2.20, r2=0.17
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with ridge model
rmse=3.76, mae=2.17, r2=0.18
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with rf model
rmse=3.76, mae=2.26, r2=0.18
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with svr model
rmse=4.07, mae=1.95, r2=0.04
build_df: len testdf=5565, len of pred_y=5565
rerank...
[*] predict with xgb model
rmse=3.65, mae=2.16, r2=0.22
build_df: len testdf=5565, len of pred_y=5565
rerank...
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=4.45, mae=2.32, r2=-0.00
build_df: len testdf=5274, len of pred_y=5274
[*] predict with avgrank model
rmse=17.95, mae=15.80, r2=-15.29
build_df: len testdf=5274, len of pred_y=5274
[*] predict with dice model
rmse=5.68, mae=3.40, r2=-0.63
build_df: len testdf=5274, len of pred_y

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.03, mae=2.51, r2=0.18
build_df: len testdf=5274, len of pred_y=5274
[*] predict with ridge model
rmse=4.02, mae=2.50, r2=0.18
build_df: len testdf=5274, len of pred_y=5274
[*] predict with rf model
rmse=3.97, mae=2.61, r2=0.20
build_df: len testdf=5274, len of pred_y=5274
[*] predict with svr model
rmse=4.36, mae=2.29, r2=0.04
build_df: len testdf=5274, len of pred_y=5274
[*] predict with xgb model
rmse=3.97, mae=2.45, r2=0.20
build_df: len testdf=5274, len of pred_y=5274
Testset = Indy500-2019
[*] predict with currank model
rmse=4.15, mae=1.94, r2=-0.00
build_df: len testdf=5565, len of pred_y=5565
[*] predict with avgrank model
rmse=18.46, mae=16.30, r2=-18.81
build_df: len testdf=5565, len of pred_y=5565
[*] predict with dice model
rmse=5.41, mae=3.13, r2=-0.70
build_df: len testdf=5565, len of pred_y=5565
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=3.78, mae=2.20, r2=0.17
build_df: len testdf=5565, len of pred_y=5565
[*] predict with ridge model
rmse=3.76, mae=2.17, r2=0.18
build_df: len testdf=5565, len of pred_y=5565
[*] predict with rf model
rmse=3.76, mae=2.29, r2=0.18
build_df: len testdf=5565, len of pred_y=5565
[*] predict with svr model
rmse=4.07, mae=1.95, r2=0.04
build_df: len testdf=5565, len of pred_y=5565
[*] predict with xgb model
rmse=3.65, mae=2.16, r2=0.22
build_df: len testdf=5565, len of pred_y=5565
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=5.20, mae=3.05, r2=-0.00
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with avgrank model
rmse=18.26, mae=15.92, r2=-11.36
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with dice model
rmse=6.60, mae=4.34, r2=-0.61
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict wi

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng

  positive)


rmse=4.52, mae=3.06, r2=0.24
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with ridge model
rmse=4.51, mae=3.05, r2=0.25
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with rf model
rmse=4.42, mae=3.02, r2=0.28
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with svr model
rmse=4.99, mae=2.95, r2=0.08
build_df: len testdf=5208, len of pred_y=5208
rerank...
[*] predict with xgb model
rmse=4.30, mae=2.92, r2=0.31
build_df: len testdf=5208, len of pred_y=5208
rerank...
Testset = Indy500-2019
[*] predict with currank model
rmse=4.71, mae=2.42, r2=-0.00
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with avgrank model
rmse=18.68, mae=16.42, r2=-14.75
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with dice model
rmse=6.16, mae=3.86, r2=-0.71
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng

  positive)


rmse=4.19, mae=2.59, r2=0.21
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with ridge model
rmse=4.16, mae=2.56, r2=0.22
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with rf model
rmse=4.07, mae=2.62, r2=0.25
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with svr model
rmse=4.55, mae=2.41, r2=0.07
build_df: len testdf=5501, len of pred_y=5501
rerank...
[*] predict with xgb model
rmse=4.02, mae=2.58, r2=0.27
build_df: len testdf=5501, len of pred_y=5501
rerank...
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=5.20, mae=3.05, r2=-0.00
build_df: len testdf=5208, len of pred_y=5208
[*] predict with avgrank model
rmse=18.26, mae=15.92, r2=-11.36
build_df: len testdf=5208, len of pred_y=5208
[*] predict with dice model
rmse=6.60, mae=4.34, r2=-0.61
build_df: len testdf=5208, len of pred_y

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng

  positive)


rmse=4.52, mae=3.06, r2=0.24
build_df: len testdf=5208, len of pred_y=5208
[*] predict with ridge model
rmse=4.51, mae=3.05, r2=0.25
build_df: len testdf=5208, len of pred_y=5208
[*] predict with rf model
rmse=4.44, mae=3.02, r2=0.27
build_df: len testdf=5208, len of pred_y=5208
[*] predict with svr model
rmse=4.99, mae=2.95, r2=0.08
build_df: len testdf=5208, len of pred_y=5208
[*] predict with xgb model
rmse=4.30, mae=2.92, r2=0.31
build_df: len testdf=5208, len of pred_y=5208
Testset = Indy500-2019
[*] predict with currank model
rmse=4.71, mae=2.42, r2=-0.00
build_df: len testdf=5501, len of pred_y=5501
[*] predict with avgrank model
rmse=18.68, mae=16.42, r2=-14.75
build_df: len testdf=5501, len of pred_y=5501
[*] predict with dice model
rmse=6.16, mae=3.86, r2=-0.71
build_df: len testdf=5501, len of pred_y=5501
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng

  positive)


rmse=4.19, mae=2.59, r2=0.21
build_df: len testdf=5501, len of pred_y=5501
[*] predict with ridge model
rmse=4.16, mae=2.56, r2=0.22
build_df: len testdf=5501, len of pred_y=5501
[*] predict with rf model
rmse=4.10, mae=2.66, r2=0.24
build_df: len testdf=5501, len of pred_y=5501
[*] predict with svr model
rmse=4.55, mae=2.41, r2=0.07
build_df: len testdf=5501, len of pred_y=5501
[*] predict with xgb model
rmse=4.02, mae=2.58, r2=0.27
build_df: len testdf=5501, len of pred_y=5501
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=5.73, mae=3.61, r2=-0.00
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with avgrank model
rmse=18.53, mae=16.00, r2=-9.45
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with dice model
rmse=7.37, mae=5.04, r2=-0.65
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict wit

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.84, mae=3.42, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with ridge model
rmse=4.82, mae=3.41, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with rf model
rmse=4.84, mae=3.41, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with svr model
rmse=5.41, mae=3.43, r2=0.11
build_df: len testdf=5142, len of pred_y=5142
rerank...
[*] predict with xgb model
rmse=5.31, mae=3.66, r2=0.14
build_df: len testdf=5142, len of pred_y=5142
rerank...
Testset = Indy500-2019
[*] predict with currank model
rmse=5.06, mae=2.76, r2=-0.00
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with avgrank model
rmse=18.86, mae=16.51, r2=-12.88
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with dice model
rmse=6.83, mae=4.42, r2=-0.82
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.44, mae=2.85, r2=0.23
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with ridge model
rmse=4.42, mae=2.81, r2=0.24
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with rf model
rmse=4.42, mae=2.99, r2=0.24
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with svr model
rmse=4.83, mae=2.70, r2=0.09
build_df: len testdf=5437, len of pred_y=5437
rerank...
[*] predict with xgb model
rmse=4.39, mae=2.97, r2=0.25
build_df: len testdf=5437, len of pred_y=5437
rerank...
cols:['runid', 'trainsize', 'testsize', 'testdistribution', 'currank', 'avgrank', 'dice', 'lasso', 'ridge', 'rf', 'svr', 'xgb']
Testset = Indy500-2018
[*] predict with currank model
rmse=5.73, mae=3.61, r2=-0.00
build_df: len testdf=5142, len of pred_y=5142
[*] predict with avgrank model
rmse=18.53, mae=16.00, r2=-9.45
build_df: len testdf=5142, len of pred_y=5142
[*] predict with dice model
rmse=7.37, mae=5.04, r2=-0.65
build_df: len testdf=5142, len of pred_y=

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.84, mae=3.42, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
[*] predict with ridge model
rmse=4.82, mae=3.41, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
[*] predict with rf model
rmse=4.82, mae=3.40, r2=0.29
build_df: len testdf=5142, len of pred_y=5142
[*] predict with svr model
rmse=5.41, mae=3.43, r2=0.11
build_df: len testdf=5142, len of pred_y=5142
[*] predict with xgb model
rmse=5.31, mae=3.66, r2=0.14
build_df: len testdf=5142, len of pred_y=5142
Testset = Indy500-2019
[*] predict with currank model
rmse=5.06, mae=2.76, r2=-0.00
build_df: len testdf=5437, len of pred_y=5437
[*] predict with avgrank model
rmse=18.86, mae=16.51, r2=-12.88
build_df: len testdf=5437, len of pred_y=5437
[*] predict with dice model
rmse=6.83, mae=4.42, r2=-0.82
build_df: len testdf=5437, len of pred_y=5437
[*] predict with lasso model


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


rmse=4.44, mae=2.85, r2=0.23
build_df: len testdf=5437, len of pred_y=5437
[*] predict with ridge model
rmse=4.42, mae=2.81, r2=0.24
build_df: len testdf=5437, len of pred_y=5437
[*] predict with rf model
rmse=4.36, mae=2.94, r2=0.26
build_df: len testdf=5437, len of pred_y=5437
[*] predict with svr model
rmse=4.83, mae=2.70, r2=0.09
build_df: len testdf=5437, len of pred_y=5437
[*] predict with xgb model
rmse=4.39, mae=2.97, r2=0.25
build_df: len testdf=5437, len of pred_y=5437


### test

In [8]:
# Show every row of stagedata whose car_number column equals 98.
stagedata[stagedata['car_number']==98]

Unnamed: 0.1,Unnamed: 0,target,start_lap,stint_len,eventid,car_number,stageid,firststage,pit_in_caution,start_position,...,laptime_std_all,laps_prev,laps_after_last_pitstop,pittime_prev,prev_nb0_change_in_rank,prev_nb1_change_in_rank,prev_nb2_change_in_rank,follow_nb0_change_in_rank,follow_nb1_change_in_rank,follow_nb2_change_in_rank
5106,5106,2,10,8,0,98,0,0,0,11,...,0,11,11,64.05195,-0.4526,-0.5550,-0.9964,0.3136,0.5123,0.9277
5107,5107,2,11,8,0,98,1,1,0,11,...,0,12,12,64.05195,-0.3613,-0.6827,-0.9490,0.5579,0.6209,0.6751
5108,5108,2,12,8,0,98,2,1,0,11,...,0,13,13,64.05195,-0.5604,-0.8552,-1.0442,0.1987,0.4362,0.7589
5109,5109,2,13,8,0,98,3,1,0,11,...,0,14,14,64.05195,-0.3435,-0.6692,-0.9142,0.3676,0.7287,0.9847
5110,5110,1,14,8,0,98,4,1,0,11,...,0,15,15,64.05195,-0.8033,-1.0647,-1.4325,0.1949,0.4596,0.4705
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36696,36696,0,182,8,6,98,172,1,0,10,...,0,41,2,89.85775,-44.9729,-223.8363,-224.2769,0.0000,0.0000,0.0000
36697,36697,0,183,8,6,98,173,1,0,10,...,0,41,3,89.85775,-43.0108,-158.8810,-160.1068,0.0000,0.0000,0.0000
36698,36698,0,184,8,6,98,174,1,0,10,...,0,41,4,89.85775,-42.9688,-128.1491,-131.3714,0.0000,0.0000,0.0000
36699,36699,0,185,8,6,98,175,1,0,10,...,0,41,5,89.85775,-43.3346,-128.4681,-130.1105,0.0000,0.0000,0.0000


In [9]:
# Pick the frame stored under preddf_rerank['2018']['xgb'].
# presumably the reranked xgb predictions for the Indy500-2018 test set —
# TODO confirm against the cell that populates preddf_rerank.
df = preddf_rerank['2018']['xgb']

In [10]:
# Display df (the notebook renders the value of the cell's last expression).
df

Unnamed: 0,carno,startlap,startrank,endrank,pred_endrank,diff,sign,pred_diff,pred_sign
0,1.0,9.0,3.0,3.0,2.0,0.0,0.0,-1.0,-1.0
1,3.0,9.0,7.0,7.0,6.0,0.0,0.0,-1.0,-1.0
2,4.0,9.0,12.0,12.0,11.0,0.0,0.0,-1.0,-1.0
3,6.0,9.0,15.0,15.0,16.0,0.0,0.0,1.0,1.0
4,7.0,9.0,28.0,28.0,27.0,0.0,0.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...
5137,60.0,190.0,2.0,15.0,13.0,13.0,1.0,11.0,1.0
5138,64.0,190.0,0.0,16.0,5.0,16.0,1.0,5.0,1.0
5139,66.0,190.0,15.0,10.0,15.0,-5.0,-1.0,0.0,0.0
5140,88.0,190.0,17.0,13.0,14.0,-4.0,-1.0,-3.0,-1.0


In [11]:
### test dark-horse car=27
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'preddf_oracle' is not defined". preddf_oracle has to be
# built (or another prediction dict substituted) before this cell can run.
df2018 = preddf_oracle['2018']['rf']
# Keep only the rows whose carno equals 27.
car27 = df2018[df2018['carno']==27]

NameError: name 'preddf_oracle' is not defined

In [None]:
# Display the current car27 selection.
car27

In [None]:
# Pass the pred_diff and diff columns of car27 (as arrays) to evaluate.
# NOTE(review): evaluate is defined outside this cell; confirm that it
# expects the prediction first and the ground truth second.
evaluate(car27['pred_diff'].values,car27['diff'].values)

In [None]:
### test car=12
# NOTE(review): the variable is still called car27, but the filter below
# selects carno 12; the name is kept because the following cells read car27.
# Uses the 'xgb' entry of preddf_oracle['2018'] this time.
df2018 = preddf_oracle['2018']['xgb']
car27 = df2018[df2018['carno']==12]
# Display the selected rows.
car27

In [None]:
# Pass the pred_diff and diff columns of the current car27 selection to
# evaluate (defined outside this cell).
evaluate(car27['pred_diff'].values,car27['diff'].values)

In [None]:
# Evaluate on the whole df2018 frame: car27 is rebound to the unfiltered
# frame here, so despite its name it is no longer a per-car selection.
car27 = df2018
evaluate(car27['pred_diff'].values,car27['diff'].values)

In [None]:
# Number of rows in df2018.
len(df2018)

In [None]:
# Display retdf_oracle.
# NOTE(review): no assignment to retdf_oracle is visible in this part of the
# notebook — confirm it is defined before running this cell.
retdf_oracle

In [None]:
# Evaluate only on a hand-picked set of car numbers.
# NOTE(review): the list is named top10 but holds 9 car numbers.
top10 = [12, 20, 9, 27, 28, 22, 29, 1, 6]
# Keep the df2018 rows whose carno is in the list (car27 is reused as the
# variable name for the subset).
car27 = df2018[df2018['carno'].isin(top10)]
evaluate(car27['pred_diff'].values,car27['diff'].values)

In [None]:
# Same subset evaluation with a shorter list.
# NOTE(review): the list is named top10 but holds 5 car numbers.
top10 = [12, 20, 9, 27, 28]
# Keep the df2018 rows whose carno is in the list (car27 is reused as the
# variable name for the subset).
car27 = df2018[df2018['carno'].isin(top10)]
evaluate(car27['pred_diff'].values,car27['diff'].values)

In [None]:
# Repeat the subset evaluation on the frame stored under
# preddf['2018']['xgb'] instead of the preddf_oracle one.
df2018 = preddf['2018']['xgb']
# top10 is not assigned in this cell; it keeps whatever value an earlier
# cell left in the kernel.
car27 = df2018[df2018['carno'].isin(top10)]
evaluate(car27['pred_diff'].values,car27['diff'].values)