In [25]:
import pandas as pd
import numpy as np

pd.set_option('display.max_column',None)
pd.set_option('display.max_row',5000)

In [26]:
records = pd.read_csv("/Users/mk2/Documents - Gabriel’s MacBook Pro/dadaset/records.csv")

### Preprocess

In [27]:
records = records.drop(columns= ["Unnamed: 0"])
records.infer_objects()
records["date"] = pd.to_datetime(records["date"],format='%d/%m/%y')

In [28]:
records = records.sort_values(by=["date","race_index","result"], ascending=False)
records.reset_index(inplace=True, drop=True)

In [29]:
records.columns

Index(['race_id', 'race_index', 'result', 'date', 'venue', 'dist',
       'turf_quality', 'race_class', 'draw', 'rating', 'trainer', 'jockey',
       'lbw', 'win_odds', 'actual_weight', 'running_pos1', 'running_pos2',
       'running_pos3', 'running_pos4', 'running_pos5', 'finish_time',
       'declared_weight', 'gear', 'id', 'name', 'details', 'country_of_origin',
       'age', 'colour', 'sex', 'import_type', 'season_stakes', 'total_stakes',
       'win_counts', 'second_count', 'third_count', 'horse_exp',
       'no_starts_past_10', 'current_stable_loc', 'arrival_date', 'trainer.1',
       'owner', 'current_rating', 'start_ofseason_rating', 'sire', 'dam',
       'dams_sire', 'same_sire'],
      dtype='object')

In [30]:
records.drop(columns=['season_stakes', 'total_stakes',
       'win_counts', 'second_count', 'third_count', 'horse_exp',
       'no_starts_past_10', 'current_stable_loc', 'arrival_date','current_rating', 'start_ofseason_rating'],inplace=True)

### Variables

#### Age

In [31]:
records.loc[:,('rest_time')] = (records.groupby('name')["date"].diff(periods=-1).dt.days.fillna(0))

records.loc[:,('cumul_rest_time')] = records.groupby('name')['rest_time'].cumsum().fillna(0)

records.loc[:,('real_age')] = (records['age'] - records['cumul_rest_time']/365).round()

In [32]:
records["real_age"].corr(records["result"])

-0.0018761449865074755

#### General

In [33]:
records.loc[:,('speed')] = (records['dist'] / records['finish_time']).round(2)

records.loc[:,('horse_exp')] = records.groupby(['name']).cumcount(ascending=False)

records.loc[:,("distance_exp")] = records.groupby(["name","dist"]).cumcount(ascending=False)

records.loc[:,("horse_jockey_exp")] = records.groupby(["name","jockey"]).cumcount(ascending=False)

#### Prev

In [34]:
records.loc[:,('prev_speed')] = records.groupby('name')[('speed')].shift(periods=-1)

records.loc[:,('prev_result')] = records.groupby('name')[('result')].shift(periods=-1)

records["prev_actual_weight"] = records.groupby('name')['actual_weight'].shift(periods=-1)

records["prev_declared_weight"] = records.groupby(['name'])['declared_weight'].shift(periods=-1)

records.loc[:,('prev_time')] = records.groupby(["name","dist"])[('finish_time')].shift(periods=-1)

records.loc[:,('prev_running_pos4')] = records.groupby('name')[('running_pos4')].shift(periods=-1)

records.loc[:,('prev_running_pos3')] = records.groupby('name')[('running_pos3')].shift(periods=-1)

records.loc[:,('prev_running_pos2')] = records.groupby('name')[('running_pos2')].shift(periods=-1)

records.loc[:,('prev_running_pos1')] = records.groupby('name')[('running_pos1')].shift(periods=-1)

#### Change

In [35]:
records["actual_wt_change"] = records['actual_weight'] - records['prev_actual_weight']

records["declared_wt_change"] = records['declared_weight'] - records['prev_declared_weight']

records["prev_speed_change"] = records['prev_speed'] - records['prev_speed'].shift(periods=-1)

#### Ranks

In [36]:
records.loc[:,("odds_rank")] = records.loc[:,("win_odds")].rank(pct=True, ascending=False)

records.loc[:,("real_age_ranl")] = records.groupby(["race_index","date"])["real_age"].rank(pct=True, ascending=False)

records.loc[:,("cond_odds_rank")] = records.groupby(["race_index","date"])["win_odds"].rank(pct=True, ascending=False)

records.loc[:,("prev_speed_rank")] = records.groupby(["race_index","date"])["prev_speed"].rank(pct=True, ascending=True)

records.loc[:,("distance_exp_rank")] = records.groupby(["race_index","date"])["distance_exp"].rank(pct=True, ascending=True)

records.loc[:,("actual_weight_rank")] = records.groupby(["race_index","date"])["actual_weight"].rank(pct=True, ascending=True)

records.loc[:,("declared_weight_rank")] = records.groupby(["race_index","date"])["declared_weight"].rank(pct=True, ascending=True)

records.loc[:,("actual_wt_change_rank")] = records.groupby(["race_index","date"])["actual_wt_change"].rank(pct=True, ascending=True)

records.loc[:,('prev_running_pos4_rank')] = records.groupby(["race_index","date"])["prev_running_pos4"].rank(pct=True, ascending=True)

records.loc[:,('prev_running_pos3_rank')] = records.groupby(["race_index","date"])["prev_running_pos3"].rank(pct=True, ascending=True)

records.loc[:,('prev_running_pos2_rank')] = records.groupby(["race_index","date"])["prev_running_pos2"].rank(pct=True, ascending=True)

records.loc[:,('prev_running_pos1_rank')] = records.groupby(["race_index","date"])["prev_running_pos1"].rank(pct=True, ascending=True)

In [37]:
records.groupby(["name"])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x114f90790>

In [38]:
records.corrwith(records["speed"])#.plot(kind='barh',vfigsize=(10,20),fontsize=12, grid = True)

race_id                   0.006925
race_index                0.006925
result                   -0.268330
dist                     -0.809395
race_class               -0.201318
draw                     -0.019604
rating                    0.184387
win_odds                 -0.052534
actual_weight            -0.031711
running_pos1             -0.039087
running_pos2             -0.071448
running_pos3             -0.203434
running_pos4             -0.209901
running_pos5             -0.255934
finish_time              -0.846463
declared_weight           0.169127
age                      -0.167271
rest_time                 0.074353
cumul_rest_time           0.022770
real_age                 -0.226136
speed                     1.000000
horse_exp                -0.152001
distance_exp              0.102089
horse_jockey_exp         -0.027991
prev_speed                0.623814
prev_result              -0.097171
prev_actual_weight       -0.006463
prev_declared_weight      0.173050
prev_time           

In [39]:
records

Unnamed: 0,race_id,race_index,result,date,venue,dist,turf_quality,race_class,draw,rating,trainer,jockey,lbw,win_odds,actual_weight,running_pos1,running_pos2,running_pos3,running_pos4,running_pos5,finish_time,declared_weight,gear,id,name,details,country_of_origin,age,colour,sex,import_type,trainer.1,owner,sire,dam,dams_sire,same_sire,rest_time,cumul_rest_time,real_age,speed,horse_exp,distance_exp,horse_jockey_exp,prev_speed,prev_result,prev_actual_weight,prev_declared_weight,prev_time,prev_running_pos4,prev_running_pos3,prev_running_pos2,prev_running_pos1,actual_wt_change,declared_wt_change,prev_speed_change,odds_rank,real_age_ranl,cond_odds_rank,prev_speed_rank,distance_exp_rank,actual_weight_rank,declared_weight_rank,actual_wt_change_rank,prev_running_pos4_rank,prev_running_pos3_rank,prev_running_pos2_rank,prev_running_pos1_rank
0,462,462.0,14.0,2022-02-27,"ST / Turf / ""B""",1200.0,G,2.0,8.0,88.0,W Y So,K C Leung,11,117.0,121.0,1.0,1.0,14.0,,,70.94,1231.0,TT,HK_2018_C343,CLASSIC UNICORN,"{'country_of_origin': 'NZ', 'age': 6, 'colour'...",NZ,6,Chestnut,Gelding,PPG,W Y So,Li Fung Lok,Per Incanto,Grace And Eva,Stravinsky,"['A SMILE LIKE YOURS', 'CHARITY GRACE', 'CHIEF...",21.0,21.0,6.0,16.92,15,2,10,17.42,10.0,125.0,1219.0,,,10.0,2.0,3.0,-4.0,12.0,0.50,0.070342,0.285714,0.142857,0.571429,0.285714,0.642857,0.785714,0.357143,,0.892857,0.178571,0.178571
1,462,462.0,13.0,2022-02-27,"ST / Turf / ""B""",1200.0,G,2.0,7.0,89.0,P O'Sullivan,C L Chau,7,48.0,119.0,13.0,13.0,13.0,,,70.28,1174.0,V,HK_2018_C135,BAND OF BROTHERS,"{'country_of_origin': 'NZ', 'age': 7, 'colour'...",NZ,7,Chestnut,Gelding,PPG,P O'Sullivan,Z Power Syndicate,Sakhee's Secret,Jan Valachi,Kingdom Bay,['SICOMORO'],24.0,24.0,7.0,17.07,23,2,0,16.92,5.0,113.0,1176.0,68.79,5.0,3.0,4.0,4.0,6.0,-2.0,-0.47,0.187127,0.107143,0.428571,0.285714,0.285714,0.285714,0.500000,0.928571,0.25,0.428571,0.285714,0.321429
2,462,462.0,12.0,2022-02-27,"ST / Turf / ""B""",1200.0,G,2.0,9.0,96.0,A T Millard,A Hamelin,6-1/4,109.0,129.0,11.0,12.0,12.0,,,70.17,1155.0,B/TT,HK_2017_B025,SILVER FIG,"{'country_of_origin': 'AUS', 'age': 7, 'colour...",AUS,7,Grey,Gelding,PPG,A T Millard,Financial Investors Group Syndicate,Hard Spun,Bonifacio,Even The Score,"['CROWN AVENUE', 'ELITE PATCH', 'GOOD BEAUTY']",28.0,28.0,7.0,17.10,24,22,6,17.39,7.0,128.0,1162.0,69.02,,7.0,11.0,11.0,1.0,-7.0,-0.17,0.077425,0.107143,0.214286,0.500000,0.821429,0.857143,0.357143,0.642857,,0.714286,1.000000,0.857143
3,462,462.0,11.0,2022-02-27,"ST / Turf / ""B""",1200.0,G,2.0,1.0,86.0,P F Yiu,V Borges,6,16.0,119.0,8.0,9.0,11.0,,,70.12,1127.0,CP/TT,HK_2019_D394,CZARSON,"{'country_of_origin': 'AUS', 'age': 5, 'colour...",AUS,5,Chestnut,Gelding,PP,P F Yiu,Justin Li Cheng Yan,Stratum,Wobbly Boots,Show A Heart,"['NEXTMODEL', 'PERFECT MARYKNOLL']",28.0,28.0,5.0,17.11,21,17,2,17.56,2.0,116.0,1128.0,68.32,,2.0,7.0,12.0,3.0,-1.0,0.66,0.435048,0.571429,0.642857,0.857143,0.678571,0.285714,0.142857,0.821429,,0.321429,0.678571,0.964286
4,462,462.0,10.0,2022-02-27,"ST / Turf / ""B""",1200.0,G,2.0,11.0,83.0,C W Chang,H Bentley,5-1/2,155.0,116.0,12.0,10.0,10.0,,,70.05,1150.0,TT,HK_2019_D197,WIND N GRASS,"{'country_of_origin': 'IRE', 'age': 6, 'colour...",IRE,6,Brown,Gelding,PP,C W Chang,The Hon & Mrs Kenneth Lau Ip Keung,Lawman,Roystonea,Polish Precedent,['SPECIAL M'],24.0,24.0,6.0,17.13,31,26,1,16.90,7.0,113.0,1138.0,68.92,7.0,5.0,6.0,6.0,3.0,12.0,-0.53,0.042809,0.285714,0.071429,0.214286,0.928571,0.071429,0.285714,0.821429,0.50,0.571429,0.500000,0.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16432,197,197.0,5.0,2015-11-21,"ST / Turf / ""B+2""",1200.0,G,2.0,6.0,85.0,P O'Sullivan,Z Purton,5-1/4,5.7,120.0,6.0,5.0,5.0,,,69.56,1135.0,B1,HK_2014_T372,HARRIER JET,"{'country_of_origin': 'AUS', 'age': 10, 'colou...",AUS,10,Brown,Gelding,PP,P O'Sullivan,Wei Yu & Liu He,Dane Shadow,Borobodur,Strategic,"[""LET'S DO IT""]",0.0,2269.0,4.0,17.25,0,0,0,,,,,,,,,,,,,0.800893,1.000000,1.000000,,1.000000,1.000000,1.000000,,,,,
16433,115,115.0,10.0,2015-10-18,"ST / Turf / ""C+3""",1400.0,GF,4.0,13.0,48.0,W Y So,B Prebble,4-1/4,23.0,122.0,14.0,13.0,13.0,10.0,,83.66,1104.0,H,HK_2014_T262,HANG'S DECISION,"{'country_of_origin': 'NZ', 'age': 10, 'colour...",NZ,10,Bay,Gelding,PPG,W Y So,Wong Yuet Sing & Tony Wong Sze Ho,Postponed,Bislieri,Encosta de Lago,['Nil'],11.0,2324.0,4.0,16.73,3,0,1,16.76,9.0,123.0,1093.0,,,9.0,12.0,12.0,-1.0,11.0,-0.55,0.338990,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000,1.000000
16434,89,89.0,9.0,2015-10-07,"HV / Turf / ""A""",1200.0,GY,4.0,7.0,50.0,W Y So,B Prebble,3,16.0,123.0,12.0,12.0,9.0,,,71.62,1093.0,H,HK_2014_T262,HANG'S DECISION,"{'country_of_origin': 'NZ', 'age': 10, 'colour...",NZ,10,Bay,Gelding,PPG,W Y So,Wong Yuet Sing & Tony Wong Sze Ho,Postponed,Bislieri,Encosta de Lago,['Nil'],9.0,2333.0,4.0,16.76,2,1,0,17.31,8.0,121.0,1101.0,71.23,,8.0,12.0,12.0,2.0,-8.0,0.46,0.435048,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000,1.000000
16435,56,56.0,8.0,2015-09-28,"ST / Turf / ""A""",1000.0,G,4.0,2.0,50.0,W Y So,C Y Ho,3-3/4,169.0,121.0,12.0,12.0,8.0,,,57.78,1101.0,H1,HK_2014_T262,HANG'S DECISION,"{'country_of_origin': 'NZ', 'age': 10, 'colour...",NZ,10,Bay,Gelding,PPG,W Y So,Wong Yuet Sing & Tony Wong Sze Ho,Postponed,Bislieri,Encosta de Lago,['Nil'],106.0,2439.0,3.0,17.31,1,0,0,16.85,13.0,125.0,1059.0,,,13.0,13.0,13.0,-4.0,42.0,,0.036618,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,,1.000000,1.000000,1.000000


#### Experimental

In [40]:
records.mad()
var

#records.loc[:,('prev_result')] = records.groupby('name')[('result')].shift(periods=-1)

#records.loc[:,("actual_wt_change_rank")]

NameError: name 'var' is not defined

#### Rolling

In [91]:
roll_records = records.copy()

roll_records = roll_records.set_index('date', drop=True)

roll_records.sort_index(ascending=True,inplace=True)

In [92]:
#prev_result_mean

roll_records["168D_average"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("168D", min_periods=1).mean())
roll_records["168D_average_rank"] = roll_records.groupby(["race_index"])["168D_average"].rank(pct=True, ascending=True)

roll_records["112D_average"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("112D", min_periods=1).mean())
roll_records["112D_average_rank"] = roll_records.groupby(["race_index"])["112D_average"].rank(pct=True, ascending=True)

roll_records["84D_average"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("84D", min_periods=1).mean())
roll_records["84D_average_rank"] = roll_records.groupby(["race_index"])["84D_average"].rank(pct=True, ascending=True)

In [93]:
#prev_result_median

roll_records["168D_median"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("168D", min_periods=1).median())
roll_records["168D_median_rank"] = roll_records.groupby(["race_index"])["168D_median"].rank(pct=True, ascending=True)

roll_records["112D_median"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("112D", min_periods=1).median())
roll_records["112D_median_rank"] = roll_records.groupby(["race_index"])["112D_median"].rank(pct=True, ascending=True)

roll_records["84D_median"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("84D", min_periods=1).median())
roll_records["84D_median_rank"] = roll_records.groupby(["race_index"])["84D_median"].rank(pct=True, ascending=True)

In [94]:
#prev_result_max

roll_records["168D_worst"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("168D", min_periods=1).max())
roll_records["168D_worst_rank"] = roll_records.groupby(["race_index"])["168D_worst"].rank(pct=True, ascending=True)

roll_records["112D_worst"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("112D", min_periods=1).max())
roll_records["112D_worst_rank"] = roll_records.groupby(["race_index"])["112D_worst"].rank(pct=True, ascending=True)

roll_records["84D_worst"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("84D", min_periods=1).max())
roll_records["84D_worst_rank"] = roll_records.groupby(["race_index"])["84D_worst"].rank(pct=True, ascending=True)

In [95]:
#prev_result_min

roll_records["168D_best"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("168D", min_periods=1).min())
roll_records["168D_best_rank"] = roll_records.groupby(["race_index"])["168D_best"].rank(pct=True, ascending=True)

roll_records["112D_best"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("112D", min_periods=1).min())
roll_records["112D_best_rank"] = roll_records.groupby(["race_index"])["112D_best"].rank(pct=True, ascending=True)

roll_records["84D_best"] = roll_records.groupby(["name"])["prev_result"].transform(lambda s: s.rolling("84D", min_periods=1).min())
roll_records["84D_best_rank"] = roll_records.groupby(["race_index"])["84D_best"].rank(pct=True, ascending=True)

In [96]:
#prev_speed_mean

roll_records["168D_speed"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("168D", min_periods=1).mean())
roll_records["168D_speed_rank"] = roll_records.groupby(["race_index"])["168D_speed"].rank(pct=True, ascending=True)

roll_records["112D_speed"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("112D", min_periods=1).mean())
roll_records["112D_speed_rank"] = roll_records.groupby(["race_index"])["112D_speed"].rank(pct=True, ascending=True)

roll_records["84D_speed"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("84D", min_periods=1).mean())
roll_records["84D_speed_rank"] = roll_records.groupby(["race_index"])["84D_speed"].rank(pct=True, ascending=True)

In [97]:
#prev_speed_median

roll_records["168D_speed_median"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("168D", min_periods=1).median())
roll_records["168D_speed_median_rank"] = roll_records.groupby(["race_index"])["168D_speed_median"].rank(pct=True, ascending=True)

roll_records["112D_speed_median"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("112D", min_periods=1).median())
roll_records["112D_speed_median_rank"] = roll_records.groupby(["race_index"])["112D_speed_median"].rank(pct=True, ascending=True)

roll_records["84D_speed_median"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("84D", min_periods=1).median())
roll_records["84D_speed_median_rank"] = roll_records.groupby(["race_index"])["84D_speed_median"].rank(pct=True, ascending=True)

In [98]:
#prev_speed_max

roll_records["168D_speed_max"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("168D", min_periods=1).max())
roll_records["168D_speed_max_rank"] = roll_records.groupby(["race_index"])["168D_speed_max"].rank(pct=True, ascending=True)

roll_records["112D_speed_max"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("112D", min_periods=1).max())
roll_records["112D_speed_max_rank"] = roll_records.groupby(["race_index"])["112D_speed_max"].rank(pct=True, ascending=True)

roll_records["84D_speed_max"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("84D", min_periods=1).max())
roll_records["84D_speed_max_rank"] = roll_records.groupby(["race_index"])["84D_speed_max"].rank(pct=True, ascending=True)

In [99]:
#prev_speed_min

roll_records["168D_speed_min"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("168D", min_periods=1).min())
roll_records["168D_speed_min_rank"] = roll_records.groupby(["race_index"])["168D_speed_min"].rank(pct=True, ascending=True)

roll_records["112D_speed_min"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("112D", min_periods=1).min())
roll_records["112D_speed_min_rank"] = roll_records.groupby(["race_index"])["112D_speed_min"].rank(pct=True, ascending=True)

roll_records["84D_speed_min"] = roll_records.groupby(["name"])["prev_speed"].transform(lambda s: s.rolling("84D", min_periods=1).min())
roll_records["84D_speed_min_rank"] = roll_records.groupby(["race_index"])["84D_speed_min"].rank(pct=True, ascending=True)

In [62]:
#prev_jockey_result_mean

roll_records["168D_jockey"] = roll_records.groupby(["jockey"])["prev_result"].transform(lambda s: s.rolling("168D", min_periods=1).mean())
roll_records["168D_jockey_rank"] = roll_records.groupby(["race_index"])["168D_jockey"].rank(pct=True, ascending=True)

roll_records["112D_jockey"] = roll_records.groupby(["jockey"])["prev_result"].transform(lambda s: s.rolling("112D", min_periods=1).mean())
roll_records["112D_jockey_rank"] = roll_records.groupby(["race_index"])["112D_jockey"].rank(pct=True, ascending=True)

roll_records["84D_jockey"] = roll_records.groupby(["jockey"])["prev_result"].transform(lambda s: s.rolling("84D", min_periods=1).mean())
roll_records["84D_jockey_rank"] = roll_records.groupby(["race_index"])["84D_jockey"].rank(pct=True, ascending=True)

ValueError: Length mismatch: Expected axis has 16432 elements, new values have 16437 elements

In [100]:
roll_records.corrwith(roll_records["result"])#.plot(kind='barh',figsize=(10,20),fontsize=12, grid = True)

race_id                   0.008526
race_index                0.008526
result                    1.000000
dist                      0.005744
race_class               -0.001794
draw                      0.134974
rating                   -0.039867
win_odds                  0.402648
actual_weight            -0.076979
running_pos1              0.187518
running_pos2              0.238052
running_pos3              0.602781
running_pos4              0.864890
running_pos5              0.938434
finish_time               0.033994
declared_weight          -0.059077
age                      -0.019834
rest_time                 0.003478
cumul_rest_time          -0.024625
real_age                 -0.001876
speed                    -0.268330
horse_exp                -0.045238
distance_exp             -0.071984
horse_jockey_exp         -0.091361
prev_speed               -0.093283
prev_result               0.310293
prev_actual_weight       -0.039032
prev_declared_weight     -0.051977
prev_time           

## race_checker

In [None]:
records = records[records.result.notna()]
participants_count = records.groupby(['race_index','date'])['id'].count()
result_sum = records.groupby(['race_index','date'])['result'].sum()

race_checker = (lambda p,r: r == (p / 2)*(1 + p))
race_checker(participants_count, result_sum)

TypeError: incompatible index of inserted column with frame index

In [None]:

def race_checker(p, r):
    
    participants = records.groupby(['race_index','date'])['id'].count()
    result = records.groupby(['race_index','date'])['result'].sum()
    
    for x in p :
        
        if x == 14 and y == 105:
            return True       
        else:
            return False


ValueError: too many values to unpack (expected 2)

In [None]:
records.groupby(['race_index','date'])['result'].sum()

race_index  date      
1.0         2019-09-01      2.0
            2020-09-06     12.0
            2021-09-05     34.0
2.0         2019-09-01     11.0
            2020-09-06     14.0
            2021-09-05     28.0
3.0         2020-09-06     78.0
            2021-09-05     53.0
4.0         2019-09-01     24.0
            2020-09-06     47.0
            2021-09-05     42.0
5.0         2018-09-02      4.0
            2019-09-01     19.0
            2020-09-06     45.0
            2021-09-05     56.0
6.0         2018-09-02      3.0
            2019-09-01     23.0
            2020-09-06     30.0
            2021-09-05     87.0
7.0         2018-09-02      6.0
            2019-09-01      8.0
            2020-09-06     72.0
            2021-09-05     52.0
8.0         2017-09-03      6.0
            2018-09-02     26.0
            2019-09-01     25.0
            2020-09-06     10.0
            2021-09-05     91.0
9.0         2017-09-03      4.0
            2018-09-02     14.0
            2019-