In [1]:
import pandas as pd
import json
from pandas import json_normalize
from tqdm import tqdm_notebook as tqdm
import numpy as np
import ast


pd.set_option('display.precision',6)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

In [2]:
fighters_df = pd.read_csv('data/0.fighters_raw.csv', index_col=0)
fighters_df['dateOfBirth'] = pd.to_datetime(fighters_df['dateOfBirth'])
fighters_df['birthYear'] = fighters_df['dateOfBirth'].dt.year
use_cols = ['name', 'armSpan', 'city', 'country', 'dateOfBirth',
       'height', 'legSwing', 'weight', 'timezone', 'weightCategory.name']

fighters_df[use_cols].isna().sum()

name                      0
armSpan                1942
city                   2308
country                1211
dateOfBirth              70
height                  334
legSwing               2895
weight                  139
timezone               1361
weightCategory.name       0
dtype: int64

In [3]:
df = pd.read_csv('data/0.events_raw.csv', index_col=0)
df = df.set_index('id').sort_index()

df['eventDate.date'] = pd.to_datetime(df['eventDate.date'])
df = df.sort_values('eventDate.date').reset_index()
df['fighters'] = df['fighters'].apply(lambda x: ast.literal_eval(x))

df

Unnamed: 0,id,avgOdds,city,completed,country,duration,eventDate.date,eventDate.timezone,eventDate.timezone_type,fighterId_1,fighterId_2,fighters,link,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId
0,5208,[],Denver,True,USA,26.0,1993-11-12 00:00:00,Europe/Berlin,3,1923,1925,"[{'fighterId': 1923, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/567a09fd...,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0
1,5207,[],Denver,True,USA,260.0,1993-11-12 00:00:00,Europe/Berlin,3,1870,1908,"[{'fighterId': 1870, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/2d2bbc86...,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1908.0
2,5206,[],Denver,True,USA,109.0,1993-11-12 00:00:00,Europe/Berlin,3,1631,1895,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/46acd54c...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1631.0
3,5205,[],Denver,True,USA,138.0,1993-11-12 00:00:00,Europe/Berlin,3,1646,1924,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/cecdc0da...,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0
4,5204,[],Denver,True,USA,57.0,1993-11-12 00:00:00,Europe/Berlin,3,1631,1646,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/ffd16691...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7095,40493,"[{'fighterId': 348, 'value': 1.95}, {'fighterI...",Las Vegas,True,USA,300.0,2021-02-27 00:00:00,Europe/Berlin,3,348,651,"[{'fighterId': 348, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/f3ad8ad8...,UFC Fight Night,3.0,America/Los_Angeles,11,Женский наилегчайший вес,['DEC'],0.0
7096,40492,"[{'fighterId': 53, 'value': 2}, {'fighterId': ...",Las Vegas,False,USA,,2021-02-27 01:45:00,Europe/Berlin,3,53,275,"[{'fighterId': 53, 'fightStats': [], 'roundSta...",http://www.ufcstats.com/fight-details/dd3dc5fe...,UFC Fight Night,,America/Los_Angeles,6,Полусредний вес,[],
7097,40507,"[{'fighterId': 145, 'value': 3.95}, {'fighterI...",Las Vegas,False,USA,,2021-02-27 03:45:00,Europe/Berlin,3,145,499,"[{'fighterId': 145, 'fightStats': [], 'roundSt...",http://www.ufcstats.com/fight-details/8ad9b2f0...,UFC Fight Night,,America/Los_Angeles,10,Женский минимальный вес,[],
7098,40453,"[{'fighterId': 628, 'value': 2.1}, {'fighterId...",Las Vegas,True,USA,295.0,2021-03-06 00:00:00,Europe/Berlin,3,628,687,"[{'fighterId': 628, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/873306ad...,UFC 259,1.0,America/Los_Angeles,2,Наилегчайший вес,['KO'],687.0


In [4]:
def parse_odds(df):  # Parse odds from avgOdds dict
    # Добавить кэффы в df
    for i in df.index[:]:
        avgodds = df.loc[i, 'avgOdds']

        if avgodds == '[]':
            continue

        # Преобразую данные о коэффах из строки
        ids1, odd1 = int(avgodds.split()[1].split(',')[0]), float(avgodds.split()[3].split('}')[0])
        ids2, odd2 = int(avgodds.split()[5].split(',')[0]), float(avgodds.split()[7].split('}')[0])

        # Проверить совпадает ли fighterId_1 с ids1 (который в avgOdds)
        if df.loc[i, 'fighterId_1'] == ids1:
            df.at[i, 'odd1'] = odd1
            df.at[i, 'odd2'] = odd2

        if df.loc[i, 'fighterId_1'] == ids2:
            df.at[i, 'odd1'] = odd2
            df.at[i, 'odd2'] = odd1
    
    return df.drop('avgOdds', axis=1)

df = parse_odds(df)
df = df[~df['odd1'].isna()].reset_index()

# When fighterID_1 wins, then Winner == 1 
df['winner'] = df['winnerId'] == df['fighterId_1']

df

Unnamed: 0,index,id,city,completed,country,duration,eventDate.date,eventDate.timezone,eventDate.timezone_type,fighterId_1,fighterId_2,fighters,link,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,odd1,odd2,winner
0,1564,4273,Las Vegas,True,USA,61.0,2008-07-19 00:00:00,Europe/Berlin,3,503,1429,"[{'fighterId': 503, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/4eafb265...,UFC,1.0,America/Los_Angeles,8,Полутяжелый вес,['KO'],503.0,3.53,1.34,True
1,2481,3667,Louisville,True,USA,300.0,2011-03-03 00:00:00,Europe/Berlin,3,983,1366,"[{'fighterId': 983, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/bbbb4c62...,UFC on VERSUS,3.0,America/Chicago,5,Легкий вес,['DEC'],983.0,2.38,1.49,True
2,3262,2969,Newark,True,USA,300.0,2013-04-27 00:00:00,Europe/Berlin,3,1181,1231,"[{'fighterId': 1181, 'fightStats': {'hitsTotal...",http://www.ufcstats.com/fight-details/44ad16cd...,UFC 159,3.0,Europe/London,4,Полулегкий вес,['DEC'],1181.0,2.93,1.53,True
3,3263,2970,Newark,True,USA,273.0,2013-04-27 00:00:00,Europe/Berlin,3,122,1188,"[{'fighterId': 122, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/8151421e...,UFC 159,1.0,Europe/London,8,Полутяжелый вес,['KO'],122.0,1.13,9.00,True
4,3264,2968,Newark,True,USA,300.0,2013-04-27 00:00:00,Europe/Berlin,3,428,1136,"[{'fighterId': 428, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/6d6a1834...,UFC 159,3.0,Europe/London,4,Полулегкий вес,['DEC'],1136.0,2.06,1.95,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3729,7095,40493,Las Vegas,True,USA,300.0,2021-02-27 00:00:00,Europe/Berlin,3,348,651,"[{'fighterId': 348, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/f3ad8ad8...,UFC Fight Night,3.0,America/Los_Angeles,11,Женский наилегчайший вес,['DEC'],0.0,1.95,1.92,False
3730,7096,40492,Las Vegas,False,USA,,2021-02-27 01:45:00,Europe/Berlin,3,53,275,"[{'fighterId': 53, 'fightStats': [], 'roundSta...",http://www.ufcstats.com/fight-details/dd3dc5fe...,UFC Fight Night,,America/Los_Angeles,6,Полусредний вес,[],,2.00,1.87,False
3731,7097,40507,Las Vegas,False,USA,,2021-02-27 03:45:00,Europe/Berlin,3,145,499,"[{'fighterId': 145, 'fightStats': [], 'roundSt...",http://www.ufcstats.com/fight-details/8ad9b2f0...,UFC Fight Night,,America/Los_Angeles,10,Женский минимальный вес,[],,3.95,1.27,False
3732,7098,40453,Las Vegas,True,USA,295.0,2021-03-06 00:00:00,Europe/Berlin,3,628,687,"[{'fighterId': 628, 'fightStats': {'hitsTotal'...",http://www.ufcstats.com/fight-details/873306ad...,UFC 259,1.0,America/Los_Angeles,2,Наилегчайший вес,['KO'],687.0,2.10,1.79,False


In [5]:
def parse_data_from_fight(fightStats, duration):
    if len(fightStats) == 0:
        return [np.nan]*23
    
    hitsTotal = fightStats.get('hitsTotal')
    hitsSuccessful = fightStats.get('hitsSuccessful')
    accentedHitsTotal = fightStats.get('accentedHitsTotal')
    accentedHitsSuccessful = fightStats.get('accentedHitsSuccessful')
    takedownTotal = fightStats.get('takedownTotal')
    takedownSuccessful = fightStats.get('takedownSuccessful')
    accentedHitsPositionDistanceTotal = fightStats.get('accentedHitsPositionDistanceTotal')
    accentedHitsPositionDistanceSuccessful = fightStats.get('accentedHitsPositionDistanceSuccessful')
    accentedHitsPositionClinchTotal = fightStats.get('accentedHitsPositionClinchTotal')
    accentedHitsPositionClinchSuccessful = fightStats.get('accentedHitsPositionClinchSuccessful')
    accentedHitsPositionParterTotal = fightStats.get('accentedHitsPositionParterTotal')
    accentedHitsPositionParterSuccessful = fightStats.get('accentedHitsPositionParterSuccessful')

    try: hitsSuccessful_percent = hitsSuccessful/hitsTotal
    except ZeroDivisionError: hitsSuccessful_percent = np.nan

    try: accentedHitsSuccessful_percent = accentedHitsSuccessful/hitsTotal
    except ZeroDivisionError: accentedHitsSuccessful_percent = np.nan

    try: accentedHits_percent = accentedHitsTotal/hitsTotal
    except ZeroDivisionError: accentedHits_percent = np.nan

    try: takedownSuccessful_percent = takedownSuccessful/takedownTotal
    except ZeroDivisionError: takedownSuccessful_percent = np.nan

    try: accentedHitsPositionDistanceSuccessful_percent = accentedHitsPositionDistanceTotal/accentedHitsPositionDistanceSuccessful
    except ZeroDivisionError: accentedHitsPositionDistanceSuccessful_percent = np.nan

    try: accentedHitsPositionClinchSuccessful_percent = accentedHitsPositionClinchTotal/accentedHitsPositionClinchSuccessful
    except ZeroDivisionError: accentedHitsPositionClinchSuccessful_percent = np.nan

    try: accentedHitsPositionParterSuccessful_percent = accentedHitsPositionParterTotal/accentedHitsPositionParterSuccessful
    except ZeroDivisionError: accentedHitsPositionParterSuccessful_percent = np.nan

    try: takedowns_to_hits = takedownSuccessful / hitsSuccessful
    except ZeroDivisionError: takedowns_to_hits = np.nan

    try: HitsPositionDistance_to_hits = accentedHitsPositionDistanceSuccessful / hitsSuccessful
    except ZeroDivisionError: HitsPositionDistance_to_hits = np.nan

    try: HitsPositionClinch_to_hits = accentedHitsPositionClinchSuccessful / hitsSuccessful
    except ZeroDivisionError: HitsPositionClinch_to_hits = np.nan

    try: HitsPositionParter_to_hits = accentedHitsPositionParterSuccessful / hitsSuccessful
    except ZeroDivisionError: HitsPositionParter_to_hits = np.nan
    
    hitsPM = (60 * hitsTotal) / duration 
    accentedHitsPM = (60 * accentedHitsTotal) / duration 
    takedownsPM = (60 * takedownTotal) / duration 
    accentedHitsDistancePM = (60 * accentedHitsPositionDistanceTotal) / duration 
    accentedHitsClinchPM = (60 * accentedHitsPositionClinchTotal) / duration 
    accentedHitsParterPM = (60 * accentedHitsPositionParterTotal) / duration 
        
    hitsSuccessfulPM = (60 * hitsSuccessful) / duration 
    accentedHitsSuccessfulPM = (60 * accentedHitsSuccessful) / duration 
    takedownsSuccessfulPM = (60 * takedownSuccessful) / duration 
    accentedHitsDistanceSuccessfulPM = (60 * accentedHitsPositionDistanceSuccessful) / duration 
    accentedHitsClinchSuccessfulPM = (60 * accentedHitsPositionClinchSuccessful) / duration 
    accentedHitsParterSuccessfulPM = (60 * accentedHitsPositionParterSuccessful) / duration 
        
    return  hitsPM, accentedHitsPM, takedownsPM, \
            accentedHitsDistancePM, accentedHitsClinchPM, accentedHitsParterPM, \
            hitsSuccessfulPM, accentedHitsSuccessfulPM, takedownsSuccessfulPM, \
            accentedHitsDistanceSuccessfulPM, accentedHitsClinchSuccessfulPM, accentedHitsParterSuccessfulPM, \
            hitsSuccessful_percent, accentedHitsSuccessful_percent, accentedHits_percent, \
            takedownSuccessful_percent, accentedHitsPositionDistanceSuccessful_percent, \
            accentedHitsPositionClinchSuccessful_percent, accentedHitsPositionParterSuccessful_percent, \
            takedowns_to_hits, HitsPositionDistance_to_hits, HitsPositionClinch_to_hits, \
            HitsPositionParter_to_hits 



In [6]:
fightStats_cols = ['hitsPM', 'accentedHitsPM', 'takedownsPM', 
            'accentedHitsDistancePM', 'accentedHitsClinchPM', 'accentedHitsParterPM', 
            
            'hitsSuccessfulPM', 'accentedHitsSuccessfulPM', 'takedownsSuccessfulPM', 
            'accentedHitsDistanceSuccessfulPM', 'accentedHitsClinchSuccessfulPM', 'accentedHitsParterSuccessfulPM',

            'hitsSuccessful_percent', 'accentedHitsSuccessful_percent', 'accentedHits_percent', 
            'takedownSuccessful_percent', 'accentedHitsPositionDistanceSuccessful_prcent',
            'accentedHitsPositionClinchSuccessful_percent',  'accentedHitsPositionParterSuccessful_percent', 
            'takedowns_to_hits', 'HitsPositionDistance_to_hits', 'HitsPositionClinch_to_hits', 
            'HitsPositionParter_to_hits',
            ]

fighter1_cols = [i+'_fighter1' for i in fightStats_cols]
fighter2_cols = [i+'_fighter2' for i in fightStats_cols]


In [7]:
df_stats = pd.DataFrame(index = df.index, columns=fighter1_cols + fighter2_cols + ['roundsCount'])

suppl_cols =  ['id', 'city', 'completed', 'country', 'duration',
               'eventDate.date','fighterId_1', 'fighterId_2', 'name', 'rounds',
               'timezone', 'weightCategory.name', 'winMethods',
               'winnerId', 'odd1', 'odd2', 'winner',
               ]

df_stats[suppl_cols] = df[suppl_cols]
df_stats

Unnamed: 0,hitsPM_fighter1,accentedHitsPM_fighter1,takedownsPM_fighter1,accentedHitsDistancePM_fighter1,accentedHitsClinchPM_fighter1,accentedHitsParterPM_fighter1,hitsSuccessfulPM_fighter1,accentedHitsSuccessfulPM_fighter1,takedownsSuccessfulPM_fighter1,accentedHitsDistanceSuccessfulPM_fighter1,accentedHitsClinchSuccessfulPM_fighter1,accentedHitsParterSuccessfulPM_fighter1,hitsSuccessful_percent_fighter1,accentedHitsSuccessful_percent_fighter1,accentedHits_percent_fighter1,takedownSuccessful_percent_fighter1,accentedHitsPositionDistanceSuccessful_prcent_fighter1,accentedHitsPositionClinchSuccessful_percent_fighter1,accentedHitsPositionParterSuccessful_percent_fighter1,takedowns_to_hits_fighter1,HitsPositionDistance_to_hits_fighter1,HitsPositionClinch_to_hits_fighter1,HitsPositionParter_to_hits_fighter1,hitsPM_fighter2,accentedHitsPM_fighter2,takedownsPM_fighter2,accentedHitsDistancePM_fighter2,accentedHitsClinchPM_fighter2,accentedHitsParterPM_fighter2,hitsSuccessfulPM_fighter2,accentedHitsSuccessfulPM_fighter2,takedownsSuccessfulPM_fighter2,accentedHitsDistanceSuccessfulPM_fighter2,accentedHitsClinchSuccessfulPM_fighter2,accentedHitsParterSuccessfulPM_fighter2,hitsSuccessful_percent_fighter2,accentedHitsSuccessful_percent_fighter2,accentedHits_percent_fighter2,takedownSuccessful_percent_fighter2,accentedHitsPositionDistanceSuccessful_prcent_fighter2,accentedHitsPositionClinchSuccessful_percent_fighter2,accentedHitsPositionParterSuccessful_percent_fighter2,takedowns_to_hits_fighter2,HitsPositionDistance_to_hits_fighter2,HitsPositionClinch_to_hits_fighter2,HitsPositionParter_to_hits_fighter2,roundsCount,id,city,completed,country,duration,eventDate.date,fighterId_1,fighterId_2,name,rounds,timezone,weightCategory.name,winMethods,winnerId,odd1,odd2,winner
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4273,Las Vegas,True,USA,61.0,2008-07-19 00:00:00,503,1429,UFC,1.0,America/Los_Angeles,Полутяжелый вес,['KO'],503.0,3.53,1.34,True
1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3667,Louisville,True,USA,300.0,2011-03-03 00:00:00,983,1366,UFC on VERSUS,3.0,America/Chicago,Легкий вес,['DEC'],983.0,2.38,1.49,True
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2969,Newark,True,USA,300.0,2013-04-27 00:00:00,1181,1231,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1181.0,2.93,1.53,True
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2970,Newark,True,USA,273.0,2013-04-27 00:00:00,122,1188,UFC 159,1.0,Europe/London,Полутяжелый вес,['KO'],122.0,1.13,9.00,True
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2968,Newark,True,USA,300.0,2013-04-27 00:00:00,428,1136,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1136.0,2.06,1.95,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3729,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40493,Las Vegas,True,USA,300.0,2021-02-27 00:00:00,348,651,UFC Fight Night,3.0,America/Los_Angeles,Женский наилегчайший вес,['DEC'],0.0,1.95,1.92,False
3730,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40492,Las Vegas,False,USA,,2021-02-27 01:45:00,53,275,UFC Fight Night,,America/Los_Angeles,Полусредний вес,[],,2.00,1.87,False
3731,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40507,Las Vegas,False,USA,,2021-02-27 03:45:00,145,499,UFC Fight Night,,America/Los_Angeles,Женский минимальный вес,[],,3.95,1.27,False
3732,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40453,Las Vegas,True,USA,295.0,2021-03-06 00:00:00,628,687,UFC 259,1.0,America/Los_Angeles,Наилегчайший вес,['KO'],687.0,2.10,1.79,False


In [8]:
for i in tqdm(df.index[:]):
    fighters = df.loc[i, 'fighters']
    fighterId_1, fighterId_2, duration = df.loc[i, ['fighterId_1', 'fighterId_2', 'duration']]

    # Check index of fighters in dict
    if fighters[0]['fighterId'] == fighterId_1: 
        fighterId_1_ind = 0
        fighterId_2_ind = 1
    else:
        fighterId_1_ind = 1
        fighterId_2_ind = 0
        
    fighter1_stats = parse_data_from_fight(fighters[fighterId_1_ind]['fightStats'], duration)
    fighter2_stats = parse_data_from_fight(fighters[fighterId_2_ind]['fightStats'], duration)
    
    df_stats.loc[i, fighter1_cols] = fighter1_stats
    df_stats.loc[i, fighter2_cols] = fighter2_stats
    
df_stats    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(df.index[:]):


  0%|          | 0/3734 [00:00<?, ?it/s]

Unnamed: 0,hitsPM_fighter1,accentedHitsPM_fighter1,takedownsPM_fighter1,accentedHitsDistancePM_fighter1,accentedHitsClinchPM_fighter1,accentedHitsParterPM_fighter1,hitsSuccessfulPM_fighter1,accentedHitsSuccessfulPM_fighter1,takedownsSuccessfulPM_fighter1,accentedHitsDistanceSuccessfulPM_fighter1,accentedHitsClinchSuccessfulPM_fighter1,accentedHitsParterSuccessfulPM_fighter1,hitsSuccessful_percent_fighter1,accentedHitsSuccessful_percent_fighter1,accentedHits_percent_fighter1,takedownSuccessful_percent_fighter1,accentedHitsPositionDistanceSuccessful_prcent_fighter1,accentedHitsPositionClinchSuccessful_percent_fighter1,accentedHitsPositionParterSuccessful_percent_fighter1,takedowns_to_hits_fighter1,HitsPositionDistance_to_hits_fighter1,HitsPositionClinch_to_hits_fighter1,HitsPositionParter_to_hits_fighter1,hitsPM_fighter2,accentedHitsPM_fighter2,takedownsPM_fighter2,accentedHitsDistancePM_fighter2,accentedHitsClinchPM_fighter2,accentedHitsParterPM_fighter2,hitsSuccessfulPM_fighter2,accentedHitsSuccessfulPM_fighter2,takedownsSuccessfulPM_fighter2,accentedHitsDistanceSuccessfulPM_fighter2,accentedHitsClinchSuccessfulPM_fighter2,accentedHitsParterSuccessfulPM_fighter2,hitsSuccessful_percent_fighter2,accentedHitsSuccessful_percent_fighter2,accentedHits_percent_fighter2,takedownSuccessful_percent_fighter2,accentedHitsPositionDistanceSuccessful_prcent_fighter2,accentedHitsPositionClinchSuccessful_percent_fighter2,accentedHitsPositionParterSuccessful_percent_fighter2,takedowns_to_hits_fighter2,HitsPositionDistance_to_hits_fighter2,HitsPositionClinch_to_hits_fighter2,HitsPositionParter_to_hits_fighter2,roundsCount,id,city,completed,country,duration,eventDate.date,fighterId_1,fighterId_2,name,rounds,timezone,weightCategory.name,winMethods,winnerId,odd1,odd2,winner
0,12.786885,12.786885,0.0,4.918033,0.0,7.868852,12.786885,12.786885,0.0,4.918033,0.0,7.868852,1.0,1.0,1.0,,1.0,,1.0,0.0,0.384615,0.0,0.615385,2.95082,2.95082,0.0,2.95082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,4273,Las Vegas,True,USA,61.0,2008-07-19 00:00:00,503,1429,UFC,1.0,America/Los_Angeles,Полутяжелый вес,['KO'],503.0,3.53,1.34,True
1,26.4,20.6,2.2,14.2,3.4,3.0,16.4,11.0,1.0,5.6,2.4,3.0,0.621212,0.416667,0.780303,0.454545,2.535714,1.416667,1.0,0.060976,0.341463,0.146341,0.182927,11.0,9.0,1.2,8.0,0.8,0.2,4.2,2.4,0.4,1.6,0.6,0.2,0.381818,0.218182,0.818182,0.333333,5.0,1.333333,1.0,0.095238,0.380952,0.142857,0.047619,,3667,Louisville,True,USA,300.0,2011-03-03 00:00:00,983,1366,UFC on VERSUS,3.0,America/Chicago,Легкий вес,['DEC'],983.0,2.38,1.49,True
2,22.2,10.8,2.6,7.6,1.2,2.0,16.8,6.0,1.0,3.2,0.8,2.0,0.756757,0.27027,0.486486,0.384615,2.375,1.5,1.0,0.059524,0.190476,0.047619,0.119048,18.4,15.4,0.0,14.6,0.4,0.4,6.0,3.0,0.0,2.2,0.4,0.4,0.326087,0.163043,0.836957,,6.636364,1.0,1.0,0.0,0.366667,0.066667,0.066667,,2969,Newark,True,USA,300.0,2013-04-27 00:00:00,1181,1231,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1181.0,2.93,1.53,True
3,9.450549,5.934066,1.318681,0.21978,1.538462,4.175824,7.692308,4.395604,0.659341,0.0,1.098901,3.296703,0.813953,0.465116,0.627907,0.5,,1.4,1.266667,0.085714,0.0,0.142857,0.428571,11.208791,3.296703,0.0,1.098901,2.197802,0.0,8.351648,1.318681,0.0,0.0,1.318681,0.0,0.745098,0.117647,0.294118,,,1.666667,,0.0,0.0,0.157895,0.0,,2970,Newark,True,USA,273.0,2013-04-27 00:00:00,122,1188,UFC 159,1.0,Europe/London,Полутяжелый вес,['KO'],122.0,1.13,9.00,True
4,39.2,18.4,0.4,12.0,3.8,2.6,26.6,8.2,0.2,3.0,3.2,2.0,0.678571,0.209184,0.469388,0.5,4.0,1.1875,1.3,0.007519,0.112782,0.120301,0.075188,40.2,21.8,0.8,10.4,6.4,5.0,25.6,10.0,0.2,4.4,3.0,2.6,0.636816,0.248756,0.542289,0.25,2.363636,2.133333,1.923077,0.007812,0.171875,0.117188,0.101562,,2968,Newark,True,USA,300.0,2013-04-27 00:00:00,428,1136,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1136.0,2.06,1.95,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3729,28.4,16.0,0.0,8.0,7.2,0.8,22.2,10.4,0.0,4.0,5.8,0.6,0.78169,0.366197,0.56338,,2.0,1.241379,1.333333,0.0,0.18018,0.261261,0.027027,31.2,14.4,2.4,12.6,1.2,0.6,21.4,6.0,0.6,4.6,1.0,0.4,0.685897,0.192308,0.461538,0.25,2.73913,1.2,1.5,0.028037,0.214953,0.046729,0.018692,,40493,Las Vegas,True,USA,300.0,2021-02-27 00:00:00,348,651,UFC Fight Night,3.0,America/Los_Angeles,Женский наилегчайший вес,['DEC'],0.0,1.95,1.92,False
3730,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40492,Las Vegas,False,USA,,2021-02-27 01:45:00,53,275,UFC Fight Night,,America/Los_Angeles,Полусредний вес,[],,2.00,1.87,False
3731,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40507,Las Vegas,False,USA,,2021-02-27 03:45:00,145,499,UFC Fight Night,,America/Los_Angeles,Женский минимальный вес,[],,3.95,1.27,False
3732,8.745763,3.457627,0.20339,3.254237,0.0,0.20339,5.491525,1.220339,0.20339,1.016949,0.0,0.20339,0.627907,0.139535,0.395349,1.0,3.2,,1.0,0.037037,0.185185,0.0,0.037037,6.508475,4.474576,0.0,4.067797,0.40678,0.0,4.271186,2.644068,0.0,2.237288,0.40678,0.0,0.65625,0.40625,0.6875,,1.818182,1.0,,0.0,0.52381,0.095238,0.0,,40453,Las Vegas,True,USA,295.0,2021-03-06 00:00:00,628,687,UFC 259,1.0,America/Los_Angeles,Наилегчайший вес,['KO'],687.0,2.10,1.79,False


In [9]:
def get_figher_statistics_from_past(df_stats, fighterId, eventDate):
    '''
    Aggregate statistics from fighter from the past.
    Check both positions 1 and 2.
    '''
    first_pos_df  = df_stats[(df_stats['fighterId_1'] == fighterId) & (df_stats['eventDate.date'] < eventDate)]
    first_pos_df  = pd.DataFrame(first_pos_df[fighter1_cols].values)

    second_pos_df = df_stats[(df_stats['fighterId_2'] == fighterId) & (df_stats['eventDate.date'] < eventDate)]
    second_pos_df = pd.DataFrame(second_pos_df[fighter2_cols].values)

    joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)

    return joined_df.mean()

valid_date = '2019-01-01 00:00:00'
test_date = '2020-01-01 00:00:00'

dates = [valid_date, test_date]
train_df_stats = df_stats[df_stats['eventDate.date'] < valid_date]
valid_df_stats = df_stats[(df_stats['eventDate.date'] > valid_date) & (df_stats['eventDate.date'] < test_date)]
test_df_stats = df_stats[df_stats['eventDate.date'] > test_date]

train_df_stats = df_stats[df_stats['eventDate.date'] < valid_date]
valid_df_stats = df_stats[(df_stats['eventDate.date'] > valid_date) & (df_stats['eventDate.date'] < test_date)]
test_df_stats = df_stats[df_stats['eventDate.date'] > test_date]

train_df_stats.shape, valid_df_stats.shape, test_df_stats.shape

((2622, 64), (501, 64), (611, 64))

In [10]:
train_df = train_df_stats.copy()
train_df['fighter1_fightsAmount'] = 0 
train_df['fighter2_fightsAmount'] = 0 

for i in tqdm(train_df_stats.index):
    fighterId_1, fighterId_2 = train_df.loc[i, ['fighterId_1', 'fighterId_2']]
    
    fighter1_history_stats = get_figher_statistics_from_past(train_df, fighterId_1, valid_date)
    train_df.loc[i, fighter1_cols] = fighter1_history_stats.values
    
    fighter2_history_stats = get_figher_statistics_from_past(train_df, fighterId_2, valid_date)
    train_df.loc[i, fighter2_cols] = fighter2_history_stats.values
    
    fighter1_fightsAmount = (((train_df['fighterId_1'] == fighterId_1) & (train_df['eventDate.date'] < valid_date)) | 
                             ((train_df['fighterId_2'] == fighterId_1) & (train_df['eventDate.date'] < valid_date))).sum()
    
    fighter2_fightsAmount = (((train_df['fighterId_1'] == fighterId_2) & (train_df['eventDate.date'] < valid_date)) 
                             |((train_df['fighterId_2'] == fighterId_2) & (train_df['eventDate.date'] < valid_date))).sum()
    
    train_df.loc[i, ['fighter1_fightsAmount', 
                       'fighter2_fightsAmount']] = fighter1_fightsAmount, fighter2_fightsAmount
train_df

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(train_df_stats.index):


  0%|          | 0/2622 [00:00<?, ?it/s]

  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(dro

Unnamed: 0,hitsPM_fighter1,accentedHitsPM_fighter1,takedownsPM_fighter1,accentedHitsDistancePM_fighter1,accentedHitsClinchPM_fighter1,accentedHitsParterPM_fighter1,hitsSuccessfulPM_fighter1,accentedHitsSuccessfulPM_fighter1,takedownsSuccessfulPM_fighter1,accentedHitsDistanceSuccessfulPM_fighter1,accentedHitsClinchSuccessfulPM_fighter1,accentedHitsParterSuccessfulPM_fighter1,hitsSuccessful_percent_fighter1,accentedHitsSuccessful_percent_fighter1,accentedHits_percent_fighter1,takedownSuccessful_percent_fighter1,accentedHitsPositionDistanceSuccessful_prcent_fighter1,accentedHitsPositionClinchSuccessful_percent_fighter1,accentedHitsPositionParterSuccessful_percent_fighter1,takedowns_to_hits_fighter1,HitsPositionDistance_to_hits_fighter1,HitsPositionClinch_to_hits_fighter1,HitsPositionParter_to_hits_fighter1,hitsPM_fighter2,accentedHitsPM_fighter2,takedownsPM_fighter2,accentedHitsDistancePM_fighter2,accentedHitsClinchPM_fighter2,accentedHitsParterPM_fighter2,hitsSuccessfulPM_fighter2,accentedHitsSuccessfulPM_fighter2,takedownsSuccessfulPM_fighter2,accentedHitsDistanceSuccessfulPM_fighter2,accentedHitsClinchSuccessfulPM_fighter2,accentedHitsParterSuccessfulPM_fighter2,hitsSuccessful_percent_fighter2,accentedHitsSuccessful_percent_fighter2,accentedHits_percent_fighter2,takedownSuccessful_percent_fighter2,accentedHitsPositionDistanceSuccessful_prcent_fighter2,accentedHitsPositionClinchSuccessful_percent_fighter2,accentedHitsPositionParterSuccessful_percent_fighter2,takedowns_to_hits_fighter2,HitsPositionDistance_to_hits_fighter2,HitsPositionClinch_to_hits_fighter2,HitsPositionParter_to_hits_fighter2,roundsCount,id,city,completed,country,duration,eventDate.date,fighterId_1,fighterId_2,name,rounds,timezone,weightCategory.name,winMethods,winnerId,odd1,odd2,winner,fighter1_fightsAmount,fighter2_fightsAmount
0,21.488098,15.715088,0.0,12.58256,1.257895,1.874633,14.205776,9.478651,0.0,6.711035,0.959649,1.807967,0.672354,0.492925,0.787932,,2.199784,1.25,1.25,0.0,0.526001,0.059969,0.132857,2.95082,2.95082,0.0,2.95082,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,4273,Las Vegas,True,USA,61.0,2008-07-19,503,1429,UFC,1.0,America/Los_Angeles,Полутяжелый вес,['KO'],503.0,3.53,1.34,True,6,1
1,30.651827,26.498256,2.412708,20.891113,3.532143,2.075,17.088787,13.185216,0.4,8.678073,3.332143,1.175,0.497159,0.376625,0.869485,0.235985,2.787169,1.145833,2.277778,0.034699,0.593536,0.082658,0.088593,11.0,9.0,1.2,8.0,0.8,0.2,4.2,2.4,0.4,1.6,0.6,0.2,0.381818,0.218182,0.818182,0.333333,5.0,1.333333,1.0,0.095238,0.380952,0.142857,0.047619,,3667,Louisville,True,USA,300.0,2011-03-03,983,1366,UFC on VERSUS,3.0,America/Chicago,Легкий вес,['DEC'],983.0,2.38,1.49,True,8,1
2,21.0,14.4,2.6,12.3,1.0,1.1,12.1,6.1,0.6,4.2,0.8,1.1,0.565247,0.291701,0.697789,0.230769,2.822115,1.25,1.0,0.043275,0.446589,0.077864,0.073037,18.4,15.4,0.0,14.6,0.4,0.4,6.0,3.0,0.0,2.2,0.4,0.4,0.326087,0.163043,0.836957,,6.636364,1.0,1.0,0.0,0.366667,0.066667,0.066667,,2969,Newark,True,USA,300.0,2013-04-27,1181,1231,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1181.0,2.93,1.53,True,2,1
3,38.730283,35.616333,1.529616,27.775974,5.178815,2.661544,24.179319,21.202,0.525191,15.045355,3.923587,2.233057,0.652539,0.540325,0.881584,0.415152,1.898378,1.34821,1.781111,0.029785,0.546326,0.1593,0.133634,10.736761,2.868936,0.290692,1.007415,1.373716,0.487805,8.050184,1.03289,0.139373,0.081633,0.672512,0.278746,0.716864,0.101487,0.320384,0.333333,7.0,2.055556,1.75,0.011494,0.022222,0.102823,0.022989,,2970,Newark,True,USA,273.0,2013-04-27,122,1188,UFC 159,1.0,Europe/London,Полутяжелый вес,['KO'],122.0,1.13,9.00,True,6,3
4,79.297047,64.70611,0.807491,58.069817,3.369763,3.266529,42.049615,28.860789,0.291386,23.676044,2.567653,2.617091,0.547508,0.325347,0.748979,0.416667,2.963317,1.506944,1.183333,0.00507,0.506825,0.068305,0.058396,24.228571,18.388571,0.16,13.232727,1.72,3.435844,12.274286,7.474286,0.04,4.838442,0.84,1.795844,0.481865,0.326102,0.808458,0.25,3.042727,2.244444,1.884615,0.001563,0.469873,0.049736,0.188061,,2968,Newark,True,USA,300.0,2013-04-27,428,1136,UFC 159,3.0,Europe/London,Полулегкий вес,['DEC'],1136.0,2.06,1.95,False,3,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2617,35.880336,26.886581,2.296592,18.5169,3.339109,5.030572,20.17403,12.854511,1.086845,7.127744,1.981456,3.745311,0.519396,0.341271,0.785349,0.412988,2.816148,2.09125,1.213365,0.043784,0.479754,0.097526,0.123712,12.678139,11.894895,0.860619,9.008993,1.419943,1.465959,6.706839,5.951862,0.188653,3.916512,0.881601,1.153749,0.531124,0.478565,0.930293,0.330572,2.444131,1.564762,1.350785,0.040929,0.637121,0.126602,0.125471,,321,Los Angeles,True,USA,300.0,2018-12-29,23,188,UFC 232,3.0,America/Los_Angeles,Полутяжелый вес,['DEC'],23.0,2.10,1.85,True,13,10
2618,30.019841,29.647524,0.331688,28.607934,0.833401,0.20619,11.907286,11.57101,0.163101,10.899866,0.522782,0.148362,0.373519,0.360912,0.985701,0.387706,2.883184,1.63784,1.508333,0.014169,0.902436,0.050008,0.010179,32.019373,27.724217,0.525926,20.680342,2.494587,4.549288,19.17265,15.260969,0.355556,9.574929,2.02792,3.65812,0.627983,0.497025,0.855494,0.5,2.034786,1.116667,1.262678,0.017602,0.50806,0.097135,0.193872,,325,Los Angeles,True,USA,300.0,2018-12-29,352,465,UFC 232,2.0,America/Los_Angeles,Легчайший вес,['KO'],465.0,3.99,1.30,False,6,3
2619,17.768552,16.726238,1.021014,12.184433,1.096329,3.445476,9.752363,8.727021,0.59536,5.180509,0.801038,2.745474,0.563189,0.525987,0.96238,0.585355,2.240366,1.934719,1.505327,0.058488,0.608359,0.069143,0.266338,29.705411,27.705877,1.145818,18.328411,3.039229,6.338237,17.332112,15.613561,0.427766,9.179386,2.080811,4.353364,0.58124,0.525119,0.934457,0.466104,2.150014,1.506306,1.175617,0.022276,0.58208,0.120018,0.212322,,324,Los Angeles,True,USA,254.0,2018-12-29,225,548,UFC 232,2.0,America/Los_Angeles,Полулегкий вес,['KO'],548.0,1.85,2.17,False,8,6
2620,16.454797,10.494333,0.192593,9.569032,0.369746,0.555556,9.469076,4.351807,0.02963,3.781437,0.103704,0.466667,0.484887,0.300264,0.742679,0.125,2.459087,1.5,1.080357,0.000877,0.662676,0.009783,0.017164,22.695697,17.689705,0.0,16.080251,0.698743,0.910712,8.959017,5.301227,0.0,4.304036,0.630078,0.367112,0.490021,0.251729,0.679003,,3.386403,1.0,2.625,0.0,0.398191,0.039775,0.075699,,326,Los Angeles,True,USA,166.0,2018-12-29,423,505,UFC 232,1.0,America/Los_Angeles,Полулегкий вес,['SUB'],423.0,1.23,4.80,True,3,4


In [11]:
valid_df = valid_df_stats.copy()
valid_df['fighter1_fightsAmount'] = 0 
valid_df['fighter2_fightsAmount'] = 0 

for i in tqdm(valid_df_stats.index):
    fighterId_1, fighterId_2 = valid_df.loc[i, ['fighterId_1', 'fighterId_2']]
    
    fighter1_history_stats = get_figher_statistics_from_past(df_stats, fighterId_1, test_date)
    valid_df.loc[i, fighter1_cols] = fighter1_history_stats.values
    
    fighter2_history_stats = get_figher_statistics_from_past(df_stats, fighterId_2, test_date)
    valid_df.loc[i, fighter2_cols] = fighter2_history_stats.values
    
    fighter1_fightsAmount = (((valid_df['fighterId_1'] == fighterId_1) & (valid_df['eventDate.date'] < test_date)) | 
                             ((valid_df['fighterId_2'] == fighterId_1) & (valid_df['eventDate.date'] < test_date))).sum()
    
    fighter2_fightsAmount = (((valid_df['fighterId_1'] == fighterId_2) & (valid_df['eventDate.date'] < test_date)) 
                             |((valid_df['fighterId_2'] == fighterId_2) & (valid_df['eventDate.date'] < test_date))).sum()
    
    valid_df.loc[i, ['fighter1_fightsAmount', 
                       'fighter2_fightsAmount']] = fighter1_fightsAmount, fighter2_fightsAmount
    
valid_df

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(valid_df_stats.index):


  0%|          | 0/501 [00:00<?, ?it/s]

  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(dro

Unnamed: 0,hitsPM_fighter1,accentedHitsPM_fighter1,takedownsPM_fighter1,accentedHitsDistancePM_fighter1,accentedHitsClinchPM_fighter1,accentedHitsParterPM_fighter1,hitsSuccessfulPM_fighter1,accentedHitsSuccessfulPM_fighter1,takedownsSuccessfulPM_fighter1,accentedHitsDistanceSuccessfulPM_fighter1,accentedHitsClinchSuccessfulPM_fighter1,accentedHitsParterSuccessfulPM_fighter1,hitsSuccessful_percent_fighter1,accentedHitsSuccessful_percent_fighter1,accentedHits_percent_fighter1,takedownSuccessful_percent_fighter1,accentedHitsPositionDistanceSuccessful_prcent_fighter1,accentedHitsPositionClinchSuccessful_percent_fighter1,accentedHitsPositionParterSuccessful_percent_fighter1,takedowns_to_hits_fighter1,HitsPositionDistance_to_hits_fighter1,HitsPositionClinch_to_hits_fighter1,HitsPositionParter_to_hits_fighter1,hitsPM_fighter2,accentedHitsPM_fighter2,takedownsPM_fighter2,accentedHitsDistancePM_fighter2,accentedHitsClinchPM_fighter2,accentedHitsParterPM_fighter2,hitsSuccessfulPM_fighter2,accentedHitsSuccessfulPM_fighter2,takedownsSuccessfulPM_fighter2,accentedHitsDistanceSuccessfulPM_fighter2,accentedHitsClinchSuccessfulPM_fighter2,accentedHitsParterSuccessfulPM_fighter2,hitsSuccessful_percent_fighter2,accentedHitsSuccessful_percent_fighter2,accentedHits_percent_fighter2,takedownSuccessful_percent_fighter2,accentedHitsPositionDistanceSuccessful_prcent_fighter2,accentedHitsPositionClinchSuccessful_percent_fighter2,accentedHitsPositionParterSuccessful_percent_fighter2,takedowns_to_hits_fighter2,HitsPositionDistance_to_hits_fighter2,HitsPositionClinch_to_hits_fighter2,HitsPositionParter_to_hits_fighter2,roundsCount,id,city,completed,country,duration,eventDate.date,fighterId_1,fighterId_2,name,rounds,timezone,weightCategory.name,winMethods,winnerId,odd1,odd2,winner,fighter1_fightsAmount,fighter2_fightsAmount
2622,34.649763,29.065403,0.34218,26.123223,2.94218,0.0,19.321801,14.179621,0.14218,11.679621,2.5,0.0,0.453592,0.354911,0.88071,0.5,2.668803,1.12,,0.05,0.740503,0.069832,0.0,60.547703,48.67744,0.366376,24.223598,2.173563,22.280279,35.350052,25.440958,0.189504,11.248085,1.212632,12.980241,0.519321,0.41088,0.868946,0.5,2.241294,1.661111,1.520833,0.016742,0.574776,0.046292,0.200478,,340,Brooklyn,True,USA,211.0,2019-01-20 02:00:00,41,379,UFC Fight Night 143,1.0,America/New_York,Легчайший вес,['SUB'],379.0,5.48,1.20,False,2,3
2623,11.348693,10.943702,0.0,8.410272,0.150754,2.382676,5.655183,5.400945,0.0,3.503109,0.150754,1.747083,0.456171,0.440042,0.962132,,3.791667,1.0,1.277778,0.0,0.530357,0.0625,0.378571,2.935145,2.850399,1.621034,2.426671,0.423729,0.0,1.630272,1.545526,0.0,1.291289,0.254237,0.0,0.515152,0.5,0.984848,0.0,2.166667,1.666667,,0.0,0.888889,0.083333,0.0,,338,Brooklyn,True,USA,236.0,2019-01-20 02:00:00,226,649,UFC Fight Night 143,1.0,America/New_York,Полутяжелый вес,['KO'],226.0,1.40,3.75,True,2,3
2624,39.985714,37.742857,2.142857,31.957143,0.4,5.385714,15.257143,13.014286,2.142857,12.214286,0.4,0.4,0.398026,0.358004,0.959978,1.0,2.666667,1.0,2.75,0.083333,0.729167,0.083333,0.083333,47.576679,36.699086,2.693747,27.124854,5.053893,4.520339,25.9154,16.252247,0.845143,10.361679,2.72251,3.168057,0.542867,0.361012,0.797066,0.341425,2.97086,1.693167,1.530072,0.040363,0.404411,0.126181,0.145539,,341,Brooklyn,True,USA,300.0,2019-01-20 02:00:00,444,579,UFC Fight Night 143,3.0,America/New_York,Легкий вес,['DEC'],579.0,1.89,2.15,False,1,1
2625,43.599667,36.983476,1.452707,25.088651,4.234773,7.660052,25.072964,19.312444,0.778292,10.946674,2.812513,5.553257,0.549617,0.398291,0.827542,0.485606,3.037891,1.531113,1.577919,0.033019,0.389529,0.181719,0.159021,101.105755,86.279785,1.551154,75.580621,7.218616,3.480549,50.929473,38.268034,0.277922,30.79912,5.000996,2.467918,0.446933,0.35372,0.888931,0.302778,3.023864,1.682246,1.548408,0.012695,0.653925,0.082896,0.092185,,332,Brooklyn,True,USA,32.0,2019-01-20 02:00:00,493,567,UFC Fight Night 143,1.0,America/New_York,Наилегчайший вес,['KO'],493.0,3.69,1.28,True,2,1
2626,12.176294,11.243597,0.405405,9.652888,1.185303,0.405405,4.384384,3.582406,0.135135,2.780427,0.396573,0.405405,0.295074,0.239598,0.936394,0.333333,2.754808,2.75,1.0,0.03125,0.632353,0.090074,0.09375,25.511261,24.653153,0.05,12.902252,0.809459,10.941441,18.197297,17.339189,0.0,5.68964,0.708108,10.941441,0.582874,0.533668,0.950794,0.0,2.494805,1.083333,1.0,0.0,0.564512,0.056371,0.258135,,331,Brooklyn,True,USA,148.0,2019-01-20 02:00:00,174,512,UFC Fight Night 143,2.0,America/New_York,Тяжелый вес,['DQ'],174.0,5.01,1.20,True,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3118,41.259703,35.481877,0.281206,27.707593,1.163081,6.611203,19.459609,14.224207,0.0,8.047232,0.942679,5.234296,0.545315,0.414032,0.856331,0.0,3.464035,1.194444,1.163293,0.0,0.421954,0.0538,0.272226,44.099738,43.110382,0.540854,41.00437,1.260751,0.84526,14.819457,13.894567,0.057143,12.377341,0.946252,0.570974,0.348584,0.324285,0.972624,0.045455,3.373425,1.692063,1.190476,0.002551,0.80126,0.091161,0.049447,,39661,Busan,True,South Korea,198.0,2019-12-21 00:00:00,491,551,UFC Fight Night 165,1.0,Asia/Seoul,Полулегкий вес,['KO'],491.0,1.89,1.94,True,2,1
3119,18.160799,16.478481,0.194824,15.449006,0.930303,0.099171,7.333237,5.745309,0.075758,5.139211,0.539394,0.066704,0.416565,0.306364,0.884206,0.4,3.215613,1.791667,1.0,0.01422,0.698689,0.078542,0.008163,29.823897,26.530057,0.4832,22.026663,2.96043,1.542963,16.033147,12.989306,0.178852,9.522789,2.238411,1.228106,0.57347,0.423292,0.841367,0.588889,2.303305,1.610403,1.183036,0.01252,0.554077,0.069348,0.122682,,39662,Busan,True,South Korea,257.0,2019-12-21 00:00:00,442,476,UFC Fight Night 165,1.0,Asia/Seoul,Наилегчайший вес,['KO'],476.0,2.88,1.43,False,3,3
3120,20.366341,16.744553,0.0,13.252374,2.018994,1.473184,12.674651,9.370461,0.0,6.178282,1.868994,1.323184,0.63392,0.489444,0.838195,,2.229456,1.191358,2.0,0.0,0.558892,0.135945,0.089753,53.847164,52.58666,0.252101,47.434349,3.277311,1.875,24.024422,22.763918,0.0,17.863708,3.02521,1.875,0.470513,0.457692,0.987179,0.0,4.282609,1.083333,1.0,0.0,0.501163,0.069767,0.4,,39704,Busan,True,South Korea,64.0,2019-12-21 00:00:00,316,2573,UFC Fight Night 165,1.0,Asia/Seoul,Не определена,['KO'],2573.0,1.85,1.99,False,2,2
3121,21.149848,19.995853,0.30794,16.452993,2.601156,0.941704,14.022136,12.868141,0.30794,9.94116,2.254335,0.672646,0.712963,0.537037,0.824074,1.0,1.660606,1.153846,1.4,0.042857,0.5,0.092857,0.178571,17.410762,15.376233,0.0,12.776233,2.6,0.0,8.372646,6.638117,0.0,5.238117,1.4,0.0,0.515432,0.410494,0.885802,,2.244681,1.857143,,0.0,0.705195,0.090909,0.0,,39688,Busan,True,South Korea,223.0,2019-12-21 00:00:00,189,1927,UFC Fight Night 165,1.0,Asia/Seoul,Не определена,['SUB'],189.0,2.58,1.51,True,1,2


In [12]:
test_df = test_df_stats.copy()
test_df['fighter1_fightsAmount'] = 0 
test_df['fighter2_fightsAmount'] = 0 

for i in tqdm(test_df_stats.index):
    fighterId_1, fighterId_2 = test_df.loc[i, ['fighterId_1', 'fighterId_2']]
    
    fighter1_history_stats = get_figher_statistics_from_past(df_stats, fighterId_1, test_date)
    test_df.loc[i, fighter1_cols] = fighter1_history_stats.values
    
    fighter2_history_stats = get_figher_statistics_from_past(df_stats, fighterId_2, test_date)
    test_df.loc[i, fighter2_cols] = fighter2_history_stats.values
    
    fighter1_fightsAmount = (((test_df['fighterId_1'] == fighterId_1)) | 
                             ((test_df['fighterId_2'] == fighterId_1))).sum()
    
    fighter2_fightsAmount = (((test_df['fighterId_1'] == fighterId_2) ) 
                             |((test_df['fighterId_2'] == fighterId_2))).sum()
    
    test_df.loc[i, ['fighter1_fightsAmount', 
                       'fighter2_fightsAmount']] = fighter1_fightsAmount, fighter2_fightsAmount

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm(test_df_stats.index):


  0%|          | 0/611 [00:00<?, ?it/s]

  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(drop=True)
  joined_df = first_pos_df.append(second_pos_df).reset_index(dro

In [13]:
train_df.to_pickle('data/train_df.pkl')
valid_df.to_pickle('data/valid_df.pkl')
test_df.to_pickle('data/test_df.pkl')

In [14]:
train_df = pd.read_pickle('data/train_df.pkl')
valid_df = pd.read_pickle('data/valid_df.pkl')
test_df = pd.read_pickle('data/test_df.pkl')

In [15]:
def combine_df(to_combine_df):
    df_combined = pd.DataFrame(to_combine_df[fighter1_cols].values - to_combine_df[fighter2_cols].values, 
                               index=to_combine_df.index)
    
    df_combined.columns = fightStats_cols
    # df_combined = df_combined.fillna(0)
    df_combined['eventDate.date'] = to_combine_df['eventDate.date']
    df_combined['winner'] = to_combine_df['winner']
    df_combined['odd_diff'] = to_combine_df['odd1'] - to_combine_df['odd2']
    df_combined['fighter1_fightsAmount'] = to_combine_df['fighter1_fightsAmount']
    df_combined['fighter2_fightsAmount'] = to_combine_df['fighter2_fightsAmount']

    df_combined['odd1'] = to_combine_df['odd1']
    df_combined['odd2'] = to_combine_df['odd2']
#     df_combined = df_combined[~df_combined['odd_diff'].isna()]
    # to_combine_df = to_combine_df[(to_combine_df['fighter1_fightsAmount'] > 4) & (to_combine_df['fighter2_fightsAmount'] > 4)]
    return df_combined

train_df_combined = combine_df(train_df)
train_df_combined.to_pickle('data/train_df_combined.pkl')
train_df_combined

Unnamed: 0,hitsPM,accentedHitsPM,takedownsPM,accentedHitsDistancePM,accentedHitsClinchPM,accentedHitsParterPM,hitsSuccessfulPM,accentedHitsSuccessfulPM,takedownsSuccessfulPM,accentedHitsDistanceSuccessfulPM,accentedHitsClinchSuccessfulPM,accentedHitsParterSuccessfulPM,hitsSuccessful_percent,accentedHitsSuccessful_percent,accentedHits_percent,takedownSuccessful_percent,accentedHitsPositionDistanceSuccessful_prcent,accentedHitsPositionClinchSuccessful_percent,accentedHitsPositionParterSuccessful_percent,takedowns_to_hits,HitsPositionDistance_to_hits,HitsPositionClinch_to_hits,HitsPositionParter_to_hits,eventDate.date,winner,odd_diff,fighter1_fightsAmount,fighter2_fightsAmount,odd1,odd2
0,18.537278,12.764268,0.0,9.63174,1.257895,1.874633,14.205776,9.478651,0.0,6.711035,0.959649,1.807967,0.672354,0.492925,-0.212068,,,,,,,,,2008-07-19,True,2.19,6,1,3.53,1.34
1,19.651827,17.498256,1.212708,12.891113,2.732143,1.875,12.888787,10.785216,0.0,7.078073,2.732143,0.975,0.115341,0.158443,0.051303,-0.097348,-2.212831,-0.1875,1.277778,-0.060539,0.212583,-0.060199,0.040974,2011-03-03,True,0.89,8,1,2.38,1.49
2,2.6,-1.0,2.6,-2.3,0.6,0.7,6.1,3.1,0.6,2.0,0.4,0.7,0.23916,0.128657,-0.139168,,-3.814248,0.25,0.0,0.043275,0.079923,0.011197,0.006371,2013-04-27,True,1.40,2,1,2.93,1.53
3,27.993521,32.747397,1.238924,26.768559,3.805099,2.17374,16.129135,20.169109,0.385818,14.963723,3.251075,1.954311,-0.064325,0.438839,0.561201,0.081818,-5.101622,-0.707346,0.031111,0.018291,0.524104,0.056477,0.110646,2013-04-27,True,-7.87,6,3,1.13,9.00
4,55.068475,46.317538,0.647491,44.83709,1.649763,-0.169315,29.77533,21.386503,0.251386,18.837603,1.727653,0.821247,0.065643,-0.000754,-0.059479,0.166667,-0.07941,-0.7375,-0.701282,0.003508,0.036952,0.018569,-0.129665,2013-04-27,False,0.11,3,5,2.06,1.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2617,23.202198,14.991686,1.435973,9.507907,1.919166,3.564612,13.467191,6.902649,0.898192,3.211232,1.099856,2.591561,-0.011728,-0.137294,-0.144944,0.082416,0.372018,0.526488,-0.13742,0.002855,-0.157367,-0.029076,-0.001759,2018-12-29,True,0.25,13,10,2.10,1.85
2618,-1.999532,1.923308,-0.194238,7.927592,-1.661186,-4.343098,-7.265364,-3.689959,-0.192455,1.324937,-1.505139,-3.509757,-0.254464,-0.136113,0.130208,-0.112294,0.848397,0.521173,0.245655,-0.003432,0.394375,-0.047127,-0.183693,2018-12-29,False,2.69,6,3,3.99,1.30
2619,-11.93686,-10.979639,-0.124804,-6.143978,-1.9429,-2.892761,-7.579749,-6.886539,0.167594,-3.998876,-1.279773,-1.60789,-0.018051,0.000868,0.027923,0.119251,0.090352,0.428413,0.32971,0.036212,0.026279,-0.050875,0.054016,2018-12-29,False,-0.32,8,6,1.85,2.17
2620,-6.2409,-7.195372,0.192593,-6.511219,-0.328997,-0.355156,0.51006,-0.94942,0.02963,-0.5226,-0.526374,0.099554,-0.005134,0.048535,0.063676,,-0.927316,0.5,-1.544643,0.000877,0.264485,-0.029992,-0.058534,2018-12-29,True,-3.57,3,4,1.23,4.80


In [16]:
valid_df_combined = combine_df(valid_df)
valid_df_combined.to_pickle('data/valid_df_combined.pkl')
valid_df_combined

Unnamed: 0,hitsPM,accentedHitsPM,takedownsPM,accentedHitsDistancePM,accentedHitsClinchPM,accentedHitsParterPM,hitsSuccessfulPM,accentedHitsSuccessfulPM,takedownsSuccessfulPM,accentedHitsDistanceSuccessfulPM,accentedHitsClinchSuccessfulPM,accentedHitsParterSuccessfulPM,hitsSuccessful_percent,accentedHitsSuccessful_percent,accentedHits_percent,takedownSuccessful_percent,accentedHitsPositionDistanceSuccessful_prcent,accentedHitsPositionClinchSuccessful_percent,accentedHitsPositionParterSuccessful_percent,takedowns_to_hits,HitsPositionDistance_to_hits,HitsPositionClinch_to_hits,HitsPositionParter_to_hits,eventDate.date,winner,odd_diff,fighter1_fightsAmount,fighter2_fightsAmount,odd1,odd2
2622,-25.89794,-19.612038,-0.024196,1.899624,0.768617,-22.280279,-16.028251,-11.261337,-0.047324,0.431535,1.287368,-12.980241,-0.065729,-0.055969,0.011764,0.0,0.427509,-0.541111,,0.033258,0.165727,0.023541,-0.200478,2019-01-20 02:00:00,False,4.28,2,3,5.48,1.20
2623,8.413548,8.093302,-1.621034,5.983601,-0.272975,2.382676,4.024911,3.855419,0.0,2.21182,-0.103484,1.747083,-0.05898,-0.059958,-0.022717,,1.625,-0.666667,,0.0,-0.358532,-0.020833,0.378571,2019-01-20 02:00:00,True,-2.35,2,3,1.40,3.75
2624,-7.590965,1.043771,-0.55089,4.832289,-4.653893,0.865375,-10.658257,-3.237961,1.297714,1.852607,-2.32251,-2.768057,-0.144841,-0.003007,0.162912,0.658575,-0.304193,-0.693167,1.219928,0.04297,0.324756,-0.042848,-0.062206,2019-01-20 02:00:00,False,-0.26,1,1,1.89,2.15
2625,-57.506089,-49.296309,-0.098447,-50.491969,-2.983843,4.179503,-25.856508,-18.955591,0.50037,-19.852446,-2.188483,3.085339,0.102683,0.044572,-0.061389,0.182828,0.014026,-0.151134,0.029511,0.020324,-0.264396,0.098823,0.066837,2019-01-20 02:00:00,True,2.41,2,1,3.69,1.28
2626,-13.334967,-13.409557,0.355405,-3.249364,0.375843,-10.536036,-13.812913,-13.756783,0.135135,-2.909212,-0.311535,-10.536036,-0.2878,-0.294069,-0.0144,0.333333,0.260002,1.666667,0.0,0.03125,0.067841,0.033703,-0.164385,2019-01-20 02:00:00,True,3.81,2,4,5.01,1.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3118,-2.840035,-7.628505,-0.259648,-13.296777,-0.097671,5.765943,4.640153,0.32964,-0.057143,-4.330109,-0.003573,4.663322,0.196731,0.089747,-0.116293,-0.045455,0.09061,-0.497619,-0.027183,-0.002551,-0.379306,-0.037361,0.222779,2019-12-21 00:00:00,True,-0.05,2,1,1.89,1.94
3119,-11.663098,-10.051576,-0.288376,-6.577657,-2.030127,-1.443792,-8.69991,-7.243997,-0.103094,-4.383578,-1.699017,-1.161403,-0.156904,-0.116929,0.042839,-0.188889,0.912308,0.181264,-0.183036,0.0017,0.144612,0.009193,-0.114519,2019-12-21 00:00:00,False,1.45,3,3,2.88,1.43
3120,-33.480823,-35.842107,-0.252101,-34.181974,-1.258317,-0.401816,-11.349771,-13.393457,0.0,-11.685426,-1.156216,-0.551816,0.163407,0.031751,-0.148984,,-2.053152,0.108025,1.0,0.0,0.057729,0.066178,-0.310247,2019-12-21 00:00:00,False,-0.14,2,2,1.85,1.99
3121,3.739086,4.619619,0.30794,3.676759,0.001156,0.941704,5.649491,6.230024,0.30794,4.703043,0.854335,0.672646,0.197531,0.126543,-0.061728,,-0.584075,-0.703297,,0.042857,-0.205195,0.001948,0.178571,2019-12-21 00:00:00,True,1.07,1,2,2.58,1.51


In [17]:
test_df_combined = combine_df(test_df)
test_df_combined.to_pickle('data/test_df_combined.pkl')
test_df_combined

Unnamed: 0,hitsPM,accentedHitsPM,takedownsPM,accentedHitsDistancePM,accentedHitsClinchPM,accentedHitsParterPM,hitsSuccessfulPM,accentedHitsSuccessfulPM,takedownsSuccessfulPM,accentedHitsDistanceSuccessfulPM,accentedHitsClinchSuccessfulPM,accentedHitsParterSuccessfulPM,hitsSuccessful_percent,accentedHitsSuccessful_percent,accentedHits_percent,takedownSuccessful_percent,accentedHitsPositionDistanceSuccessful_prcent,accentedHitsPositionClinchSuccessful_percent,accentedHitsPositionParterSuccessful_percent,takedowns_to_hits,HitsPositionDistance_to_hits,HitsPositionClinch_to_hits,HitsPositionParter_to_hits,eventDate.date,winner,odd_diff,fighter1_fightsAmount,fighter2_fightsAmount,odd1,odd2
3123,10.971455,5.637201,0.133333,-0.888398,3.702762,2.822836,10.553039,7.185451,0.333333,2.416943,2.323757,2.444751,0.099652,0.074393,-0.065773,0.766667,-1.290915,0.577402,-0.320513,0.00511,-0.092885,0.004509,0.064695,2020-01-18 00:00:00,True,0.04,3,1,1.93,1.89
3124,17.120207,5.929299,-1.00819,2.879043,1.351026,1.69923,13.59643,3.680227,-0.062332,2.505188,0.737856,0.437184,0.117482,-0.032597,-0.156059,0.324194,-1.371904,0.449675,0.48563,-0.06356,0.135183,-0.026204,-0.235573,2020-01-18 00:00:00,False,0.49,2,2,2.19,1.70
3125,,,,,,,,,,,,,,,,,,,,,,,,2020-01-18 00:00:00,True,0.40,5,2,2.13,1.73
3126,,,,,,,,,,,,,,,,,,,,,,,,2020-01-18 00:00:00,False,0.28,2,2,2.06,1.78
3127,8.112056,10.747844,0.755347,10.318166,-0.652594,1.082272,0.242703,2.606983,0.469948,2.416371,-0.445272,0.635885,-0.138005,-0.069212,0.067841,0.554167,0.713704,0.577778,0.185114,0.034003,0.050531,-0.010539,0.007139,2020-01-18 00:00:00,False,0.47,3,1,2.17,1.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3729,-19.237944,-14.875817,-1.25977,-13.054159,-2.052932,0.231275,-8.14936,-3.987233,-0.333333,-3.20489,-0.853257,0.070914,-0.034903,0.089473,0.131729,,-0.767055,,0.166667,-0.041667,0.048837,-0.157752,0.44186,2021-02-27 00:00:00,False,0.03,3,4,1.95,1.92
3730,13.987078,12.957003,0.094026,11.243155,0.513364,1.200484,6.650314,5.354037,0.15308,3.302352,1.435954,0.615731,-0.006717,0.008872,0.025573,0.264428,-0.131953,-0.225117,-0.113719,-0.07412,-0.001824,0.021569,-0.023387,2021-02-27 01:45:00,False,0.13,3,4,2.00,1.87
3731,-5.90872,-10.93896,0.416667,-10.046318,-3.559308,2.666667,-3.011748,-7.125321,0.166667,-6.323432,-2.885223,2.083333,0.042738,-0.127823,-0.196828,-0.065556,0.437933,0.746291,0.170211,0.019242,-0.289854,-0.121943,0.118061,2021-02-27 03:45:00,False,2.68,3,5,3.95,1.27
3732,-23.222727,-20.243182,0.752273,-15.613636,0.520455,-5.15,-10.386364,-7.836364,0.290909,-5.606818,0.620455,-2.85,0.082173,0.08827,-0.013659,-0.27619,-0.456365,-0.75,-0.52044,0.123594,-0.005401,0.110649,-0.093866,2021-03-06 00:00:00,False,0.31,2,3,2.10,1.79
