In [1]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import json
from pandas import json_normalize
from tqdm import tqdm_notebook as tqdm
import numpy as np
import ast
from typing import Dict, List
import sys
import os
sys.path.append(os.path.join(sys.path[0], '../'))


pd.set_option('display.precision',6)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

In [4]:
fighters_df = pd.read_csv("../../data/0.fighters_raw.csv", index_col=0)
fighters_df["dateOfBirth"] = pd.to_datetime(fighters_df["dateOfBirth"])
fighters_cols = [
    "id",
    "name",
    "weight",
    "height",
    "armSpan",
    "legSwing",
    "weightCategory.id",
    "weightCategory.name",
    "dateOfBirth",
    "country",
    "city",
    "timezone",
]
fighters_df = fighters_df[fighters_cols]
fighters_df.set_index("id", inplace=True)
f_name_dict = fighters_df['name'].to_dict()
f_name_dict


{1: 'Tanner Boser',
 2: 'Giacomo Lemos',
 3: 'Shamil Abdurakhimov',
 4: 'Klidson Abreu',
 5: 'Yoshihiro Akiyama',
 6: 'Jessica Aguilar',
 7: 'Juan Adams',
 8: 'Israel Adesanya',
 9: 'Abu Azaitar',
 10: 'Rostem Akman',
 11: 'Alexandra Albu',
 12: 'Thiago Alves',
 13: 'Warlley Alves',
 14: 'Hector Aldana',
 15: 'Irene Aldana',
 16: 'Sultan Aliev',
 17: 'Arnold Allen',
 18: 'Eddie Alvarez',
 19: 'Joel Alvarez',
 20: 'Iuri Alcantara',
 21: 'Abdul Razak Alhassan',
 22: 'Thomas Almeida',
 23: 'Corey Anderson',
 24: 'Makwan Amirkhani',
 25: 'Eryk Anders',
 26: 'Megan Anderson',
 27: 'Viviane Araujo',
 28: 'Jessica Andrade',
 29: 'Magomed Ankalaev',
 30: 'Gadzhimurad Antigulov',
 31: 'Austin Arnett',
 32: 'Ben Askren',
 33: 'Julio Arce',
 34: 'Cyril Asker',
 35: 'Omari Akhmedov',
 36: 'Jessin Ayari',
 37: 'Bryan Barberena',
 38: 'Renan Barao',
 39: 'Edson Barboza',
 40: 'Enrique Barzola',
 41: 'Mario Bautista',
 42: 'Raoni Barcelos',
 43: 'Siyar Bahadurzada',
 44: 'Gabriel Benitez',
 45: 'Ryan

In [5]:
### Исправляем поле `country` для бойцов из США
# У некоторых бойцов из США в поле `country` указан штат, а не страна. \
# Также заменяем написание `United States` на `USA`, чтобы название соответствовало данным из таблицы с боями.

usa_state_names = [
    "Alaska", "Alabama", "Arkansas", "American Samoa", "Arizona", "California", "Colorado", "Connecticut",
    "District ", "of Columbia", "Delaware", "Florida", "Georgia", "Guam", "Hawaii", "Iowa", "Idaho",
    "Illinois", "Indiana", "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland", "Maine",
    "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", "North Carolina", "North Dakota", 
    "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", "Ohio", "Oklahoma", 
    "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
    "Texas", "Utah", "Virginia", "Virgin Islands", "Vermont", "Washington", "Wisconsin", "West Virginia", 
    "Wyoming",
]

fighters_df.loc[fighters_df["country"] == "United States", "country"] = "USA"
fighters_df.loc[fighters_df["country"].isin(usa_state_names), "country"] = "USA"


### Выбросы размаха ног меняем на NaN, для дальнейшей обработки
fighters_df.replace(fighters_df.legSwing.max(), np.nan, inplace=True)
fighters_df.replace(fighters_df.legSwing.min(), np.nan, inplace=True)

### Убираем строки с выбросами роста
fighters_df = fighters_df[fighters_df['height'] < 230]
fighters_df = fighters_df[fighters_df['height'] > 145]

### Убираем строки с выбросами веса
fighters_df = fighters_df[fighters_df['weight'] > 47]
fighters_df = fighters_df[fighters_df['weight'] < 250]

### Находим все возможные весовые категории
avg_weight_in_weight_category = fighters_df.groupby(by="weightCategory.id").mean()['weight']
avg_weight_in_weight_category

### Замена пустых значений роста на размах рук
def replace_null_height_to_arm_span(row):
    if np.isnan(row['height']) and row['armSpan']:
        arm_span = row['armSpan']
        return arm_span
    return row['height']

fighters_df['height'] = fighters_df.apply(
    lambda row: replace_null_height_to_arm_span(row),
    axis=1
)

### Замена пустых значений размаха рук на рост
def replace_null_arm_span_to_height(row):
    if np.isnan(row['armSpan']) and row['height']:
        height = row['height']
        return height
    return row['armSpan']

fighters_df['armSpan'] = fighters_df.apply(
    lambda row: replace_null_arm_span_to_height(row),
    axis=1
)

### Убираем пустые значения размаха ног, средним по колонке
fighters_df['legSwing'].fillna(np.round(fighters_df['legSwing'].mean(), 1), inplace=True)


## Предобработка данных о боях

In [6]:
events_df = pd.read_csv("../../data/0.events_raw.csv", index_col=0)
events_df["eventDate.date"] = pd.to_datetime(events_df["eventDate.date"])
events_df.reset_index(inplace=True, drop=True)
events_df.head(5)

Unnamed: 0,avgOdds,city,completed,country,duration,eventDate.date,eventDate.timezone,eventDate.timezone_type,fighterId_1,fighterId_2,fighters,id,link,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId
0,[],Denver,True,USA,104.0,1993-11-12,Europe/Berlin,3,1646,1923,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5201,http://www.ufcstats.com/fight-details/64139d1d...,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0
1,[],Denver,True,USA,52.0,1993-11-12,Europe/Berlin,3,1777,1883,"[{'fighterId': 1777, 'fightStats': {'hitsTotal...",5202,http://www.ufcstats.com/fight-details/00b07967...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0
2,[],Denver,True,USA,59.0,1993-11-12,Europe/Berlin,3,1908,1923,"[{'fighterId': 1908, 'fightStats': {'hitsTotal...",5203,http://www.ufcstats.com/fight-details/ac7ca2ec...,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0
3,[],Denver,True,USA,57.0,1993-11-12,Europe/Berlin,3,1631,1646,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",5204,http://www.ufcstats.com/fight-details/ffd16691...,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0
4,[],Denver,True,USA,138.0,1993-11-12,Europe/Berlin,3,1646,1924,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5205,http://www.ufcstats.com/fight-details/cecdc0da...,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0


In [7]:
### Убираем строки с незавершенными боями и боями, где отсутствует `winnerId`
events_df.drop(events_df[events_df["completed"] == False].index, inplace=True)
events_df.drop(events_df[events_df["winnerId"].isna()].index, inplace=True)

### Убираем строки, где `winnerId` не совпадает с айди ни одного из бойцов
events_df = events_df[~((events_df["winnerId"] != events_df["fighterId_1"]) & (events_df["winnerId"] != events_df["fighterId_2"]))]

### Удаляем лишние колонки
events_df.drop(
    columns=["completed", "eventDate.timezone_type", "link"],
    inplace=True,
)

### Извлекаем данные из колонок `avgOdds` и `fighters`
def parse_odds(row: pd.Series) -> pd.Series:
    """
    Parse 'avgOdds' column.
    :param row: Row of the events dataframe.
    :return: pd.Series with odds for the 1st and the 2nd fighters.
    """
    avg_odds = row["avgOdds"]
    if avg_odds == "[]" or avg_odds == np.nan:
        return pd.Series([np.nan] * 2)
    avg_odds = ast.literal_eval(avg_odds)
    if avg_odds[0]["fighterId"] == row["fighterId_1"]:
        return pd.Series([f.get("value", np.nan) for f in avg_odds])
    else:
        return pd.Series([f.get("value", np.nan) for f in reversed(avg_odds)])
    
events_df[["f1_odds", "f2_odds"]] = events_df[["avgOdds", "fighterId_1", "fighterId_2"]]\
        .apply(lambda row: parse_odds(row), axis=1)

events_df = events_df.drop(columns="avgOdds")
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,fighters,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,"[{'fighterId': 1777, 'fightStats': {'hitsTotal...",5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,"[{'fighterId': 1908, 'fightStats': {'hitsTotal...",5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,"[{'fighterId': 421, 'fightStats': {'hitsTotal'...",40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,"[{'fighterId': 3504, 'fightStats': {'hitsTotal...",40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,"[{'fighterId': 1334, 'fightStats': {'hitsTotal...",40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,"[{'fighterId': 246, 'fightStats': {'hitsTotal'...",40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48


In [8]:
#### Парсим колонку `fighters`

fighter_stats_keys = [
    "hitsTotal",
    "hitsSuccessful",
    "takedownTotal",
    "takedownSuccessful",
    "submissionAttempts",
    "takeovers",
    "accentedHitsTotal",
    "accentedHitsSuccessful",
    "knockdowns",
    "protectionPassage",
    "hitsHeadTotal",
    "hitsHeadSuccessful",
    "hitsBodyTotal",
    "hitsBodySuccessful",
    "hitsLegsTotal",
    "hitsLegsSuccessful",
    "accentedHitsPositionDistanceTotal",
    "accentedHitsPositionDistanceSuccessful",
    "accentedHitsPositionClinchTotal",
    "accentedHitsPositionClinchSuccessful",
    "accentedHitsPositionParterTotal",
    "accentedHitsPositionParterSuccessful",
]


def get_fighter_stats_cols() -> List[str]:
    """
    Get list of fight stats column names for each fighter.
    :return: List of column names with 'f1_' prefix
    for the first fighter and 'f2_' prefix for the second.
    """
    fighter_stats_cols = []
    for i in range(1, 3):
        for k in fighter_stats_keys:
            fighter_stats_cols.append(f"f{i}_{k}")
    return fighter_stats_cols


def sum_round_stats(stats: List[Dict[str, int]]) -> List[int]:
    """
    Sum stats for a fighter for all rounds of a fight.
    :param stats: List with stats from object of 'fighters' column.
    :return: Stats for all rounds for a fighter as a list.
    """
    if len(stats) == 0:
        return [np.nan for _ in range(len(fighter_stats_keys))]
    res = {k: 0 for k in fighter_stats_keys}
    for i in stats:
        for k in res:
            res[k] = i.get(k, 0)
    return list(res.values())


def parse_fight_data(row: pd.Series) -> pd.Series:
    """
    Parse 'fighters' column.
    :param row: Row of the events dataframe.
    :return: pd.Series with stats for both fighters.
    """
    fighters = row["fighters"]
    if fighters == "[]" or fighters == np.nan:
        return pd.Series([np.nan for _ in range(len(fighter_stats_keys))])
    cols = []
    fighters = ast.literal_eval(fighters)
    if fighters[0]["fighterId"] == row["fighterId_2"]:
        fighters = reversed(fighters)
    for f in fighters:
        cols.extend(sum_round_stats(f["roundStats"]))
    return pd.Series(cols)

events_df[get_fighter_stats_cols()] = events_df[
    ["fighters", "fighterId_1", "fighterId_2"]
].apply(lambda row: parse_fight_data(row), axis=1)
events_df.drop(columns="fighters", inplace=True)
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0


In [9]:
### Добавляем данные о бойцах в датафрейм с боями

fighter_data_cols = fighters_df.drop(columns=["weightCategory.id", "weightCategory.name"]).columns
# ['name', 'weight', 'height', 'armSpan', 'legSwing', 'dateOfBirth',
#        'country', 'city', 'timezone']

events_df = events_df.join(fighters_df[fighter_data_cols].add_prefix("f1_"), 
                           on="fighterId_1")

events_df = events_df.join(fighters_df[fighter_data_cols].add_prefix("f2_"), 
                           on="fighterId_2")

events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York


In [10]:
### Добавляем признак `age`
def add_age(row: pd.Series) -> pd.Series:
    """
    Add age for both fighters.
    :param row: Row of the events dataframe.
    :return: pd.Series with age of fighters in years.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        try:
            age = row["eventDate.date"].year - row[prefix + "dateOfBirth"].year
        except Exception:
            age = np.nan
        result.append(age)
    return pd.Series(result)


events_df[["f1_age", "f2_age"]] = events_df[["eventDate.date", "f1_dateOfBirth", "f2_dateOfBirth"]]\
    .apply(lambda row: add_age(row), axis=1)

events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0


In [11]:
### Добавляем признаки `isHomeCity`, `isHomeCountry`, `isHomeTimezone`
# Возможные значения переменных: 0 и 1 \
# `isHomeCity` - боец дерется в родном городе \
# `isHomeCountry` - боец дерется в родной стране \
# `isHomeTimezone` - боец дерется в своем часовом поясе

def get_territorial_cols() -> List[str]:
    """
    Get list of territorial column names for each fighter.
    :return: List of column names with 'f1_' prefix
    for the first fighter and 'f2_' prefix for the second.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        for key in ["isHomeCity", "isHomeCountry", "isHomeTimezone"]:
            result.append(prefix + key)
    return result


def fill_territorial_cols(row: pd.Series) -> pd.Series:
    """
    Add binary features 'isHomeCity', 'isHomeCountry', 'isHomeTimezone'
    for each fighter.
    :param row: Row of the events dataframe.
    :return: pd.Series with features for both fighters.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        for key in ["city", "country", "timezone"]:
            result.append(int(row[key] == row[prefix + key]))
    return pd.Series(result)

events_df[get_territorial_cols()] = events_df.apply(
    lambda row: fill_territorial_cols(row), axis=1
)
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0,0,0,0,0,0,0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0,0,1,0,0,1,0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0,0,1,0,0,0,0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0,0,1,0,0,0,0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0,0,1,0,0,0,0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0,0,0,0,0,0,0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0,0,1,0,0,0,0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0,0,0,0,0,1,0


### **Какую статистику необходимо посчитать накопительным итогом к бою:**
- **winning_streak** - сумма побед **подряд** по одному бойцу (серия побед) count of winnerId in sequence group by winnerId
- **wins_by_knockowt** - сумма побед нокаутами  count winMethods == ['KO']  group by winnerId
* **wins_by_submissions** - сумма чистых побед (болевой прием, который приводит к сдаче соперника) count winMethods == ['SUB'] group by winnerId
* **striking_accuracy** - точность ударов sum(f1_accentedHitsSuccessful)/sum(f1_accentedHitsTotal) or sum(f2_accentedHitsSuccessful)/sum(f2_accentedHitsTotal)  group by fighterId_1 or fighterId_2
* **Strikes_Landed** - нанесено акцентовых ударов sum(f1_accentedHitsSuccessful) or sum(f2_accentedHitsSuccessful) group by fighterId_1 or fighterId_2
* **Strikes_Attempted** - выброшено акцентовых ударов sum(f1_accentedHitsTotal) or sum(f2_accentedHitsTotal) group by fighterId_1 or fighterId_2
* **grappling_accuracy** - статистика в борьбе sum(f1_takedownSuccessful)/sum(f1_takedownTotal) or sum(f2_takedownSuccessful)/sum(f2_takedownTotal)  group by fighterId_1 or fighterId_2
* **takwdowns_landed** - Тейкдаунов выполнено sum(f1_takedownSuccessful) or sum(f2_takedownSuccessful) group by fighterId_1 or fighterId_2
* **Takedowns Attempted** - попыток Тейкдаунов sum(f1_takedownTotal) or sum(f2_takedownTotal) group by fighterId_1 or fighterId_2
* **Knockdown_ratio** - НОКДАУНОВ ЗА БОЙ/СРЕД. sum(f1_knockdowns)/count of figthts or sum(f2_knockdowns)/count of figthts group by fighterId_1 or fighterId_2
* **AVG_fight_time** - СРЕДНЕЕ ВРЕМЯ БОЯ sum(duration)/count of figthts group by fighterId_1 or fighterId_2
* **KO\TKO** - ко \ все победы
* **stricing_sucss_per_duration** - sum(Strikes_Landed) * 60 \sum(duration)

### Подготовка датафрэйма для кумулятивной суммы по статистике бойцов

In [12]:
events_df = events_df.reset_index()
events_df

Unnamed: 0,index,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone
0,0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0,0,0,0,0,0,0
1,1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0,0,1,0,0,1,0
2,2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0,0,1,0,0,0,0
3,3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0,0,1,0,0,0,0
4,4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0,0,1,0,0,0,0
6873,7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0,0,0,0,0,0,0
6874,7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0,0,1,0,0,0,0
6875,7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0,0,0,0,0,1,0


In [13]:
stats_events_summary = events_df.copy()
stats_events_summary['winner_1'] = stats_events_summary['winnerId'] == stats_events_summary['fighterId_1'] 
stats_events_summary['winner_2'] = stats_events_summary['winnerId'] == stats_events_summary['fighterId_2'] 
stats_events_summary = pd.get_dummies(stats_events_summary, columns = ['winMethods'])
stats_events_summary.columns = stats_events_summary.columns.str.replace('\'','')
fighter1_events = stats_events_summary[['eventDate.date','fighterId_1','duration','winner_1','f1_hitsTotal', 'f1_hitsSuccessful', 'f1_takedownTotal',
       'f1_takedownSuccessful', 'f1_submissionAttempts', 'f1_takeovers',
       'f1_accentedHitsTotal', 'f1_accentedHitsSuccessful', 'f1_knockdowns',
       'f1_protectionPassage', 'f1_hitsHeadTotal', 'f1_hitsHeadSuccessful',
       'f1_hitsBodyTotal', 'f1_hitsBodySuccessful', 'f1_hitsLegsTotal',
       'f1_hitsLegsSuccessful', 'f1_accentedHitsPositionDistanceTotal',
       'f1_accentedHitsPositionDistanceSuccessful',
       'f1_accentedHitsPositionClinchTotal',
       'f1_accentedHitsPositionClinchSuccessful',
       'f1_accentedHitsPositionParterTotal',
       'f1_accentedHitsPositionParterSuccessful','winMethods_[DEC]', 'winMethods_[DQ]', 'winMethods_[KO]',
       'winMethods_[SUB]']]
fighter1_events[['fighter_nbr']] = 1 # add what order was in event df

fighter2_events = stats_events_summary[['eventDate.date','fighterId_2','duration','winner_2','f2_hitsTotal',
       'f2_hitsSuccessful', 'f2_takedownTotal', 'f2_takedownSuccessful',
       'f2_submissionAttempts', 'f2_takeovers', 'f2_accentedHitsTotal',
       'f2_accentedHitsSuccessful', 'f2_knockdowns', 'f2_protectionPassage',
       'f2_hitsHeadTotal', 'f2_hitsHeadSuccessful', 'f2_hitsBodyTotal',
       'f2_hitsBodySuccessful', 'f2_hitsLegsTotal', 'f2_hitsLegsSuccessful',
       'f2_accentedHitsPositionDistanceTotal',
       'f2_accentedHitsPositionDistanceSuccessful',
       'f2_accentedHitsPositionClinchTotal',
       'f2_accentedHitsPositionClinchSuccessful',
       'f2_accentedHitsPositionParterTotal',
       'f2_accentedHitsPositionParterSuccessful','winMethods_[DEC]', 'winMethods_[DQ]', 'winMethods_[KO]',
       'winMethods_[SUB]']]
fighter2_events[['fighter_nbr']] = 2 # add what order was in event df

col_name =  ['eventDate.date','fighterId','duration','winner','_hitsTotal',
    '_hitsSuccessful', '_takedownTotal', '_takedownSuccessful',
       '_submissionAttempts', '_takeovers', '_accentedHitsTotal',
       '_accentedHitsSuccessful', '_knockdowns', '_protectionPassage',
       '_hitsHeadTotal', '_hitsHeadSuccessful', '_hitsBodyTotal',
       '_hitsBodySuccessful', '_hitsLegsTotal', '_hitsLegsSuccessful',
       '_accentedHitsPositionDistanceTotal',
       '_accentedHitsPositionDistanceSuccessful',
       '_accentedHitsPositionClinchTotal',
       '_accentedHitsPositionClinchSuccessful',
       '_accentedHitsPositionParterTotal',
       '_accentedHitsPositionParterSuccessful','winMethods_[DEC]', 'winMethods_[DQ]', 'winMethods_[KO]',
       'winMethods_[SUB]', 'fighter_nbr']

fighter1_events.columns = col_name
fighter2_events.columns = col_name
f_stats_events_summ = pd.concat([fighter1_events, fighter2_events])
f_stats_events_summ.sort_values(by = ['fighterId','eventDate.date'], axis=0, inplace = True) # df with all firters ordered by ('fighterId','eventDate.date')
f_stats_events_summ[['eventDate.date']]

Unnamed: 0,eventDate.date
6266,2019-10-19 04:00:00
6353,2019-12-21 00:00:00
6532,2020-06-27 00:00:00
6585,2020-07-25 00:00:00
6733,2020-11-07 00:00:00
...,...
6820,2021-01-20 00:00:00
6806,2021-01-16 00:00:00
6836,2021-02-06 00:00:00
6813,2021-01-20 00:00:00


### Считаем накопительную статистику для всех бойцов:
 - wins_by_knockout
 - wins_by_submissions
 - count_of_fights
 - striking_accuracy
 - Strikes_Landed
 - Strikes_Attempted
 - grappling_accuracy
 - takwdowns_landed
 - Takedowns_Attempted
 - Knockdown_ratio
 - AVG_fight_time
 - stricing_sucss_per_duration

In [14]:
def add_cumulative_sum (df:pd.DataFrame, columns:List[str]) -> pd.DataFrame:
    """
    Add cumulative sum for previous fights for input columns list to input df.
    :param df: input DF (should be sorted by fighter and date)
    :param columns: Column names of the dataframe.
    :return: pd.DataFrame with cumulative sum for previous fights for input columns list.
    """
    df_cumulative = pd.DataFrame(index=df.index)
    for column in columns:
        col_name = 'cumsum' + column  
        df_cumulative[col_name] = df.groupby('fighterId')[column].cumsum() - f_stats_events_summ[column]
        
    df_cumulative[['eventDate.date', 'fighterId', 'duration', 'winner', 'fighter_nbr']] = \
        df[['eventDate.date', 'fighterId', 'duration', 'winner', 'fighter_nbr']]

    return df_cumulative

f_stats_events_cumulative = add_cumulative_sum(f_stats_events_summ, col_name[2:-1]) # df c накопленной суммой
#count_of_fights
f_stats_events_cumulative['count_of_fights'] = f_stats_events_cumulative.groupby('fighterId')['fighter_nbr'].cumcount()
f_stats_events_cumulative

Unnamed: 0,cumsumduration,cumsumwinner,cumsum_hitsTotal,cumsum_hitsSuccessful,cumsum_takedownTotal,cumsum_takedownSuccessful,cumsum_submissionAttempts,cumsum_takeovers,cumsum_accentedHitsTotal,cumsum_accentedHitsSuccessful,cumsum_knockdowns,cumsum_protectionPassage,cumsum_hitsHeadTotal,cumsum_hitsHeadSuccessful,cumsum_hitsBodyTotal,cumsum_hitsBodySuccessful,cumsum_hitsLegsTotal,cumsum_hitsLegsSuccessful,cumsum_accentedHitsPositionDistanceTotal,cumsum_accentedHitsPositionDistanceSuccessful,cumsum_accentedHitsPositionClinchTotal,cumsum_accentedHitsPositionClinchSuccessful,cumsum_accentedHitsPositionParterTotal,cumsum_accentedHitsPositionParterSuccessful,cumsumwinMethods_[DEC],cumsumwinMethods_[DQ],cumsumwinMethods_[KO],cumsumwinMethods_[SUB],eventDate.date,fighterId,duration,winner,fighter_nbr,count_of_fights
6266,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2019-10-19 04:00:00,1,300.0,True,1,0
6353,300.0,1,51.0,35.0,0.0,0.0,0.0,0.0,51.0,35.0,0.0,0.0,25.0,11.0,15.0,15.0,11.0,9.0,51.0,35.0,0.0,0.0,0.0,0.0,0,0,0,0,2019-12-21 00:00:00,1,300.0,False,1,1
6532,600.0,1,88.0,48.0,0.0,0.0,0.0,0.0,88.0,48.0,0.0,0.0,55.0,17.0,18.0,18.0,15.0,13.0,88.0,48.0,0.0,0.0,0.0,0.0,0,0,0,0,2020-06-27 00:00:00,1,161.0,True,1,2
6585,761.0,2,110.0,63.0,0.0,0.0,0.0,0.0,110.0,63.0,0.0,1.0,67.0,23.0,20.0,20.0,23.0,20.0,109.0,62.0,0.0,0.0,1.0,1.0,0,0,1,0,2020-07-25 00:00:00,1,156.0,True,1,3
6733,917.0,3,146.0,91.0,0.0,0.0,0.0,0.0,146.0,91.0,0.0,1.0,87.0,35.0,26.0,26.0,33.0,30.0,133.0,81.0,0.0,0.0,13.0,10.0,0,0,2,0,2020-11-07 00:00:00,1,300.0,False,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6820,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2021-01-20 00:00:00,3664,248.0,True,2,0
6806,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2021-01-16 00:00:00,3666,300.0,True,2,0
6836,300.0,1,93.0,42.0,2.0,0.0,0.0,0.0,93.0,42.0,0.0,0.0,47.0,13.0,26.0,15.0,20.0,14.0,90.0,41.0,3.0,1.0,0.0,0.0,1,0,0,0,2021-02-06 00:00:00,3666,300.0,False,2,1
6813,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2021-01-20 00:00:00,3667,240.0,False,2,0


In [18]:
ready_cols = ['cumsum' + col for col in col_name[3:-1]]
ready_cols = ready_cols + ['count_of_fights']

In [19]:
# Save data for future inference

with open('../../data/Catboost_v0/ready_cols_05.04.2021.txt', 'w') as outfile:
    outfile.write(str(ready_cols))
    
f_stats_events_cumulative['fighterName'] = f_stats_events_cumulative['fighterId'].replace(f_name_dict)
f_stats_events_cumulative.to_csv('../../data/Catboost_v0/f_stats_events_cumulative_05.04.2021.csv')

# Prepare data for training and validation

In [20]:
events_df_joined = events_df.join(
    f_stats_events_cumulative[f_stats_events_cumulative['fighter_nbr'] == 1][ready_cols].add_prefix("f1_")) #, on="id"

events_df_joined = events_df_joined.join(
    f_stats_events_cumulative[f_stats_events_cumulative['fighter_nbr'] == 2][ready_cols].add_prefix("f2_"))

events_df_joined#.head(5)

Unnamed: 0,index,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone,f1_cumsumwinner,f1_cumsum_hitsTotal,f1_cumsum_hitsSuccessful,f1_cumsum_takedownTotal,f1_cumsum_takedownSuccessful,f1_cumsum_submissionAttempts,f1_cumsum_takeovers,f1_cumsum_accentedHitsTotal,f1_cumsum_accentedHitsSuccessful,f1_cumsum_knockdowns,f1_cumsum_protectionPassage,f1_cumsum_hitsHeadTotal,f1_cumsum_hitsHeadSuccessful,f1_cumsum_hitsBodyTotal,f1_cumsum_hitsBodySuccessful,f1_cumsum_hitsLegsTotal,f1_cumsum_hitsLegsSuccessful,f1_cumsum_accentedHitsPositionDistanceTotal,f1_cumsum_accentedHitsPositionDistanceSuccessful,f1_cumsum_accentedHitsPositionClinchTotal,f1_cumsum_accentedHitsPositionClinchSuccessful,f1_cumsum_accentedHitsPositionParterTotal,f1_cumsum_accentedHitsPositionParterSuccessful,f1_cumsumwinMethods_[DEC],f1_cumsumwinMethods_[DQ],f1_cumsumwinMethods_[KO],f1_cumsumwinMethods_[SUB],f1_count_of_fights,f2_cumsumwinner,f2_cumsum_hitsTotal,f2_cumsum_hitsSuccessful,f2_cumsum_takedownTotal,f2_cumsum_takedownSuccessful,f2_cumsum_submissionAttempts,f2_cumsum_takeovers,f2_cumsum_accentedHitsTotal,f2_cumsum_accentedHitsSuccessful,f2_cumsum_knockdowns,f2_cumsum_protectionPassage,f2_cumsum_hitsHeadTotal,f2_cumsum_hitsHeadSuccessful,f2_cumsum_hitsBodyTotal,f2_cumsum_hitsBodySuccessful,f2_cumsum_hitsLegsTotal,f2_cumsum_hitsLegsSuccessful,f2_cumsum_accentedHitsPositionDistanceTotal,f2_cumsum_accentedHitsPositionDistanceSuccessful,f2_cumsum_accentedHitsPositionClinchTotal,f2_cumsum_accentedHitsPositionClinchSuccessful,f2_cumsum_accentedHitsPositionParterTotal,f2_cumsum_accentedHitsPositionParterSuccessful,f2_cumsumwinMethods_[DEC],f2_cumsumwinMethods_[DQ],f2_cumsumwinMethods_[KO],f2_cumsumwinMethods_[SUB],f2_count_of_fights
0,0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,1,5.0,3.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,2.0,2.0,0,0,1,0,1
1,1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0,0,1,0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
2,2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,1,5.0,3.0,0.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,2.0,2.0,0,0,1,1,2
3,3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0,0,1,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,2,11.0,7.0,4.0,2.0,1.0,0.0,5.0,1.0,0.0,4.0,2.0,1.0,0.0,0.0,3.0,0.0,4.0,0.0,0.0,0.0,1.0,1.0,0,0,0,2,2
4,4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0,0,0,0,0,1,0,1,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0,0,0,1,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0,0,1,0,0,0,0,4,260.0,131.0,7.0,4.0,0.0,1.0,223.0,101.0,0.0,3.0,162.0,60.0,47.0,29.0,14.0,12.0,182.0,75.0,18.0,11.0,23.0,15.0,2,0,4,0,6,3,185.0,102.0,6.0,3.0,3.0,1.0,135.0,58.0,0.0,3.0,116.0,44.0,9.0,5.0,10.0,9.0,107.0,34.0,3.0,3.0,25.0,21.0,4,0,0,1,5
6873,7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
6874,7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0,0,1,0,0,0,0,1,75.0,34.0,0.0,0.0,0.0,0.0,68.0,27.0,0.0,0.0,47.0,12.0,8.0,3.0,13.0,12.0,58.0,21.0,2.0,1.0,8.0,5.0,1,0,1,1,3,1,102.0,49.0,1.0,1.0,0.0,2.0,58.0,23.0,0.0,0.0,54.0,19.0,3.0,3.0,1.0,1.0,30.0,13.0,3.0,1.0,25.0,9.0,0,0,1,0,1
6875,7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0,0,0,0,0,1,0,10,534.0,336.0,14.0,11.0,5.0,1.0,384.0,196.0,0.0,12.0,332.0,163.0,39.0,22.0,13.0,11.0,220.0,80.0,18.0,10.0,146.0,106.0,6,0,2,6,14,3,154.0,102.0,6.0,3.0,2.0,0.0,109.0,68.0,0.0,5.0,81.0,43.0,7.0,6.0,21.0,19.0,94.0,55.0,3.0,2.0,12.0,11.0,2,0,0,1,3


### Считаем разницу для колонок

In [21]:
supl_cols = ['city', 'country', 'duration', 'eventDate.date',
'eventDate.timezone', 'fighterId_1', 'fighterId_2', 'id', 'name',
'rounds', 'timezone', 'weightCategory.id', 'weightCategory.name',
'winnerId', 'f1_count_of_fights', 'f2_count_of_fights', 'f1_odds', 'f2_odds']

df = pd.DataFrame(index=events_df_joined.index)
df[supl_cols] = events_df_joined[supl_cols]

# Колонки, разницу для которых мы считаем
ready_cols

def difference(df, events_df_joined, cols):
    # цикл заменяет столбцы характеристик каждого бойца столбцами разницы этих характеристик
    for col in cols:
        df[col+'_difference'] = events_df_joined['f1_'+col].astype(float) - events_df_joined['f2_'+col].astype(float)

    df['odds_difference'] = events_df_joined['f1_odds'] - events_df_joined['f2_odds'] # не стал удалять столбцы с возрастом
    df['age_difference'] = events_df_joined['f1_age'] - events_df_joined['f2_age'] # не стал удалять столбцы с возрастом
    df['winner'] = df['winnerId'] == df['fighterId_1']

    return df

def difference_reversed(df, events_df_joined, cols):
    # цикл заменяет столбцы характеристик каждого бойца столбцами разницы этих характеристик
    for col in cols:
        df[col+'_difference'] = events_df_joined['f2_'+col].astype(float) - events_df_joined['f1_'+col].astype(float)

    df['odds_difference'] = events_df_joined['f2_odds'] - events_df_joined['f1_odds'] # не стал удалять столбцы с возрастом
    df['age_difference'] = events_df_joined['f2_age'] - events_df_joined['f1_age'] # не стал удалять столбцы с возрастом
    df['winner'] = df['winnerId'] == df['fighterId_2']

    return df

df['fight_ID'] = range(0,6877)
df_combined = difference(df.copy(), events_df_joined, ready_cols)
df_combined_reversed = difference_reversed(df.copy(), events_df_joined, ready_cols)

df_combined['type'] = 'straight'
df_combined_reversed['type'] = 'reversed'

In [22]:
df_combined.columns

Index(['city', 'country', 'duration', 'eventDate.date', 'eventDate.timezone',
       'fighterId_1', 'fighterId_2', 'id', 'name', 'rounds', 'timezone',
       'weightCategory.id', 'weightCategory.name', 'winnerId',
       'f1_count_of_fights', 'f2_count_of_fights', 'f1_odds', 'f2_odds',
       'fight_ID', 'cumsumwinner_difference', 'cumsum_hitsTotal_difference',
       'cumsum_hitsSuccessful_difference', 'cumsum_takedownTotal_difference',
       'cumsum_takedownSuccessful_difference',
       'cumsum_submissionAttempts_difference', 'cumsum_takeovers_difference',
       'cumsum_accentedHitsTotal_difference',
       'cumsum_accentedHitsSuccessful_difference',
       'cumsum_knockdowns_difference', 'cumsum_protectionPassage_difference',
       'cumsum_hitsHeadTotal_difference',
       'cumsum_hitsHeadSuccessful_difference',
       'cumsum_hitsBodyTotal_difference',
       'cumsum_hitsBodySuccessful_difference',
       'cumsum_hitsLegsTotal_difference',
       'cumsum_hitsLegsSuccessful_diff

In [23]:
df_combined_reversed = df_combined_reversed.rename(columns={
                             'fighterId_1':'fighterId_2',
                             'fighterId_2':'fighterId_1',
                            
                             'f1_count_of_fights': 'f2_count_of_fights', 
                             'f2_count_of_fights': 'f1_count_of_fights', 
                             
                             'f1_odds': 'f2_odds',
                             'f2_odds': 'f1_odds'
                            })

In [24]:
df_combined_total = df_combined.append(df_combined_reversed[df_combined.columns])
df_combined_total

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winnerId,f1_count_of_fights,f2_count_of_fights,f1_odds,f2_odds,fight_ID,cumsumwinner_difference,cumsum_hitsTotal_difference,cumsum_hitsSuccessful_difference,cumsum_takedownTotal_difference,cumsum_takedownSuccessful_difference,cumsum_submissionAttempts_difference,cumsum_takeovers_difference,cumsum_accentedHitsTotal_difference,cumsum_accentedHitsSuccessful_difference,cumsum_knockdowns_difference,cumsum_protectionPassage_difference,cumsum_hitsHeadTotal_difference,cumsum_hitsHeadSuccessful_difference,cumsum_hitsBodyTotal_difference,cumsum_hitsBodySuccessful_difference,cumsum_hitsLegsTotal_difference,cumsum_hitsLegsSuccessful_difference,cumsum_accentedHitsPositionDistanceTotal_difference,cumsum_accentedHitsPositionDistanceSuccessful_difference,cumsum_accentedHitsPositionClinchTotal_difference,cumsum_accentedHitsPositionClinchSuccessful_difference,cumsum_accentedHitsPositionParterTotal_difference,cumsum_accentedHitsPositionParterSuccessful_difference,cumsumwinMethods_[DEC]_difference,cumsumwinMethods_[DQ]_difference,cumsumwinMethods_[KO]_difference,cumsumwinMethods_[SUB]_difference,count_of_fights_difference,odds_difference,age_difference,winner,type
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,0,1,,,0,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,0.0,-1.0,,-7.0,True,straight
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1777.0,0,0,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,True,straight
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,1923.0,0,2,,,2,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,-1.0,-2.0,,-11.0,False,straight
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1646.0,0,2,,,3,-2.0,-11.0,-7.0,-4.0,-2.0,-1.0,0.0,-5.0,-1.0,0.0,-4.0,-2.0,-1.0,0.0,0.0,-3.0,0.0,-4.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,-2.0,-2.0,,2.0,False,straight
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,1,0,,,4,1.0,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,,-3.0,True,straight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,668,421,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,668.0,5,6,2.84,1.46,6872,-1.0,-75.0,-29.0,-1.0,-1.0,3.0,0.0,-88.0,-43.0,0.0,0.0,-46.0,-16.0,-38.0,-24.0,-4.0,-3.0,-75.0,-41.0,-15.0,-8.0,2.0,6.0,2.0,0.0,-4.0,1.0,-1.0,1.38,-3.0,True,reversed
6873,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3521,3504,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,3504.0,0,0,2.53,1.56,6873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.97,3.0,False,reversed
6874,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,3463,1334,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,1334.0,1,3,2.37,1.63,6874,0.0,27.0,15.0,1.0,1.0,0.0,2.0,-10.0,-4.0,0.0,0.0,7.0,7.0,-5.0,0.0,-12.0,-11.0,-28.0,-8.0,1.0,0.0,17.0,4.0,-1.0,0.0,0.0,-1.0,-2.0,0.74,4.0,False,reversed
6875,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,2073,246,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,2073.0,3,14,1.48,2.75,6875,-7.0,-380.0,-234.0,-8.0,-8.0,-3.0,-1.0,-275.0,-128.0,0.0,-7.0,-251.0,-120.0,-32.0,-16.0,8.0,8.0,-126.0,-25.0,-15.0,-8.0,-134.0,-95.0,-4.0,0.0,-2.0,-5.0,-11.0,-1.27,2.0,True,reversed


In [26]:
print(df_combined_total.shape)
df_combined_total = df_combined_total[df_combined_total.isna().sum(axis=1) < 10]
print(df_combined_total.shape)
df_combined_total.to_pickle('../../df_combined_total.pkl')
df_combined_total

(13656, 51)
(13656, 51)


Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winnerId,f1_count_of_fights,f2_count_of_fights,f1_odds,f2_odds,fight_ID,cumsumwinner_difference,cumsum_hitsTotal_difference,cumsum_hitsSuccessful_difference,cumsum_takedownTotal_difference,cumsum_takedownSuccessful_difference,cumsum_submissionAttempts_difference,cumsum_takeovers_difference,cumsum_accentedHitsTotal_difference,cumsum_accentedHitsSuccessful_difference,cumsum_knockdowns_difference,cumsum_protectionPassage_difference,cumsum_hitsHeadTotal_difference,cumsum_hitsHeadSuccessful_difference,cumsum_hitsBodyTotal_difference,cumsum_hitsBodySuccessful_difference,cumsum_hitsLegsTotal_difference,cumsum_hitsLegsSuccessful_difference,cumsum_accentedHitsPositionDistanceTotal_difference,cumsum_accentedHitsPositionDistanceSuccessful_difference,cumsum_accentedHitsPositionClinchTotal_difference,cumsum_accentedHitsPositionClinchSuccessful_difference,cumsum_accentedHitsPositionParterTotal_difference,cumsum_accentedHitsPositionParterSuccessful_difference,cumsumwinMethods_[DEC]_difference,cumsumwinMethods_[DQ]_difference,cumsumwinMethods_[KO]_difference,cumsumwinMethods_[SUB]_difference,count_of_fights_difference,odds_difference,age_difference,winner,type
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,0,1,,,0,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,0.0,-1.0,,-7.0,True,straight
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1777.0,0,0,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,True,straight
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,1923.0,0,2,,,2,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,-1.0,-2.0,,-11.0,False,straight
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1646.0,0,2,,,3,-2.0,-11.0,-7.0,-4.0,-2.0,-1.0,0.0,-5.0,-1.0,0.0,-4.0,-2.0,-1.0,0.0,0.0,-3.0,0.0,-4.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,-2.0,-2.0,,2.0,False,straight
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,1,0,,,4,1.0,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,,-3.0,True,straight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,668,421,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,668.0,5,6,2.84,1.46,6872,-1.0,-75.0,-29.0,-1.0,-1.0,3.0,0.0,-88.0,-43.0,0.0,0.0,-46.0,-16.0,-38.0,-24.0,-4.0,-3.0,-75.0,-41.0,-15.0,-8.0,2.0,6.0,2.0,0.0,-4.0,1.0,-1.0,1.38,-3.0,True,reversed
6873,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3521,3504,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,3504.0,0,0,2.53,1.56,6873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.97,3.0,False,reversed
6874,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,3463,1334,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,1334.0,1,3,2.37,1.63,6874,0.0,27.0,15.0,1.0,1.0,0.0,2.0,-10.0,-4.0,0.0,0.0,7.0,7.0,-5.0,0.0,-12.0,-11.0,-28.0,-8.0,1.0,0.0,17.0,4.0,-1.0,0.0,0.0,-1.0,-2.0,0.74,4.0,False,reversed
6875,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,2073,246,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,2073.0,3,14,1.48,2.75,6875,-7.0,-380.0,-234.0,-8.0,-8.0,-3.0,-1.0,-275.0,-128.0,0.0,-7.0,-251.0,-120.0,-32.0,-16.0,8.0,8.0,-126.0,-25.0,-15.0,-8.0,-134.0,-95.0,-4.0,0.0,-2.0,-5.0,-11.0,-1.27,2.0,True,reversed


In [27]:
print(df_combined.shape)
df_combined = df_combined[df_combined.isna().sum(axis=1) < 10]
print(df_combined.shape)
df_combined.to_pickle('../../df_combined.pkl')
df_combined

(6877, 51)
(6828, 51)


Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winnerId,f1_count_of_fights,f2_count_of_fights,f1_odds,f2_odds,fight_ID,cumsumwinner_difference,cumsum_hitsTotal_difference,cumsum_hitsSuccessful_difference,cumsum_takedownTotal_difference,cumsum_takedownSuccessful_difference,cumsum_submissionAttempts_difference,cumsum_takeovers_difference,cumsum_accentedHitsTotal_difference,cumsum_accentedHitsSuccessful_difference,cumsum_knockdowns_difference,cumsum_protectionPassage_difference,cumsum_hitsHeadTotal_difference,cumsum_hitsHeadSuccessful_difference,cumsum_hitsBodyTotal_difference,cumsum_hitsBodySuccessful_difference,cumsum_hitsLegsTotal_difference,cumsum_hitsLegsSuccessful_difference,cumsum_accentedHitsPositionDistanceTotal_difference,cumsum_accentedHitsPositionDistanceSuccessful_difference,cumsum_accentedHitsPositionClinchTotal_difference,cumsum_accentedHitsPositionClinchSuccessful_difference,cumsum_accentedHitsPositionParterTotal_difference,cumsum_accentedHitsPositionParterSuccessful_difference,cumsumwinMethods_[DEC]_difference,cumsumwinMethods_[DQ]_difference,cumsumwinMethods_[KO]_difference,cumsumwinMethods_[SUB]_difference,count_of_fights_difference,odds_difference,age_difference,winner,type
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,0,1,,,0,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,0.0,-1.0,,-7.0,True,straight
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1777.0,0,0,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,True,straight
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,1923.0,0,2,,,2,-1.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-5.0,-3.0,0.0,0.0,-5.0,-3.0,0.0,0.0,0.0,0.0,-3.0,-1.0,0.0,0.0,-2.0,-2.0,0.0,0.0,-1.0,-1.0,-2.0,,-11.0,False,straight
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,1646.0,0,2,,,3,-2.0,-11.0,-7.0,-4.0,-2.0,-1.0,0.0,-5.0,-1.0,0.0,-4.0,-2.0,-1.0,0.0,0.0,-3.0,0.0,-4.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,-2.0,-2.0,,2.0,False,straight
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,1646.0,1,0,,,4,1.0,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,,-3.0,True,straight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,668.0,6,5,1.46,2.84,6872,1.0,75.0,29.0,1.0,1.0,-3.0,0.0,88.0,43.0,0.0,0.0,46.0,16.0,38.0,24.0,4.0,3.0,75.0,41.0,15.0,8.0,-2.0,-6.0,-2.0,0.0,4.0,-1.0,1.0,-1.38,3.0,False,straight
6873,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,3504.0,0,0,1.56,2.53,6873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.97,-3.0,True,straight
6874,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,1334.0,3,1,1.63,2.37,6874,0.0,-27.0,-15.0,-1.0,-1.0,0.0,-2.0,10.0,4.0,0.0,0.0,-7.0,-7.0,5.0,0.0,12.0,11.0,28.0,8.0,-1.0,0.0,-17.0,-4.0,1.0,0.0,0.0,1.0,2.0,-0.74,-4.0,True,straight
6875,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,2073.0,14,3,2.75,1.48,6875,7.0,380.0,234.0,8.0,8.0,3.0,1.0,275.0,128.0,0.0,7.0,251.0,120.0,32.0,16.0,-8.0,-8.0,126.0,25.0,15.0,8.0,134.0,95.0,4.0,0.0,2.0,5.0,11.0,1.27,-2.0,False,straight
