In [1]:
%reload_ext autoreload
%autoreload 2

import pandas as pd
import json
from pandas import json_normalize
from tqdm.notebook import tqdm

import numpy as np
import ast
from typing import Dict, List
import sys
import os

sys.path.append(os.path.join(sys.path[0], '../'))

f

pd.set_option('display.precision',6)
pd.set_option('display.max_columns', None)
pd.options.mode.chained_assignment = None

In [5]:
fighters_df = pd.read_csv("../../data/0.fighters_raw.csv", index_col=0)
fighters_df["dateOfBirth"] = pd.to_datetime(fighters_df["dateOfBirth"])
fighters_cols = [
    "id",
    "name",
    "weight",
    "height",
    "armSpan",
    "legSwing",
    "weightCategory.id",
    "weightCategory.name",
    "dateOfBirth",
    "country",
    "city",
    "timezone",
]
fighters_df = fighters_df[fighters_cols]
fighters_df.set_index("id", inplace=True)
f_name_dict = fighters_df['name'].to_dict()
f_name_dict

### Исправляем поле `country` для бойцов из США
# У некоторых бойцов из США в поле `country` указан штат, а не страна. \
# Также заменяем написание `United States` на `USA`, чтобы название соответствовало данным из таблицы с боями.

usa_state_names = [
    "Alaska", "Alabama", "Arkansas", "American Samoa", "Arizona", "California", "Colorado", "Connecticut",
    "District ", "of Columbia", "Delaware", "Florida", "Georgia", "Guam", "Hawaii", "Iowa", "Idaho",
    "Illinois", "Indiana", "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland", "Maine",
    "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", "North Carolina", "North Dakota", 
    "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", "Ohio", "Oklahoma", 
    "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
    "Texas", "Utah", "Virginia", "Virgin Islands", "Vermont", "Washington", "Wisconsin", "West Virginia", 
    "Wyoming",
]

fighters_df.loc[fighters_df["country"] == "United States", "country"] = "USA"
fighters_df.loc[fighters_df["country"].isin(usa_state_names), "country"] = "USA"


### Выбросы размаха ног меняем на NaN, для дальнейшей обработки
fighters_df.replace(fighters_df.legSwing.max(), np.nan, inplace=True)
fighters_df.replace(fighters_df.legSwing.min(), np.nan, inplace=True)

### Убираем строки с выбросами роста
fighters_df = fighters_df[fighters_df['height'] < 230]
fighters_df = fighters_df[fighters_df['height'] > 145]

### Убираем строки с выбросами веса
fighters_df = fighters_df[fighters_df['weight'] > 47]
fighters_df = fighters_df[fighters_df['weight'] < 250]

### Находим все возможные весовые категории
avg_weight_in_weight_category = fighters_df.groupby(by="weightCategory.id").mean()['weight']
avg_weight_in_weight_category


fighters_df['armSpan'] = fighters_df.apply(
    lambda row: replace_null_arm_span_to_height(row),
    axis=1)

fighters_df['height'] = fighters_df.apply(
    lambda row: replace_null_height_to_arm_span(row),
    axis=1)

### Убираем пустые значения размаха ног, средним по колонке
fighters_df['legSwing'].fillna(np.round(fighters_df['legSwing'].mean(), 1), inplace=True)


## Предобработка данных о боях

In [6]:
events_df = pd.read_csv("../../data/0.events_raw.csv", index_col=0)
events_df["eventDate.date"] = pd.to_datetime(events_df["eventDate.date"])
events_df.reset_index(inplace=True, drop=True)

### Убираем строки с незавершенными боями и боями, где отсутствует `winnerId`
events_df.drop(events_df[events_df["completed"] == False].index, inplace=True)
events_df.drop(events_df[events_df["winnerId"].isna()].index, inplace=True)

### Убираем строки, где `winnerId` не совпадает с айди ни одного из бойцов
events_df = events_df[~((events_df["winnerId"] != events_df["fighterId_1"]) & (events_df["winnerId"] != events_df["fighterId_2"]))]

### Удаляем лишние колонки
events_df.drop(
    columns=["completed", "eventDate.timezone_type", "link"],
    inplace=True,
)

### Извлекаем данные из колонок `avgOdds` и `fighters`
def parse_odds(row: pd.Series) -> pd.Series:
    """
    Parse 'avgOdds' column.
    :param row: Row of the events dataframe.
    :return: pd.Series with odds for the 1st and the 2nd fighters.
    """
    avg_odds = row["avgOdds"]
    if avg_odds == "[]" or avg_odds == np.nan:
        return pd.Series([np.nan] * 2)
    avg_odds = ast.literal_eval(avg_odds)
    if avg_odds[0]["fighterId"] == row["fighterId_1"]:
        return pd.Series([f.get("value", np.nan) for f in avg_odds])
    else:
        return pd.Series([f.get("value", np.nan) for f in reversed(avg_odds)])
    
events_df[["f1_odds", "f2_odds"]] = events_df[["avgOdds", "fighterId_1", "fighterId_2"]]\
        .apply(lambda row: parse_odds(row), axis=1)

events_df = events_df.drop(columns="avgOdds")
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,fighters,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,"[{'fighterId': 1777, 'fightStats': {'hitsTotal...",5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,"[{'fighterId': 1908, 'fightStats': {'hitsTotal...",5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,"[{'fighterId': 1631, 'fightStats': {'hitsTotal...",5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,"[{'fighterId': 1646, 'fightStats': {'hitsTotal...",5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,"[{'fighterId': 421, 'fightStats': {'hitsTotal'...",40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,"[{'fighterId': 3504, 'fightStats': {'hitsTotal...",40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,"[{'fighterId': 1334, 'fightStats': {'hitsTotal...",40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,"[{'fighterId': 246, 'fightStats': {'hitsTotal'...",40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48


In [6]:
#### Парсим колонку `fighters`

fighter_stats_keys = [
    "hitsTotal",
    "hitsSuccessful",
    "takedownTotal",
    "takedownSuccessful",
    "submissionAttempts",
    "takeovers",
    "accentedHitsTotal",
    "accentedHitsSuccessful",
    "knockdowns",
    "protectionPassage",
    "hitsHeadTotal",
    "hitsHeadSuccessful",
    "hitsBodyTotal",
    "hitsBodySuccessful",
    "hitsLegsTotal",
    "hitsLegsSuccessful",
    "accentedHitsPositionDistanceTotal",
    "accentedHitsPositionDistanceSuccessful",
    "accentedHitsPositionClinchTotal",
    "accentedHitsPositionClinchSuccessful",
    "accentedHitsPositionParterTotal",
    "accentedHitsPositionParterSuccessful",
]


def get_fighter_stats_cols() -> List[str]:
    """
    Get list of fight stats column names for each fighter.
    :return: List of column names with 'f1_' prefix
    for the first fighter and 'f2_' prefix for the second.
    """
    fighter_stats_cols = []
    for i in range(1, 3):
        for k in fighter_stats_keys:
            fighter_stats_cols.append(f"f{i}_{k}")
    return fighter_stats_cols


def sum_round_stats(stats: List[Dict[str, int]]) -> List[int]:
    """
    Sum stats for a fighter for all rounds of a fight.
    :param stats: List with stats from object of 'fighters' column.
    :return: Stats for all rounds for a fighter as a list.
    """
    if len(stats) == 0:
        return [np.nan for _ in range(len(fighter_stats_keys))]
    res = {k: 0 for k in fighter_stats_keys}
    for i in stats:
        for k in res:
            res[k] = i.get(k, 0)
    return list(res.values())


def parse_fight_data(row: pd.Series) -> pd.Series:
    """
    Parse 'fighters' column.
    :param row: Row of the events dataframe.
    :return: pd.Series with stats for both fighters.
    """
    fighters = row["fighters"]
    if fighters == "[]" or fighters == np.nan:
        return pd.Series([np.nan for _ in range(len(fighter_stats_keys))])
    cols = []
    fighters = ast.literal_eval(fighters)
    if fighters[0]["fighterId"] == row["fighterId_2"]:
        fighters = reversed(fighters)
    for f in fighters:
        cols.extend(sum_round_stats(f["roundStats"]))
    return pd.Series(cols)

events_df[get_fighter_stats_cols()] = events_df[
    ["fighters", "fighterId_1", "fighterId_2"]
].apply(lambda row: parse_fight_data(row), axis=1)
events_df.drop(columns="fighters", inplace=True)
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0


In [7]:
### Добавляем данные о бойцах в датафрейм с боями

fighter_data_cols = fighters_df.drop(columns=["weightCategory.id", "weightCategory.name"]).columns
# ['name', 'weight', 'height', 'armSpan', 'legSwing', 'dateOfBirth',
#        'country', 'city', 'timezone']

events_df = events_df.join(fighters_df[fighter_data_cols].add_prefix("f1_"), 
                           on="fighterId_1")

events_df = events_df.join(fighters_df[fighter_data_cols].add_prefix("f2_"), 
                           on="fighterId_2")

events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York


In [8]:
### Добавляем признак `age`
def add_age(row: pd.Series) -> pd.Series:
    """
    Add age for both fighters.
    :param row: Row of the events dataframe.
    :return: pd.Series with age of fighters in years.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        try:
            age = row["eventDate.date"].year - row[prefix + "dateOfBirth"].year
        except Exception:
            age = np.nan
        result.append(age)
    return pd.Series(result)


events_df[["f1_age", "f2_age"]] = events_df[["eventDate.date", "f1_dateOfBirth", "f2_dateOfBirth"]]\
    .apply(lambda row: add_age(row), axis=1)

events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0


In [9]:
### Добавляем признаки `isHomeCity`, `isHomeCountry`, `isHomeTimezone`
# Возможные значения переменных: 0 и 1 \
# `isHomeCity` - боец дерется в родном городе \
# `isHomeCountry` - боец дерется в родной стране \
# `isHomeTimezone` - боец дерется в своем часовом поясе

def get_territorial_cols() -> List[str]:
    """
    Get list of territorial column names for each fighter.
    :return: List of column names with 'f1_' prefix
    for the first fighter and 'f2_' prefix for the second.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        for key in ["isHomeCity", "isHomeCountry", "isHomeTimezone"]:
            result.append(prefix + key)
    return result


def fill_territorial_cols(row: pd.Series) -> pd.Series:
    """
    Add binary features 'isHomeCity', 'isHomeCountry', 'isHomeTimezone'
    for each fighter.
    :param row: Row of the events dataframe.
    :return: pd.Series with features for both fighters.
    """
    result = []
    for prefix in ["f1_", "f2_"]:
        for key in ["city", "country", "timezone"]:
            result.append(int(row[key] == row[prefix + key]))
    return pd.Series(result)

events_df[get_territorial_cols()] = events_df.apply(
    lambda row: fill_territorial_cols(row), axis=1
)
events_df

Unnamed: 0,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone
0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0,0,0,0,0,0,0
1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0,0,1,0,0,1,0
2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0,0,1,0,0,0,0
3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0,0,1,0,0,0,0
4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0,0,1,0,0,0,0
7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0,0,0,0,0,0,0
7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0,0,1,0,0,0,0
7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0,0,0,0,0,1,0


### **Какую статистику необходимо посчитать накопительным итогом к бою:**
- **winning_streak** - сумма побед **подряд** по одному бойцу (серия побед) count of winnerId in sequence group by winnerId
- **wins_by_knockowt** - сумма побед нокаутами  count winMethods == ['KO']  group by winnerId
* **wins_by_submissions** - сумма чистых побед (болевой прием, который приводит к сдаче соперника) count winMethods == ['SUB'] group by winnerId
* **striking_accuracy** - точность ударов sum(f1_accentedHitsSuccessful)/sum(f1_accentedHitsTotal) or sum(f2_accentedHitsSuccessful)/sum(f2_accentedHitsTotal)  group by fighterId_1 or fighterId_2
* **Strikes_Landed** - нанесено акцентовых ударов sum(f1_accentedHitsSuccessful) or sum(f2_accentedHitsSuccessful) group by fighterId_1 or fighterId_2
* **Strikes_Attempted** - выброшено акцентовых ударов sum(f1_accentedHitsTotal) or sum(f2_accentedHitsTotal) group by fighterId_1 or fighterId_2
* **grappling_accuracy** - статистика в борьбе sum(f1_takedownSuccessful)/sum(f1_takedownTotal) or sum(f2_takedownSuccessful)/sum(f2_takedownTotal)  group by fighterId_1 or fighterId_2
* **takwdowns_landed** - Тейкдаунов выполнено sum(f1_takedownSuccessful) or sum(f2_takedownSuccessful) group by fighterId_1 or fighterId_2
* **Takedowns Attempted** - попыток Тейкдаунов sum(f1_takedownTotal) or sum(f2_takedownTotal) group by fighterId_1 or fighterId_2
* **Knockdown_ratio** - НОКДАУНОВ ЗА БОЙ/СРЕД. sum(f1_knockdowns)/count of figthts or sum(f2_knockdowns)/count of figthts group by fighterId_1 or fighterId_2
* **AVG_fight_time** - СРЕДНЕЕ ВРЕМЯ БОЯ sum(duration)/count of figthts group by fighterId_1 or fighterId_2
* **KO\TKO** - ко \ все победы
* **stricing_sucss_per_duration** - sum(Strikes_Landed) * 60 \sum(duration)

### Подготовка датафрэйма для кумулятивной суммы по статистике бойцов

In [10]:
events_df = events_df.reset_index()
events_df

Unnamed: 0,index,city,country,duration,eventDate.date,eventDate.timezone,fighterId_1,fighterId_2,id,name,rounds,timezone,weightCategory.id,weightCategory.name,winMethods,winnerId,f1_odds,f2_odds,f1_hitsTotal,f1_hitsSuccessful,f1_takedownTotal,f1_takedownSuccessful,f1_submissionAttempts,f1_takeovers,f1_accentedHitsTotal,f1_accentedHitsSuccessful,f1_knockdowns,f1_protectionPassage,f1_hitsHeadTotal,f1_hitsHeadSuccessful,f1_hitsBodyTotal,f1_hitsBodySuccessful,f1_hitsLegsTotal,f1_hitsLegsSuccessful,f1_accentedHitsPositionDistanceTotal,f1_accentedHitsPositionDistanceSuccessful,f1_accentedHitsPositionClinchTotal,f1_accentedHitsPositionClinchSuccessful,f1_accentedHitsPositionParterTotal,f1_accentedHitsPositionParterSuccessful,f2_hitsTotal,f2_hitsSuccessful,f2_takedownTotal,f2_takedownSuccessful,f2_submissionAttempts,f2_takeovers,f2_accentedHitsTotal,f2_accentedHitsSuccessful,f2_knockdowns,f2_protectionPassage,f2_hitsHeadTotal,f2_hitsHeadSuccessful,f2_hitsBodyTotal,f2_hitsBodySuccessful,f2_hitsLegsTotal,f2_hitsLegsSuccessful,f2_accentedHitsPositionDistanceTotal,f2_accentedHitsPositionDistanceSuccessful,f2_accentedHitsPositionClinchTotal,f2_accentedHitsPositionClinchSuccessful,f2_accentedHitsPositionParterTotal,f2_accentedHitsPositionParterSuccessful,f1_name,f1_weight,f1_height,f1_armSpan,f1_legSwing,f1_dateOfBirth,f1_country,f1_city,f1_timezone,f2_name,f2_weight,f2_height,f2_armSpan,f2_legSwing,f2_dateOfBirth,f2_country,f2_city,f2_timezone,f1_age,f2_age,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone
0,0,Denver,USA,104.0,1993-11-12,Europe/Berlin,1646,1923,5201,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,4.0,3.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,27.0,34.0,0,0,0,0,0,0
1,1,Denver,USA,52.0,1993-11-12,Europe/Berlin,1777,1883,5202,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1777.0,,,9.0,3.0,2.0,1.0,1.0,0.0,9.0,3.0,0.0,1.0,7.0,3.0,1.0,0.0,1.0,0.0,9.0,3.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,5.0,0.0,1.0,0.0,1.0,1.0,7.0,1.0,0.0,0.0,0.0,0.0,Jason DeLucia,86.18,180.34,180.34,101.70,1969-07-24,USA,,America/New_York,Trent Jenkins,83.91,187.96,187.96,101.7,1970-01-01,USA,,America/New_York,24.0,23.0,0,1,0,0,1,0
2,2,Denver,USA,59.0,1993-11-12,Europe/Berlin,1908,1923,5203,UFC 1,1.0,America/Denver,9,Тяжелый вес,['KO'],1923.0,,,3.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,0.0,0.0,17.0,11.0,0.0,0.0,13.0,7.0,1.0,1.0,3.0,3.0,8.0,5.0,0.0,0.0,9.0,6.0,Kevin Rosier,124.74,193.04,193.04,101.70,1970-01-01,USA,,America/New_York,Gerard Gordeau,97.98,195.58,195.58,101.7,1959-03-30,Netherlands,,Europe/Amsterdam,23.0,34.0,0,1,0,0,0,0
3,3,Denver,USA,57.0,1993-11-12,Europe/Berlin,1631,1646,5204,UFC 1,1.0,America/Denver,8,Полутяжелый вес,['SUB'],1646.0,,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,12.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ken Shamrock,92.99,185.42,182.88,101.70,1964-02-11,USA,Macon,America/New_York,Royce Gracie,79.38,185.42,185.42,101.7,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,29.0,27.0,0,1,0,0,0,0
4,4,Denver,USA,138.0,1993-11-12,Europe/Berlin,1646,1924,5205,UFC 1,1.0,America/Denver,7,Средний вес,['SUB'],1646.0,,,7.0,4.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Royce Gracie,79.38,185.42,185.42,101.70,1966-12-12,Brazil,Rio de Janeiro,America/Sao_Paulo,Art Jimmerson,88.90,185.42,185.42,101.7,1963-08-04,USA,,America/New_York,27.0,30.0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6872,7090,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,421,668,40497,UFC Fight Night,3.0,America/Los_Angeles,5,Легкий вес,['DEC'],668.0,1.46,2.84,63.0,17.0,0.0,0.0,0.0,0.0,63.0,17.0,0.0,0.0,45.0,6.0,10.0,3.0,8.0,8.0,63.0,17.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,0.0,0.0,71.0,16.0,0.0,0.0,62.0,12.0,3.0,1.0,6.0,3.0,71.0,16.0,0.0,0.0,0.0,0.0,Alexander Hernandez,70.31,175.26,182.88,100.33,1992-10-01,USA,,America/New_York,Thiago Moises,70.31,175.26,177.80,101.7,1995-03-23,Brazil,Idaiatuba,America/Sao_Paulo,29.0,26.0,0,1,0,0,0,0
6873,7091,Las Vegas,USA,158.0,2021-02-27,Europe/Berlin,3504,3521,40498,UFC Fight Night,3.0,America/Los_Angeles,3,Легчайший вес,['KO'],3504.0,1.56,2.53,90.0,72.0,1.0,1.0,0.0,2.0,68.0,50.0,0.0,0.0,64.0,47.0,3.0,2.0,1.0,1.0,3.0,2.0,0.0,0.0,65.0,48.0,4.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,Ronnie Lawrence,61.24,172.72,172.72,101.70,1992-06-13,,,,Vince Cachero,65.77,167.64,172.72,101.7,1989-11-07,,,,29.0,32.0,0,0,0,0,0,0
6874,7093,Las Vegas,USA,300.0,2021-02-27,Europe/Berlin,1334,3463,40500,UFC Fight Night,3.0,America/Los_Angeles,8,Полутяжелый вес,['DEC'],1334.0,1.63,2.37,61.0,29.0,0.0,0.0,0.0,0.0,57.0,26.0,0.0,0.0,40.0,15.0,3.0,2.0,14.0,9.0,57.0,26.0,0.0,0.0,0.0,0.0,78.0,30.0,0.0,0.0,0.0,0.0,75.0,27.0,0.0,0.0,56.0,12.0,4.0,2.0,15.0,13.0,75.0,27.0,0.0,0.0,0.0,0.0,Dustin Jacoby,83.91,193.04,198.12,101.70,1988-04-04,USA,Fort Morgan,America/New_York,Maxim Grishin,92.99,190.50,190.50,101.7,1984-05-02,,,,33.0,37.0,0,1,0,0,0,0
6875,7098,Las Vegas,USA,208.0,2021-03-06,Europe/Berlin,246,2073,40452,UFC 259,3.0,America/Los_Angeles,6,Полусредний вес,['SUB'],2073.0,2.75,1.48,33.0,5.0,0.0,0.0,0.0,0.0,32.0,4.0,0.0,0.0,28.0,3.0,3.0,0.0,1.0,1.0,32.0,4.0,0.0,0.0,0.0,0.0,67.0,54.0,0.0,0.0,1.0,1.0,23.0,17.0,0.0,0.0,18.0,12.0,0.0,0.0,5.0,5.0,11.0,6.0,0.0,0.0,12.0,11.0,Jake Matthews,77.11,180.34,185.42,109.22,1994-08-19,Australia,,Australia/Brisbane,Sean Brady,77.11,175.26,175.26,101.7,1992-11-23,USA,,America/New_York,27.0,29.0,0,0,0,0,1,0


In [11]:
stats_events_summary = events_df.copy()
stats_events_summary['winner_1'] = stats_events_summary['winnerId'] == stats_events_summary['fighterId_1'] 
stats_events_summary['winner_2'] = stats_events_summary['winnerId'] == stats_events_summary['fighterId_2'] 
stats_events_summary['winnerId'] = stats_events_summary['winnerId'] 
stats_events_summary = pd.get_dummies(stats_events_summary, columns = ['winMethods'], dtype=int)
stats_events_summary.columns = stats_events_summary.columns.str.replace('\'','')
fighter1_events = stats_events_summary[['eventDate.date','fighterId_1','duration','winner_1','f1_hitsTotal', 'f1_hitsSuccessful', 'f1_takedownTotal',
       'f1_takedownSuccessful', 'f1_submissionAttempts', 'f1_takeovers',
       'f1_accentedHitsTotal', 'f1_accentedHitsSuccessful', 'f1_knockdowns',
       'f1_protectionPassage', 'f1_hitsHeadTotal', 'f1_hitsHeadSuccessful',
       'f1_hitsBodyTotal', 'f1_hitsBodySuccessful', 'f1_hitsLegsTotal',
       'f1_hitsLegsSuccessful', 'f1_accentedHitsPositionDistanceTotal',
       'f1_accentedHitsPositionDistanceSuccessful',
       'f1_accentedHitsPositionClinchTotal',
       'f1_accentedHitsPositionClinchSuccessful',
       'f1_accentedHitsPositionParterTotal',
       'f1_accentedHitsPositionParterSuccessful','winMethods_[DEC]', 'winMethods_[DQ]', 'winMethods_[KO]',
       'winMethods_[SUB]', 'f1_odds', 'f1_age', 'winnerId']]
fighter1_events[['fighter_nbr']] = 1 # add what order was in event df

fighter2_events = stats_events_summary[['eventDate.date','fighterId_2','duration','winner_2','f2_hitsTotal',
       'f2_hitsSuccessful', 'f2_takedownTotal', 'f2_takedownSuccessful',
       'f2_submissionAttempts', 'f2_takeovers', 'f2_accentedHitsTotal',
       'f2_accentedHitsSuccessful', 'f2_knockdowns', 'f2_protectionPassage',
       'f2_hitsHeadTotal', 'f2_hitsHeadSuccessful', 'f2_hitsBodyTotal',
       'f2_hitsBodySuccessful', 'f2_hitsLegsTotal', 'f2_hitsLegsSuccessful',
       'f2_accentedHitsPositionDistanceTotal',
       'f2_accentedHitsPositionDistanceSuccessful',
       'f2_accentedHitsPositionClinchTotal',
       'f2_accentedHitsPositionClinchSuccessful',
       'f2_accentedHitsPositionParterTotal',
       'f2_accentedHitsPositionParterSuccessful','winMethods_[DEC]', 'winMethods_[DQ]', 'winMethods_[KO]',
       'winMethods_[SUB]', 'f2_odds', 'f2_age', 'winnerId']]
fighter2_events[['fighter_nbr']] = 2 # add what order was in event df

supl_cols = ['eventDate.date', 'fighterId', 'fighter_nbr', 'odds', 'age', 'winnerId']

current_col_names =  ['current_duration', 'winner', 'current_hitsTotal', 
             'current_hitsSuccessful',  'current_takedownTotal',  'current_takedownSuccessful', 
             'current_submissionAttempts',  'current_takeovers',  'current_accentedHitsTotal', 
             'current_accentedHitsSuccessful',  'current_knockdowns',  'current_protectionPassage', 
             'current_hitsHeadTotal',  'current_hitsHeadSuccessful',  'current_hitsBodyTotal', 
             'current_hitsBodySuccessful',  'current_hitsLegsTotal',  'current_hitsLegsSuccessful', 
             'current_accentedHitsPositionDistanceTotal', 'current_accentedHitsPositionDistanceSuccessful', 
             'current_accentedHitsPositionClinchTotal', 'current_accentedHitsPositionClinchSuccessful', 
             'current_accentedHitsPositionParterTotal', 'current_accentedHitsPositionParterSuccessful', 
             'current_winMethods_[DEC]', 'current_winMethods_[DQ]', 'current_winMethods_[KO]', 
             'current_winMethods_[SUB]']

_renamecols =  ['eventDate.date', 'fighterId', 'current_duration', 'winner', 'current_hitsTotal', 
             'current_hitsSuccessful',  'current_takedownTotal',  'current_takedownSuccessful', 
             'current_submissionAttempts',  'current_takeovers',  'current_accentedHitsTotal', 
             'current_accentedHitsSuccessful',  'current_knockdowns',  'current_protectionPassage', 
             'current_hitsHeadTotal',  'current_hitsHeadSuccessful',  'current_hitsBodyTotal', 
             'current_hitsBodySuccessful',  'current_hitsLegsTotal',  'current_hitsLegsSuccessful', 
             'current_accentedHitsPositionDistanceTotal', 'current_accentedHitsPositionDistanceSuccessful', 
             'current_accentedHitsPositionClinchTotal', 'current_accentedHitsPositionClinchSuccessful', 
             'current_accentedHitsPositionParterTotal', 'current_accentedHitsPositionParterSuccessful', 
             'current_winMethods_[DEC]', 'current_winMethods_[DQ]', 'current_winMethods_[KO]', 
             'current_winMethods_[SUB]', 'odds', 'age', 'winnerId', 'fighter_nbr']



fighter1_events.columns = _renamecols
fighter2_events.columns = _renamecols
f_stats_events_summ = pd.concat([fighter1_events, fighter2_events])
f_stats_events_summ.sort_values(by = ['fighterId','eventDate.date'], axis=0, inplace = True) # df with all firters ordered by ('fighterId','eventDate.date')
f_stats_events_summ[['eventDate.date']]

Unnamed: 0,eventDate.date
6266,2019-10-19 04:00:00
6353,2019-12-21 00:00:00
6532,2020-06-27 00:00:00
6585,2020-07-25 00:00:00
6733,2020-11-07 00:00:00
...,...
6820,2021-01-20 00:00:00
6806,2021-01-16 00:00:00
6836,2021-02-06 00:00:00
6813,2021-01-20 00:00:00


### Считаем накопительную статистику для всех бойцов:

In [12]:
def add_cumulative_sum_prod(df:pd.DataFrame, current_col_names:List[str], supl_cols) -> pd.DataFrame:
    """
    Add cumulative sum for previous fights for input columns list to input df.
    :param df: input DF (should be sorted by fighter and date)
    :param columns: Column names of the dataframe.
    :return: pd.DataFrame with cumulative sum for previous fights for input columns list.
    """
    new_cumsum_colnames = []
    df_cumulative = pd.DataFrame(index=df.index)
    for column in current_col_names:
        print('cumulative:', column)
        if 'current_' in column:
            col_name = column.replace('current_', 'cumsum_')
        else:
            col_name = 'cumsum_' + column
            
        new_cumsum_colnames.append(col_name)
        df_cumulative[col_name] = df.groupby('fighterId')[column].cumsum()
        
    df_cumulative[current_col_names+['age']] = df[current_col_names+['age']]
    df_cumulative[supl_cols] = df[supl_cols]

    return df_cumulative, new_cumsum_colnames

f_stats_events_cumulative, new_cumsum_colnames = add_cumulative_sum_prod(f_stats_events_summ, current_col_names, supl_cols) # df c накопленной суммой
#count_of_fights
f_stats_events_cumulative['count_of_fights'] = f_stats_events_cumulative.groupby('fighterId')['fighter_nbr'].cumcount()
f_stats_events_cumulative

cumulative: current_duration
cumulative: winner
cumulative: current_hitsTotal
cumulative: current_hitsSuccessful
cumulative: current_takedownTotal
cumulative: current_takedownSuccessful
cumulative: current_submissionAttempts
cumulative: current_takeovers
cumulative: current_accentedHitsTotal
cumulative: current_accentedHitsSuccessful
cumulative: current_knockdowns
cumulative: current_protectionPassage
cumulative: current_hitsHeadTotal
cumulative: current_hitsHeadSuccessful
cumulative: current_hitsBodyTotal
cumulative: current_hitsBodySuccessful
cumulative: current_hitsLegsTotal
cumulative: current_hitsLegsSuccessful
cumulative: current_accentedHitsPositionDistanceTotal
cumulative: current_accentedHitsPositionDistanceSuccessful
cumulative: current_accentedHitsPositionClinchTotal
cumulative: current_accentedHitsPositionClinchSuccessful
cumulative: current_accentedHitsPositionParterTotal
cumulative: current_accentedHitsPositionParterSuccessful
cumulative: current_winMethods_[DEC]
cumulati

Unnamed: 0,cumsum_duration,cumsum_winner,cumsum_hitsTotal,cumsum_hitsSuccessful,cumsum_takedownTotal,cumsum_takedownSuccessful,cumsum_submissionAttempts,cumsum_takeovers,cumsum_accentedHitsTotal,cumsum_accentedHitsSuccessful,cumsum_knockdowns,cumsum_protectionPassage,cumsum_hitsHeadTotal,cumsum_hitsHeadSuccessful,cumsum_hitsBodyTotal,cumsum_hitsBodySuccessful,cumsum_hitsLegsTotal,cumsum_hitsLegsSuccessful,cumsum_accentedHitsPositionDistanceTotal,cumsum_accentedHitsPositionDistanceSuccessful,cumsum_accentedHitsPositionClinchTotal,cumsum_accentedHitsPositionClinchSuccessful,cumsum_accentedHitsPositionParterTotal,cumsum_accentedHitsPositionParterSuccessful,cumsum_winMethods_[DEC],cumsum_winMethods_[DQ],cumsum_winMethods_[KO],cumsum_winMethods_[SUB],current_duration,winner,current_hitsTotal,current_hitsSuccessful,current_takedownTotal,current_takedownSuccessful,current_submissionAttempts,current_takeovers,current_accentedHitsTotal,current_accentedHitsSuccessful,current_knockdowns,current_protectionPassage,current_hitsHeadTotal,current_hitsHeadSuccessful,current_hitsBodyTotal,current_hitsBodySuccessful,current_hitsLegsTotal,current_hitsLegsSuccessful,current_accentedHitsPositionDistanceTotal,current_accentedHitsPositionDistanceSuccessful,current_accentedHitsPositionClinchTotal,current_accentedHitsPositionClinchSuccessful,current_accentedHitsPositionParterTotal,current_accentedHitsPositionParterSuccessful,current_winMethods_[DEC],current_winMethods_[DQ],current_winMethods_[KO],current_winMethods_[SUB],age,eventDate.date,fighterId,fighter_nbr,odds,winnerId,count_of_fights
6266,300.0,1,51.0,35.0,0.0,0.0,0.0,0.0,51.0,35.0,0.0,0.0,25.0,11.0,15.0,15.0,11.0,9.0,51.0,35.0,0.0,0.0,0.0,0.0,0,0,0,0,300.0,True,51.0,35.0,0.0,0.0,0.0,0.0,51.0,35.0,0.0,0.0,25.0,11.0,15.0,15.0,11.0,9.0,51.0,35.0,0.0,0.0,0.0,0.0,0,0,0,0,28.0,2019-10-19 04:00:00,1,1,1.65,1.0,0
6353,600.0,1,88.0,48.0,0.0,0.0,0.0,0.0,88.0,48.0,0.0,0.0,55.0,17.0,18.0,18.0,15.0,13.0,88.0,48.0,0.0,0.0,0.0,0.0,0,0,0,0,300.0,False,37.0,13.0,0.0,0.0,0.0,0.0,37.0,13.0,0.0,0.0,30.0,6.0,3.0,3.0,4.0,4.0,37.0,13.0,0.0,0.0,0.0,0.0,0,0,0,0,28.0,2019-12-21 00:00:00,1,1,5.52,103.0,1
6532,761.0,2,110.0,63.0,0.0,0.0,0.0,0.0,110.0,63.0,0.0,1.0,67.0,23.0,20.0,20.0,23.0,20.0,109.0,62.0,0.0,0.0,1.0,1.0,0,0,1,0,161.0,True,22.0,15.0,0.0,0.0,0.0,0.0,22.0,15.0,0.0,1.0,12.0,6.0,2.0,2.0,8.0,7.0,21.0,14.0,0.0,0.0,1.0,1.0,0,0,1,0,29.0,2020-06-27 00:00:00,1,1,,1.0,2
6585,917.0,3,146.0,91.0,0.0,0.0,0.0,0.0,146.0,91.0,0.0,1.0,87.0,35.0,26.0,26.0,33.0,30.0,133.0,81.0,0.0,0.0,13.0,10.0,0,0,2,0,156.0,True,36.0,28.0,0.0,0.0,0.0,0.0,36.0,28.0,0.0,0.0,20.0,12.0,6.0,6.0,10.0,10.0,24.0,19.0,0.0,0.0,12.0,9.0,0,0,1,0,29.0,2020-07-25 00:00:00,1,1,,1.0,3
6733,1217.0,3,201.0,120.0,0.0,0.0,0.0,0.0,201.0,120.0,0.0,1.0,117.0,42.0,29.0,27.0,55.0,51.0,187.0,110.0,1.0,0.0,13.0,10.0,1,0,2,0,300.0,False,55.0,29.0,0.0,0.0,0.0,0.0,55.0,29.0,0.0,0.0,30.0,7.0,3.0,1.0,22.0,21.0,54.0,29.0,1.0,0.0,0.0,0.0,1,0,0,0,29.0,2020-11-07 00:00:00,1,1,1.29,277.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6820,248.0,1,49.0,25.0,1.0,1.0,0.0,0.0,45.0,21.0,0.0,0.0,38.0,17.0,4.0,2.0,3.0,2.0,40.0,20.0,2.0,0.0,3.0,1.0,0,0,1,0,248.0,True,49.0,25.0,1.0,1.0,0.0,0.0,45.0,21.0,0.0,0.0,38.0,17.0,4.0,2.0,3.0,2.0,40.0,20.0,2.0,0.0,3.0,1.0,0,0,1,0,31.0,2021-01-20 00:00:00,3664,2,1.50,3664.0,0
6806,300.0,1,93.0,42.0,2.0,0.0,0.0,0.0,93.0,42.0,0.0,0.0,47.0,13.0,26.0,15.0,20.0,14.0,90.0,41.0,3.0,1.0,0.0,0.0,1,0,0,0,300.0,True,93.0,42.0,2.0,0.0,0.0,0.0,93.0,42.0,0.0,0.0,47.0,13.0,26.0,15.0,20.0,14.0,90.0,41.0,3.0,1.0,0.0,0.0,1,0,0,0,26.0,2021-01-16 00:00:00,3666,2,1.93,3666.0,0
6836,600.0,1,133.0,72.0,3.0,0.0,0.0,0.0,107.0,50.0,0.0,0.0,55.0,17.0,27.0,15.0,25.0,18.0,100.0,46.0,4.0,2.0,3.0,2.0,2,0,0,0,300.0,False,40.0,30.0,1.0,0.0,0.0,0.0,14.0,8.0,0.0,0.0,8.0,4.0,1.0,0.0,5.0,4.0,10.0,5.0,1.0,1.0,3.0,2.0,1,0,0,0,26.0,2021-02-06 00:00:00,3666,2,2.70,3045.0,1
6813,240.0,0,11.0,7.0,0.0,0.0,0.0,0.0,8.0,4.0,0.0,0.0,7.0,3.0,0.0,0.0,1.0,1.0,6.0,3.0,2.0,1.0,0.0,0.0,0,0,0,1,240.0,False,11.0,7.0,0.0,0.0,0.0,0.0,8.0,4.0,0.0,0.0,7.0,3.0,0.0,0.0,1.0,1.0,6.0,3.0,2.0,1.0,0.0,0.0,0,0,0,1,29.0,2021-01-20 00:00:00,3667,2,4.75,353.0,0


In [13]:
q = f_stats_events_cumulative[f_stats_events_cumulative['fighter_nbr'] == 1]
q['fighterId'].value_counts()

269     31
277     31
101     29
129     27
12      27
        ..
1789     1
1813     1
1829     1
1837     1
2043     1
Name: fighterId, Length: 1590, dtype: int64

In [14]:
### Calculate accuracy cols
for_accuracy_cols = ['hits', 'takedown', 'accentedHits', 'hitsHead', 'hitsBody', 'hitsLegs', 'accentedHitsPositionDistance', 
 'accentedHitsPositionClinch', 'accentedHitsPositionParter']
new_accuracy_cols = []
for col in for_accuracy_cols:
    new_accuracy_col = f'{col}_accuracy' 
    
    new_accuracy_cols.append(new_accuracy_col)
    f_stats_events_cumulative[new_accuracy_col] = f_stats_events_cumulative[f'cumsum_{col}Successful']/f_stats_events_cumulative[f'cumsum_{col}Total']

new_accuracy_cols

['hits_accuracy',
 'takedown_accuracy',
 'accentedHits_accuracy',
 'hitsHead_accuracy',
 'hitsBody_accuracy',
 'hitsLegs_accuracy',
 'accentedHitsPositionDistance_accuracy',
 'accentedHitsPositionClinch_accuracy',
 'accentedHitsPositionParter_accuracy']

In [15]:
### Calculate winpersent cols
for_percent_cols = ['DEC', 'DQ', 'KO', 'SUB']
new_percent_cols = []
for col in for_percent_cols:
    new_percent_col = f'{col}_percent' 
    new_percent_cols.append(new_percent_col)
    f_stats_events_cumulative[new_percent_col] = f_stats_events_cumulative[f'cumsum_winMethods_[{col}]']/f_stats_events_cumulative['cumsum_winner']

new_percent_cols

['DEC_percent', 'DQ_percent', 'KO_percent', 'SUB_percent']

In [16]:
### Calculate Per minute cols
for_PM_cols = ['hits', 'takedown', 'accentedHits', 'hitsHead', 'hitsBody', 'hitsLegs', 
               'accentedHitsPositionDistance', 'accentedHitsPositionClinch', 'accentedHitsPositionParter']

new_PM_cols = []
for col in for_PM_cols:
    new_PM_col = f'{col}_PM' 
    new_PM_cols.append(new_PM_col)
    f_stats_events_cumulative[new_PM_col]  = f_stats_events_cumulative[f'cumsum_{col}Successful']/f_stats_events_cumulative['cumsum_duration']*60

    
# knockdowns per minute
new_PM_col = 'knockdowns_PM' 
new_PM_cols.append(new_PM_col)
f_stats_events_cumulative[new_PM_col]  = f_stats_events_cumulative['cumsum_knockdowns']/f_stats_events_cumulative['cumsum_duration']*60


# protectionPassage per minute
new_PM_col = 'protectionPassage_PM' 
new_PM_cols.append(new_PM_col)
f_stats_events_cumulative[new_PM_col]  = f_stats_events_cumulative['cumsum_protectionPassage']/f_stats_events_cumulative['cumsum_duration']*60

new_PM_cols

['hits_PM',
 'takedown_PM',
 'accentedHits_PM',
 'hitsHead_PM',
 'hitsBody_PM',
 'hitsLegs_PM',
 'accentedHitsPositionDistance_PM',
 'accentedHitsPositionClinch_PM',
 'accentedHitsPositionParter_PM',
 'knockdowns_PM',
 'protectionPassage_PM']

In [17]:
current_col_names, new_accuracy_cols, new_percent_cols, new_PM_cols

(['current_duration',
  'winner',
  'current_hitsTotal',
  'current_hitsSuccessful',
  'current_takedownTotal',
  'current_takedownSuccessful',
  'current_submissionAttempts',
  'current_takeovers',
  'current_accentedHitsTotal',
  'current_accentedHitsSuccessful',
  'current_knockdowns',
  'current_protectionPassage',
  'current_hitsHeadTotal',
  'current_hitsHeadSuccessful',
  'current_hitsBodyTotal',
  'current_hitsBodySuccessful',
  'current_hitsLegsTotal',
  'current_hitsLegsSuccessful',
  'current_accentedHitsPositionDistanceTotal',
  'current_accentedHitsPositionDistanceSuccessful',
  'current_accentedHitsPositionClinchTotal',
  'current_accentedHitsPositionClinchSuccessful',
  'current_accentedHitsPositionParterTotal',
  'current_accentedHitsPositionParterSuccessful',
  'current_winMethods_[DEC]',
  'current_winMethods_[DQ]',
  'current_winMethods_[KO]',
  'current_winMethods_[SUB]'],
 ['hits_accuracy',
  'takedown_accuracy',
  'accentedHits_accuracy',
  'hitsHead_accuracy',
  

In [18]:
# Win and Loose streak
def calculate_win_streak_prod(df):
    streaks = []
    for fighter in tqdm(df[:].groupby(['fighterId'])): #, 'eventDate.date'
        
        curr_streak = 0
        fighterId = fighter[0]
        for winnerId in fighter[1][['winnerId']].values:
            
            if fighterId == winnerId:
                curr_streak += 1
            else:
                curr_streak = 0

            streaks.append(curr_streak)

    return streaks

def calculate_loose_streak_prod(df):
    streaks = []
    for fighter in tqdm(df[:].groupby(['fighterId'])): #, 'fighter_nbr'

        curr_streak = 0
        fighterId = fighter[0]
        for winnerId in fighter[1][['winnerId']].values:
            if fighterId == winnerId:
                curr_streak = 0
            else:
                curr_streak += 1
            streaks.append(curr_streak)

    return streaks

win_streaks = calculate_win_streak_prod(f_stats_events_cumulative)
f_stats_events_cumulative['win_streak'] = win_streaks
f_stats_events_cumulative['win_streak'] = f_stats_events_cumulative['win_streak'] #- f_stats_events_cumulative['winner']

loose_streaks = calculate_loose_streak_prod(f_stats_events_cumulative)
f_stats_events_cumulative['loose_streak'] = loose_streaks
f_stats_events_cumulative['loose_streak'] = f_stats_events_cumulative['loose_streak'] #- ~f_stats_events_cumulative['winner']

new_streak_cols = ['win_streak', 'loose_streak']

  0%|          | 0/2653 [00:00<?, ?it/s]

  0%|          | 0/2653 [00:00<?, ?it/s]

In [19]:
q = f_stats_events_cumulative[(f_stats_events_cumulative['fighterId'] == 347)]

q[['winner', 'fighterId', 'winnerId', 'win_streak', 'loose_streak', 'eventDate.date', 'fighter_nbr']][:50].sort_values('eventDate.date')

Unnamed: 0,winner,fighterId,winnerId,win_streak,loose_streak,eventDate.date,fighter_nbr
1645,True,347,347.0,1,0,2008-11-05 00:00:00,1
2376,True,347,347.0,2,0,2010-12-16 00:00:00,1
2418,True,347,347.0,3,0,2011-02-05 00:00:00,1
2533,True,347,347.0,4,0,2011-06-11 00:00:00,1
2592,True,347,347.0,5,0,2011-08-14 00:00:00,2
2676,True,347,347.0,6,0,2011-10-29 00:00:00,1
2751,False,347,129.0,0,1,2011-12-30 00:00:00,2
2878,True,347,347.0,1,0,2012-05-15 00:00:00,1
2989,True,347,347.0,2,0,2012-08-11 00:00:00,1
3131,False,347,292.0,0,1,2013-01-26 00:00:00,2


In [20]:
static_supl_cols = ['winner', 'city', 'country', 'duration',
            'fighterId_1', 'fighterId_2', 'name', 'rounds', 'eventDate.timezone', 'weightCategory.name',
            'winnerId', 'f1_odds', 'f2_odds',  'f1_age', 'f2_age', 'f1_count_of_fights', 'f2_count_of_fights']

f1_cumsum_col_names = ['f1_' + i for i in new_cumsum_colnames]
f2_cumsum_col_names = ['f2_' + i for i in new_cumsum_colnames]


all_new_cols={
    'static_supl_cols': static_supl_cols,
    'f2_cumsum_col_names':f2_cumsum_col_names,
    'f1_cumsum_col_names':f1_cumsum_col_names,
    'new_cumsum_colnames':new_cumsum_colnames,
    'new_accuracy_cols':new_accuracy_cols,
    'new_percent_cols':new_percent_cols,
    'new_PM_cols':new_PM_cols, 
    'new_streak_cols':new_streak_cols,
     } 
            
all_new_cols

{'static_supl_cols': ['winner',
  'city',
  'country',
  'duration',
  'fighterId_1',
  'fighterId_2',
  'name',
  'rounds',
  'eventDate.timezone',
  'weightCategory.name',
  'winnerId',
  'f1_odds',
  'f2_odds',
  'f1_age',
  'f2_age',
  'f1_count_of_fights',
  'f2_count_of_fights'],
 'f2_cumsum_col_names': ['f2_cumsum_duration',
  'f2_cumsum_winner',
  'f2_cumsum_hitsTotal',
  'f2_cumsum_hitsSuccessful',
  'f2_cumsum_takedownTotal',
  'f2_cumsum_takedownSuccessful',
  'f2_cumsum_submissionAttempts',
  'f2_cumsum_takeovers',
  'f2_cumsum_accentedHitsTotal',
  'f2_cumsum_accentedHitsSuccessful',
  'f2_cumsum_knockdowns',
  'f2_cumsum_protectionPassage',
  'f2_cumsum_hitsHeadTotal',
  'f2_cumsum_hitsHeadSuccessful',
  'f2_cumsum_hitsBodyTotal',
  'f2_cumsum_hitsBodySuccessful',
  'f2_cumsum_hitsLegsTotal',
  'f2_cumsum_hitsLegsSuccessful',
  'f2_cumsum_accentedHitsPositionDistanceTotal',
  'f2_cumsum_accentedHitsPositionDistanceSuccessful',
  'f2_cumsum_accentedHitsPositionClinchTotal'

In [21]:
# # Save data for future inference
# import json
# with open('../../data/Catboost_v1_0/all_new_cols_06.04.2021.txt', 'w') as outfile:
#     json.dump(all_new_cols, outfile)
    
f_stats_events_cumulative['fighterName'] = f_stats_events_cumulative['fighterId'].replace(f_name_dict)
f_stats_events_cumulative.to_csv('../../data/Catboost_v1_0/PROD_f_stats_events_cumulative_06.04.2021.csv')

In [22]:
all_new_cols.keys()

dict_keys(['static_supl_cols', 'f2_cumsum_col_names', 'f1_cumsum_col_names', 'new_cumsum_colnames', 'new_accuracy_cols', 'new_percent_cols', 'new_PM_cols', 'new_streak_cols'])

In [23]:
all_fightCols_list = all_new_cols['new_cumsum_colnames']+all_new_cols['new_accuracy_cols']+\
    all_new_cols['new_percent_cols']+all_new_cols['new_PM_cols']+all_new_cols['new_streak_cols'] + \
    ['count_of_fights'] 
all_fightCols_list

['cumsum_duration',
 'cumsum_winner',
 'cumsum_hitsTotal',
 'cumsum_hitsSuccessful',
 'cumsum_takedownTotal',
 'cumsum_takedownSuccessful',
 'cumsum_submissionAttempts',
 'cumsum_takeovers',
 'cumsum_accentedHitsTotal',
 'cumsum_accentedHitsSuccessful',
 'cumsum_knockdowns',
 'cumsum_protectionPassage',
 'cumsum_hitsHeadTotal',
 'cumsum_hitsHeadSuccessful',
 'cumsum_hitsBodyTotal',
 'cumsum_hitsBodySuccessful',
 'cumsum_hitsLegsTotal',
 'cumsum_hitsLegsSuccessful',
 'cumsum_accentedHitsPositionDistanceTotal',
 'cumsum_accentedHitsPositionDistanceSuccessful',
 'cumsum_accentedHitsPositionClinchTotal',
 'cumsum_accentedHitsPositionClinchSuccessful',
 'cumsum_accentedHitsPositionParterTotal',
 'cumsum_accentedHitsPositionParterSuccessful',
 'cumsum_winMethods_[DEC]',
 'cumsum_winMethods_[DQ]',
 'cumsum_winMethods_[KO]',
 'cumsum_winMethods_[SUB]',
 'hits_accuracy',
 'takedown_accuracy',
 'accentedHits_accuracy',
 'hitsHead_accuracy',
 'hitsBody_accuracy',
 'hitsLegs_accuracy',
 'accentedH