# Task 1: Klasifikasi

## Import Modules

In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTENC

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import ComplementNB
from sklearn.svm import SVC
from catboost import CatBoostClassifier

In [None]:
# Raise pandas' display limits so frames with up to 200 columns / 200 rows
# are shown without truncation.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, 200)

## Utilities

In [None]:
from sklearn.metrics import precision_score, \
    recall_score, classification_report, \
    accuracy_score, f1_score


def evaluate_classifier_performance(prediction, y_test):
    """Print an evaluation summary for a set of class predictions.

    Emits, in order: scikit-learn's classification report, a confusion
    matrix built with ``pd.crosstab`` (rows = actual, columns = predicted),
    and accuracy plus macro/micro averaged F1, precision and recall.

    Args:
        prediction: Predicted labels, one per sample. May be a list, array,
            Series, or a single-column DataFrame / column vector.
        y_test: True labels in the same order as ``prediction``; the same
            container types are accepted.

    Returns:
        None. All results are printed or shown through ``display``.
    """
    # Flatten both inputs to 1-D up front: a target selected as
    # ``df[["Winner"]]`` is a single-column DataFrame, and pd.Series rejects
    # the resulting (n, 1) array.
    actual = np.asarray(y_test).ravel()
    predicted = np.asarray(prediction).ravel()

    # Compact evaluation information.
    # zero_division=0 scores an undefined metric (a class with no predicted
    # or no true samples) as 0 instead of emitting a warning.
    print("Hasil Evaluasi berdasarkan classification report \n\n%s\n" % (classification_report(actual, predicted, zero_division=0)))
    print()
    print("Confusion Matrix")
    print()
    y_actual = pd.Series(actual, name="actual")
    y_pred = pd.Series(predicted, name="prediction")
    df_confusion = pd.crosstab(y_actual, y_pred)
    # display() is not imported in this file; it is provided by the notebook
    # (IPython) session.
    display(df_confusion)
    print()
    print()

    print("Butuh informasi lebih lengkap? silakan simak di bawah ini : ")
    print('Accuracy Average:', accuracy_score(actual, predicted))
    # f1_score now gets the same zero_division handling as precision/recall.
    print('F1 Macro Average:', f1_score(actual, predicted, average='macro', zero_division=0))
    print('F1 Micro Average:', f1_score(actual, predicted, average='micro', zero_division=0))
    print('Precision Macro Average:', precision_score(actual, predicted, average='macro', zero_division=0))
    print('Precision Micro Average:', precision_score(actual, predicted, average='micro', zero_division=0))
    print('Recall Macro Average:', recall_score(actual, predicted, average='macro', zero_division=0))
    print('Recall Micro Average:', recall_score(actual, predicted, average='micro', zero_division=0))
    print()

## Load data

In [None]:
# Load the training data from a CSV path relative to the current working
# directory; the bare `ufc` on the last line makes the notebook render it.
ufc = pd.read_csv("./UFC_train.csv")
ufc

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R
_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,Winner
0,Joe Riggs,Joe Doerksen,Steve Mazzagatti,2004-08-21,"Las Vegas, Nevada, USA",False,Middleweight,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,182.88,190.50,185.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Southpaw,182.88,177.80,185.0,26.0,21.0,Red
1,Jorge Masvidal,Al Iaquinta,Keith Peterson,2015-04-04,"Fairfax, Virginia, USA",False,Lightweight,1.156250,0.000000,0.394141,0.352422,0.239219,0.011484,0.156250,0.132812,0.000000,0.000000,137.460938,53.234375,89.953125,31.296875,141.210938,56.718750,91.882812,33.078125,2.000000,0.625000,1.835938,0.085938,117.312500,39.976562,83.210938,27.132812,13.625000,7.765625,3.960938,2.070312,6.523438,5.492188,2.781250,2.093750,127.031250,46.812500,88.695312,30.882812,5.671875,3.679688,0.710938,0.054688,4.757812,2.742188,0.546875,0.359375,35.945312,14.601562,522.835938,19,1,0,1,3,6,2,0,0,0,3,3,0,0,Orthodox,177.80,177.80,155.0,0.500000,0.343750,0.498750,0.339062,0.554688,0.207500,0.562500,0.312500,0.500000,0.031250,124.500000,60.562500,117.812500,41.843750,156.031250,87.593750,129.593750,53.093750,3.500000,1.875000,5.812500,0.937500,93.468750,37.781250,92.250000,27.656250,21.656250,17.468750,17.593750,8.312500,9.375000,5.312500,7.968750,5.875000,98.250000,41.562500,110.250000,38.406250,14.281250,11.750000,4.562500,2.343750,11.968750,7.250000,3.000000,1.093750,252.312500,115.000000,890.593750,17,0,2,0,3,5,1,0,0,0,4,0,1,0,Orthodox,180.34,187.96,170.0,27.0,30.0,Blue
2,Dan Stittgen,Stephen Thompson,Josh Rosenthal,2012-02-04,"Las Vegas, Nevada, USA",False,Welterweight,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,182.88,190.50,170.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,185.42,,170.0,28.0,31.0,Blue
3,Josh Koscheck,Johny Hendricks,Kevin Mulhall,2012-05-05,"East Rutherford, New Jersey, USA",False,Welterweight,0.695312,0.000000,0.783359,0.185547,0.088281,0.104375,0.093750,0.062500,0.000000,0.000000,39.218750,19.015625,37.226562,14.531250,51.500000,30.742188,52.046875,28.671875,2.578125,0.804688,1.625000,0.445312,30.367188,11.804688,28.773438,8.367188,5.429688,4.546875,6.656250,4.453125,3.421875,2.664062,1.796875,1.710938,25.703125,8.875000,24.234375,6.359375,11.867188,8.617188,12.179688,7.359375,1.648438,1.523438,0.812500,0.812500,68.281250,67.156250,331.070312,17,0,4,0,4,7,1,0,1,1,1,4,0,0,Southpaw,175.26,175.26,185.0,0.020050,0.011723,0.373518,0.325567,0.419368,0.155259,0.093866,0.007238,0.000015,0.000137,81.387877,27.139767,109.998363,39.224997,96.130762,41.379938,125.441042,53.259472,2.766176,1.440809,3.660690,1.000504,72.988626,19.695154,92.600458,27.688076,8.181122,7.289246,8.554874,4.713242,0.218128,0.155367,8.843031,6.823679,67.441225,14.901989,102.755116,33.409025,8.778824,7.393555,6.983452,5.589394,5.167828,4.844223,0.259796,0.226578,145.693754,116.292816,793.567753,44,1,2,0,5,15,5,0,0,1,5,6,3,0,Orthodox,177.80,185.42,170.0,28.0,34.0,Blue
4,John Dodson,Manvel Gamburyan,James Warring,2016-04-16,"Tampa, Florida, USA",False,Bantamweight,0.500000,0.266602,0.381462,0.456558,0.429614,0.469570,1.377930,0.000244,0.125000,0.500000,74.522461,29.986572,93.299805,42.413086,102.170410,51.280762,125.859863,71.359619,9.037354,3.588867,3.682373,1.380859,58.093994,18.651855,76.181396,26.925781,8.149902,5.452637,10.418213,9.236328,8.278564,5.882080,6.700195,6.250977,56.636475,20.501221,69.770264,26.547607,9.979004,6.316650,17.725586,12.022461,7.906982,3.168701,5.803955,3.843018,209.931152,181.141357,822.403564,31,1,0,1,2,6,7,0,0,0,3,0,3,0,Orthodox,165.10,170.18,135.0,0.320312,0.000000,0.416328,0.420000,0.466719,0.160312,0.000000,0.000000,0.000000,0.000000,105.070312,44.851562,126.031250,59.617188,134.648438,72.429688,168.796875,99.320312,2.625000,1.171875,11.648438,2.156250,68.460938,15.812500,89.789062,35.625000,25.195312,19.101562,24.078125,15.140625,11.414062,9.937500,12.164062,8.851562,88.000000,33.382812,102.640625,43.398438,15.484375,10.640625,23.234375,16.125000,1.585938,0.828125,0.156250,0.093750,110.148438,162.796875,1129.593750,22,3,3,0,3,6,2,0,0,0,2,3,0,1,Orthodox,160.02,167.64,135.0,34.0,31.0,Red
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5405,Matt Frevola,Luis Pena,Herb Dean,2019-10-12,"Tampa, Florida, USA",False,Lightweight,0.125000,0.000000,0.481250,0.545000,0.721250,0.435000,0.625000,0.375000,1.250000,0.125000,115.500000,60.375000,55.875000,27.750000,126.750000,70.750000,65.375000,37.000000,2.500000,1.000000,4.125000,1.875000,108.750000,55.250000,33.875000,8.250000,6.500000,4.875000,7.125000,5.125000,0.250000,0.250000,14.875000,14.375000,55.750000,21.500000,48.000000,23.750000,12.625000,7.750000,4.625000,2.875000,47.125000,31.125000,3.250000,1.125000,346.875000,113.500000,701.000000,10,0,1,0,2,3,1,0,0,0,1,1,1,0,Southpaw,190.50,190.50,155.0,0.000000,1.000000,0.367500,0.365000,0.232500,0.050000,1.250000,0.750000,1.250000,0.500000,90.000000,34.250000,108.750000,38.750000,99.250000,43.250000,116.250000,45.750000,8.000000,2.500000,1.250000,0.250000,65.000000,21.000000,91.000000,27.250000,16.500000,6.000000,13.750000,8.250000,8.500000,7.250000,4.000000,3.250000,82.000000,28.750000,101.000000,35.250000,4.000000,2.000000,4.750000,1.250000,4.000000,3.500000,3.000000,2.250000,164.000000,38.000000,690.000000,7,0,0,2,1,1,2,0,0,0,1,0,0,0,Orthodox,175.26,180.34,155.0,26.0,29.0,Red
5406,Demian Maia,Rick Story,Mario Yamasaki,2012-10-13,"Rio de Janeiro, Brazil",False,Welterweight,0.015625,0.000000,0.384980,0.313633,0.613965,0.201641,0.554688,0.332031,0.009766,0.126953,128.417969,50.142578,80.093750,23.673828,149.867188,70.507812,95.468750,38.597656,5.433594,3.404297,4.937500,1.652344,91.095703,20.906250,70.052734,18.005859,29.732422,21.935547,8.562500,4.578125,7.589844,7.300781,1.478516,1.089844,100.585938,29.765625,71.501953,19.650391,13.421875,8.462891,6.703125,2.392578,14.410156,11.914062,1.888672,1.630859,232.138672,187.236328,890.759766,28,0,0,1,6,7,3,0,0,2,3,1,1,0,Southpaw,175.26,180.34,170.0,0.000000,0.009766,0.674354,0.679751,0.235942,0.191372,0.174561,0.063477,0.000488,0.031616,59.981201,20.242920,61.599731,24.849487,78.031616,35.113525,70.011597,31.948486,5.714111,1.252319,2.063110,1.187622,51.024902,13.841919,52.012451,17.431274,7.016113,5.072388,5.266968,3.630249,1.940186,1.328613,4.320312,3.787964,47.510498,12.865234,50.864624,16.935059,8.517944,4.500732,6.739258,4.336914,3.952759,2.876953,3.995850,3.577515,122.895996,50.394043,475.191772,33,1,5,0,5,10,4,0,0,0,4,1,5,0,Southpaw,185.42,182.88,170.0,28.0,34.0,Red
5407,Eduardo Garagorri,Humberto Bandenay,Osiris Maia,2019-08-10,"Montevideo, Uruguay",False,Featherweight,0.250000,0.250000,0.602500,0.515000,0.330000,0.330000,0.250000,0.000000,0.000000,0.000000,44.750000,16.750000,63.500000,39.500000,45.750000,17.250000,75.000000,50.000000,1.500000,1.000000,3.250000,0.750000,31.000000,7.000000,41.000000,23.250000,7.750000,5.250000,21.000000,15.500000,6.000000,4.500000,1.500000,0.750000,39.750000,13.250000,38.250000,20.750000,4.500000,3.000000,11.000000,8.000000,0.500000,0.500000,14.250000,10.750000,8.000000,185.000000,466.250000,5,0,1,0,1,1,2,0,0,0,0,1,0,0,Southpaw,180.34,180.34,145.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,175.26,177.80,145.0,24.0,30.0,Red
5408,Danny Castillo,Edson Barboza,Mike Beltran,2013-12-14,"Sacramento, California, USA",False,Lightweight,0.875000,0.125000,0.543125,0.426250,0.031250,0.161250,0.000000,0.015625,0.000000,0.000000,75.312500,38.421875,49.140625,19.812500,76.359375,39.218750,52.968750,22.468750,0.093750,0.062500,3.406250,0.390625,39.171875,10.812500,40.625000,13.953125,15.312500,10.765625,6.703125,4.546875,20.828125,16.843750,1.812500,1.312500,72.437500,36.453125,46.625000,18.437500,0.578125,0.468750,0.265625,0.187500,2.296875,1.500000,2.250000,1.187500,14.671875,12.687500,363.968750,16,0,4,0,4,6,1,0,0,1,1,4,0,0,Orthodox,180.34,190.50,145.0,0.125000,0.125000,0.412969,0.352891,0.592031,0.017266,0.125000,1.359375,0.007812,0.531250,72.914062,29.351562,65.601562,25.500000,104.179688,57.304688,118.875000,75.828125,6.539062,4.343750,0.523438,0.101562,63.234375,21.671875,50.406250,13.085938,5.515625,3.703125,11.148438,8.625000,4.164062,3.976562,4.046875,3.789062,51.359375,15.367188,61.367188,21.460938,2.250000,1.992188,2.671875,2.476562,19.304688,11.992188,1.562500,1.562500,416.273438,30.046875,823.750000,21,0,1,0,3,6,2,0,0,1,4,1,0,0,Orthodox,175.26,180.34,155.0,27.0,34.0,Blue


In [None]:
# Print the full per-column summary (name and dtype) instead of the
# truncated overview pandas would use for a frame this wide.
ufc.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 144 columns):
 #    Column                        Dtype  
---   ------                        -----  
 0    R_fighter                     object 
 1    B_fighter                     object 
 2    Referee                       object 
 3    date                          object 
 4    location                      object 
 5    title_bout                    bool   
 6    weight_class                  object 
 7    B_avg_KD                      float64
 8    B_avg_opp_KD                  float64
 9    B_avg_SIG_STR_pct             float64
 10   B_avg_opp_SIG_STR_pct         float64
 11   B_avg_TD_pct                  float64
 12   B_avg_opp_TD_pct              float64
 13   B_avg_SUB_ATT                 float64
 14   B_avg_opp_SUB_ATT             float64
 15   B_avg_REV                     float64
 16   B_avg_opp_REV                 float64
 17   B_avg_SIG_STR_att             float64
 18   B_avg_

In [None]:
# Separate the label from the features. y_train is kept as a single-column
# DataFrame (double brackets); X_train is every other column.
target_column = "Winner"
y_train = ufc[[target_column]]
X_train = ufc.drop(columns=[target_column])

In [None]:
# Preview the first rows of the feature frame (head() defaults to 5 rows).
X_train.head()

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Stance,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R
_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Joe Riggs,Joe Doerksen,Steve Mazzagatti,2004-08-21,"Las Vegas, Nevada, USA",False,Middleweight,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,182.88,190.5,185.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Southpaw,182.88,177.8,185.0,26.0,21.0
1,Jorge Masvidal,Al Iaquinta,Keith Peterson,2015-04-04,"Fairfax, Virginia, USA",False,Lightweight,1.15625,0.0,0.394141,0.352422,0.239219,0.011484,0.15625,0.132812,0.0,0.0,137.460938,53.234375,89.953125,31.296875,141.210938,56.71875,91.882812,33.078125,2.0,0.625,1.835938,0.085938,117.3125,39.976562,83.210938,27.132812,13.625,7.765625,3.960938,2.070312,6.523438,5.492188,2.78125,2.09375,127.03125,46.8125,88.695312,30.882812,5.671875,3.679688,0.710938,0.054688,4.757812,2.742188,0.546875,0.359375,35.945312,14.601562,522.835938,19,1,0,1,3,6,2,0,0,0,3,3,0,0,Orthodox,177.8,177.8,155.0,0.5,0.34375,0.49875,0.339062,0.554688,0.2075,0.5625,0.3125,0.5,0.03125,124.5,60.5625,117.8125,41.84375,156.03125,87.59375,129.59375,53.09375,3.5,1.875,5.8125,0.9375,93.46875,37.78125,92.25,27.65625,21.65625,17.46875,17.59375,8.3125,9.375,5.3125,7.96875,5.875,98.25,41.5625,110.25,38.40625,14.28125,11.75,4.5625,2.34375,11.96875,7.25,3.0,1.09375,252.3125,115.0,890.59375,17,0,2,0,3,5,1,0,0,0,4,0,1,0,Orthodox,180.34,187.96,170.0,27.0,30.0
2,Dan Stittgen,Stephen Thompson,Josh Rosenthal,2012-02-04,"Las Vegas, Nevada, USA",False,Welterweight,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,182.88,190.5,170.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Orthodox,185.42,,170.0,28.0,31.0
3,Josh Koscheck,Johny Hendricks,Kevin Mulhall,2012-05-05,"East Rutherford, New Jersey, USA",False,Welterweight,0.695312,0.0,0.783359,0.185547,0.088281,0.104375,0.09375,0.0625,0.0,0.0,39.21875,19.015625,37.226562,14.53125,51.5,30.742188,52.046875,28.671875,2.578125,0.804688,1.625,0.445312,30.367188,11.804688,28.773438,8.367188,5.429688,4.546875,6.65625,4.453125,3.421875,2.664062,1.796875,1.710938,25.703125,8.875,24.234375,6.359375,11.867188,8.617188,12.179688,7.359375,1.648438,1.523438,0.8125,0.8125,68.28125,67.15625,331.070312,17,0,4,0,4,7,1,0,1,1,1,4,0,0,Southpaw,175.26,175.26,185.0,0.02005,0.011723,0.373518,0.325567,0.419368,0.155259,0.093866,0.007238,1.5e-05,0.000137,81.387877,27.139767,109.998363,39.224997,96.130762,41.379938,125.441042,53.259472,2.766176,1.440809,3.66069,1.000504,72.988626,19.695154,92.600458,27.688076,8.181122,7.289246,8.554874,4.713242,0.218128,0.155367,8.843031,6.823679,67.441225,14.901989,102.755116,33.409025,8.778824,7.393555,6.983452,5.589394,5.167828,4.844223,0.259796,0.226578,145.693754,116.292816,793.567753,44,1,2,0,5,15,5,0,0,1,5,6,3,0,Orthodox,177.8,185.42,170.0,28.0,34.0
4,John Dodson,Manvel Gamburyan,James Warring,2016-04-16,"Tampa, Florida, USA",False,Bantamweight,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,1.37793,0.000244,0.125,0.5,74.522461,29.986572,93.299805,42.413086,102.17041,51.280762,125.859863,71.359619,9.037354,3.588867,3.682373,1.380859,58.093994,18.651855,76.181396,26.925781,8.149902,5.452637,10.418213,9.236328,8.278564,5.88208,6.700195,6.250977,56.636475,20.501221,69.770264,26.547607,9.979004,6.31665,17.725586,12.022461,7.906982,3.168701,5.803955,3.843018,209.931152,181.141357,822.403564,31,1,0,1,2,6,7,0,0,0,3,0,3,0,Orthodox,165.1,170.18,135.0,0.320312,0.0,0.416328,0.42,0.466719,0.160312,0.0,0.0,0.0,0.0,105.070312,44.851562,126.03125,59.617188,134.648438,72.429688,168.796875,99.320312,2.625,1.171875,11.648438,2.15625,68.460938,15.8125,89.789062,35.625,25.195312,19.101562,24.078125,15.140625,11.414062,9.9375,12.164062,8.851562,88.0,33.382812,102.640625,43.398438,15.484375,10.640625,23.234375,16.125,1.585938,0.828125,0.15625,0.09375,110.148438,162.796875,1129.59375,22,3,3,0,3,6,2,0,0,0,2,3,0,1,Orthodox,160.02,167.64,135.0,34.0,31.0


In [None]:
# Preview the first rows of the target frame (head() defaults to 5 rows).
y_train.head()

Unnamed: 0,Winner
0,Red
1,Blue
2,Blue
3,Blue
4,Red


## Preprocessing

### Handling numerical values

In [None]:
# Full per-column summary again, this time with max_cols=200. That limit is
# above the frame's 144 columns, and the output now also lists the non-null
# count of every column.
ufc.info(verbose=True, max_cols=200)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 144 columns):
 #    Column                        Non-Null Count  Dtype  
---   ------                        --------------  -----  
 0    R_fighter                     5410 non-null   object 
 1    B_fighter                     5410 non-null   object 
 2    Referee                       5379 non-null   object 
 3    date                          5410 non-null   object 
 4    location                      5410 non-null   object 
 5    title_bout                    5410 non-null   bool   
 6    weight_class                  5410 non-null   object 
 7    B_avg_KD                      4117 non-null   float64
 8    B_avg_opp_KD                  4117 non-null   float64
 9    B_avg_SIG_STR_pct             4117 non-null   float64
 10   B_avg_opp_SIG_STR_pct         4117 non-null   float64
 11   B_avg_TD_pct                  4117 non-null   float64
 12   B_avg_opp_TD_pct              4117 non-null   

In [None]:
# Count the rows per outcome label to see how balanced the classes are.
y_train.value_counts()

Winner
Red       3581
Blue      1730
Draw        99
Name: count, dtype: int64

In [None]:
# Number of missing values in each feature column.
X_train.isnull().sum()

R_fighter                          0
B_fighter                          0
Referee                           31
date                               0
location                           0
title_bout                         0
weight_class                       0
B_avg_KD                        1293
B_avg_opp_KD                    1293
B_avg_SIG_STR_pct               1293
B_avg_opp_SIG_STR_pct           1293
B_avg_TD_pct                    1293
B_avg_opp_TD_pct                1293
B_avg_SUB_ATT                   1293
B_avg_opp_SUB_ATT               1293
B_avg_REV                       1293
B_avg_opp_REV                   1293
B_avg_SIG_STR_att               1293
B_avg_SIG_STR_landed            1293
B_avg_opp_SIG_STR_att           1293
B_avg_opp_SIG_STR_landed        1293
B_avg_TOTAL_STR_att             1293
B_avg_TOTAL_STR_landed          1293
B_avg_opp_TOTAL_STR_att         1293
B_avg_opp_TOTAL_STR_landed      1293
B_avg_TD_att                    1293
B_avg_TD_landed                 1293
B

In [None]:
# Keep only the numeric columns by excluding the object (string) and bool
# dtypes, then preview the first rows.
ufc_num = ufc.select_dtypes(exclude=["object", "bool"])
ufc_num.head()

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Reach_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLIN
CH_landed,R_avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,182.88,190.5,185.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,182.88,177.8,185.0,26.0,21.0
1,1.15625,0.0,0.394141,0.352422,0.239219,0.011484,0.15625,0.132812,0.0,0.0,137.460938,53.234375,89.953125,31.296875,141.210938,56.71875,91.882812,33.078125,2.0,0.625,1.835938,0.085938,117.3125,39.976562,83.210938,27.132812,13.625,7.765625,3.960938,2.070312,6.523438,5.492188,2.78125,2.09375,127.03125,46.8125,88.695312,30.882812,5.671875,3.679688,0.710938,0.054688,4.757812,2.742188,0.546875,0.359375,35.945312,14.601562,522.835938,19,1,0,1,3,6,2,0,0,0,3,3,0,0,177.8,177.8,155.0,0.5,0.34375,0.49875,0.339062,0.554688,0.2075,0.5625,0.3125,0.5,0.03125,124.5,60.5625,117.8125,41.84375,156.03125,87.59375,129.59375,53.09375,3.5,1.875,5.8125,0.9375,93.46875,37.78125,92.25,27.65625,21.65625,17.46875,17.59375,8.3125,9.375,5.3125,7.96875,5.875,98.25,41.5625,110.25,38.40625,14.28125,11.75,4.5625,2.34375,11.96875,7.25,3.0,1.09375,252.3125,115.0,890.59375,17,0,2,0,3,5,1,0,0,0,4,0,1,0,180.34,187.96,170.0,27.0,30.0
2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,182.88,190.5,170.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185.42,,170.0,28.0,31.0
3,0.695312,0.0,0.783359,0.185547,0.088281,0.104375,0.09375,0.0625,0.0,0.0,39.21875,19.015625,37.226562,14.53125,51.5,30.742188,52.046875,28.671875,2.578125,0.804688,1.625,0.445312,30.367188,11.804688,28.773438,8.367188,5.429688,4.546875,6.65625,4.453125,3.421875,2.664062,1.796875,1.710938,25.703125,8.875,24.234375,6.359375,11.867188,8.617188,12.179688,7.359375,1.648438,1.523438,0.8125,0.8125,68.28125,67.15625,331.070312,17,0,4,0,4,7,1,0,1,1,1,4,0,0,175.26,175.26,185.0,0.02005,0.011723,0.373518,0.325567,0.419368,0.155259,0.093866,0.007238,1.5e-05,0.000137,81.387877,27.139767,109.998363,39.224997,96.130762,41.379938,125.441042,53.259472,2.766176,1.440809,3.66069,1.000504,72.988626,19.695154,92.600458,27.688076,8.181122,7.289246,8.554874,4.713242,0.218128,0.155367,8.843031,6.823679,67.441225,14.901989,102.755116,33.409025,8.778824,7.393555,6.983452,5.589394,5.167828,4.844223,0.259796,0.226578,145.693754,116.292816,793.567753,44,1,2,0,5,15,5,0,0,1,5,6,3,0,177.8,185.42,170.0,28.0,34.0
4,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,1.37793,0.000244,0.125,0.5,74.522461,29.986572,93.299805,42.413086,102.17041,51.280762,125.859863,71.359619,9.037354,3.588867,3.682373,1.380859,58.093994,18.651855,76.181396,26.925781,8.149902,5.452637,10.418213,9.236328,8.278564,5.88208,6.700195,6.250977,56.636475,20.501221,69.770264,26.547607,9.979004,6.31665,17.725586,12.022461,7.906982,3.168701,5.803955,3.843018,209.931152,181.141357,822.403564,31,1,0,1,2,6,7,0,0,0,3,0,3,0,165.1,170.18,135.0,0.320312,0.0,0.416328,0.42,0.466719,0.160312,0.0,0.0,0.0,0.0,105.070312,44.851562,126.03125,59.617188,134.648438,72.429688,168.796875,99.320312,2.625,1.171875,11.648438,2.15625,68.460938,15.8125,89.789062,35.625,25.195312,19.101562,24.078125,15.140625,11.414062,9.9375,12.164062,8.851562,88.0,33.382812,102.640625,43.398438,15.484375,10.640625,23.234375,16.125,1.585938,0.828125,0.15625,0.09375,110.148438,162.796875,1129.59375,22,3,3,0,3,6,2,0,0,0,2,3,0,1,160.02,167.64,135.0,34.0,31.0


In [None]:
# Number of missing values in each numeric column.
ufc_num.isnull().sum()

B_avg_KD                        1293
B_avg_opp_KD                    1293
B_avg_SIG_STR_pct               1293
B_avg_opp_SIG_STR_pct           1293
B_avg_TD_pct                    1293
B_avg_opp_TD_pct                1293
B_avg_SUB_ATT                   1293
B_avg_opp_SUB_ATT               1293
B_avg_REV                       1293
B_avg_opp_REV                   1293
B_avg_SIG_STR_att               1293
B_avg_SIG_STR_landed            1293
B_avg_opp_SIG_STR_att           1293
B_avg_opp_SIG_STR_landed        1293
B_avg_TOTAL_STR_att             1293
B_avg_TOTAL_STR_landed          1293
B_avg_opp_TOTAL_STR_att         1293
B_avg_opp_TOTAL_STR_landed      1293
B_avg_TD_att                    1293
B_avg_TD_landed                 1293
B_avg_opp_TD_att                1293
B_avg_opp_TD_landed             1293
B_avg_HEAD_att                  1293
B_avg_HEAD_landed               1293
B_avg_opp_HEAD_att              1293
B_avg_opp_HEAD_landed           1293
B_avg_BODY_att                  1293
B

### Dropping columns not in test set

It seems there is no `B_Reach_cms` column in the test set, so we'll remove it here as well.

In [None]:
# Remove B_Reach_cms from the numeric features; errors="ignore" makes this a
# no-op if the column is already gone (e.g. when the cell is re-run).
ufc_num = ufc_num.drop(columns=["B_Reach_cms"], errors="ignore")

### KNN Imputation

Since most of these columns are missing a significant percentage of their values, the data is probably not missing at random. Between median and KNN imputation, KNN gave the better results, so we will use that.

In [None]:
# Fill the missing numeric values with scikit-learn's KNNImputer, using its
# default settings.
# NOTE(review): the features are not scaled before imputation, so columns
# with large ranges presumably dominate the neighbour distances - confirm
# this is intended.
num_imputer = KNNImputer()
# fit_transform returns a plain array, so rebuild the DataFrame with the
# original column names AND the original row index; keeping the index means
# later joins with other frames stay row-aligned.
ufc_num = pd.DataFrame(
    num_imputer.fit_transform(ufc_num),
    columns=ufc_num.columns,
    index=ufc_num.index,
)
# Confirm that no missing values remain.
ufc_num.isnull().sum()

B_avg_KD                        0
B_avg_opp_KD                    0
B_avg_SIG_STR_pct               0
B_avg_opp_SIG_STR_pct           0
B_avg_TD_pct                    0
B_avg_opp_TD_pct                0
B_avg_SUB_ATT                   0
B_avg_opp_SUB_ATT               0
B_avg_REV                       0
B_avg_opp_REV                   0
B_avg_SIG_STR_att               0
B_avg_SIG_STR_landed            0
B_avg_opp_SIG_STR_att           0
B_avg_opp_SIG_STR_landed        0
B_avg_TOTAL_STR_att             0
B_avg_TOTAL_STR_landed          0
B_avg_opp_TOTAL_STR_att         0
B_avg_opp_TOTAL_STR_landed      0
B_avg_TD_att                    0
B_avg_TD_landed                 0
B_avg_opp_TD_att                0
B_avg_opp_TD_landed             0
B_avg_HEAD_att                  0
B_avg_HEAD_landed               0
B_avg_opp_HEAD_att              0
B_avg_opp_HEAD_landed           0
B_avg_BODY_att                  0
B_avg_BODY_landed               0
B_avg_opp_BODY_att              0
B_avg_opp_BODY

### Handling categorical values

### Dropping not-so-useful features

In [None]:
# Gather the object/bool columns as the categorical feature set, leaving out
# the "Winner" column (used as the target later on) if it is present.
ufc_cat = (
    ufc.select_dtypes(include=["object", "bool"])
    .drop(columns=["Winner"], errors="ignore")
)
ufc_cat.columns

Index(['R_fighter', 'B_fighter', 'Referee', 'date', 'location', 'title_bout',
       'weight_class', 'B_Stance', 'R_Stance'],
      dtype='object')

Ideally, because R_fighter and B_fighter are names, we should just remove those features

In [None]:
# Fighter names are dropped from the categorical features;
# errors="ignore" makes the cell idempotent.
fighter_name_columns = ["R_fighter", "B_fighter"]
ufc_cat = ufc_cat.drop(columns=fighter_name_columns, errors="ignore")
ufc_cat.columns

Index(['Referee', 'date', 'location', 'title_bout', 'weight_class', 'B_Stance',
       'R_Stance'],
      dtype='object')

Unique values for each column

In [None]:
# Count how many rows fall under each distinct location value.
ufc_cat["location"].value_counts()

location
Las Vegas, Nevada, USA                         1416
Abu Dhabi, Abu Dhabi, United Arab Emirates      135
London, England, United Kingdom                 101
Newark, New Jersey, USA                          80
Montreal, Quebec, Canada                         77
Anaheim, California, USA                         76
Atlantic City, New Jersey, USA                   73
Chicago, Illinois, USA                           71
Los Angeles, California, USA                     69
Stockholm, Sweden                                67
Rio de Janeiro, Brazil                           67
Toronto, Ontario, Canada                         65
Boston, Massachusetts, USA                       64
Houston, Texas, USA                              62
Denver, Colorado, USA                            58
Sacramento, California, USA                      58
Dallas, Texas, USA                               57
Sao Paulo, Sao Paulo, Brazil                     56
Nashville, Tennessee, USA                        55
Mex

Since there are so many locations and we don't know the nationality of these fighters, this feature most likely won't help the models. We can ignore the dates for now, too.

In [None]:
# Remove the location and date columns from the categorical features;
# errors="ignore" makes the cell idempotent.
ufc_cat = ufc_cat.drop(columns=["location", "date"], errors="ignore")

In [None]:
# List the categorical columns that remain after the drop above.
ufc_cat.columns

Index(['Referee', 'title_bout', 'weight_class', 'B_Stance', 'R_Stance'], dtype='object')

In [None]:
# Count how many rows fall under each distinct Referee value.
ufc_cat["Referee"].value_counts()

Referee
Herb Dean               780
John McCarthy           585
Dan Miragliotta         358
Mario Yamasaki          352
Marc Goddard            311
                       ... 
Brandon Pfannenstiel      1
Mark Lawley               1
Marcio Laselva            1
Steven Davis              1
Robbie Scheureumann       1
Name: count, Length: 202, dtype: int64

Referee can be removed for the same reasons as described above.

In [None]:
# Remove the Referee column from the categorical features;
# errors="ignore" makes the cell idempotent.
ufc_cat = ufc_cat.drop(columns=["Referee"], errors="ignore")

In [None]:
# List the categorical columns that remain after dropping Referee.
ufc_cat.columns

Index(['title_bout', 'weight_class', 'B_Stance', 'R_Stance'], dtype='object')

In [None]:
# Frequency of each title_bout value.
ufc_cat["title_bout"].value_counts()

title_bout
False    5080
True      330
Name: count, dtype: int64

In [None]:
# Frequency of each weight_class value.
ufc_cat["weight_class"].value_counts()

weight_class
Welterweight          980
Lightweight           967
Middleweight          734
Heavyweight           538
LightHeavyweight      511
Featherweight         490
Bantamweight          427
Flyweight             208
WomenStrawweight      176
WomenBantamweight     132
WomenFlyweight        104
OpenWeight             77
CatchWeight            50
WomenFeatherweight     16
Name: count, dtype: int64

In [None]:
# Frequency of each B_Stance value (missing entries are not counted by default).
ufc_cat["B_Stance"].value_counts()

B_Stance
Orthodox       4077
Southpaw       1048
Switch          209
Open Stance       8
Sideways          4
Name: count, dtype: int64

In [None]:
# Frequency of each R_Stance value (missing entries are not counted by default).
ufc_cat["R_Stance"].value_counts()

R_Stance
Orthodox       4077
Southpaw       1110
Switch          180
Open Stance      14
Sideways          2
Name: count, dtype: int64

These features might be useful. There's also a heavy imbalance within these features, but this is probably inherent to the data, since some fighting stances are simply more common than others, so we'll ignore that.

Let's come back to this later

### Encoding the categorical variables using One-Hot encoding

Since we ended up using CatBoost, we didn't really need to encode the categorical features ourselves; we let CatBoost do the job with its own version of one-hot encoding. The cells below are just a sample of what the encoding basically looks like.

In [None]:
cat_enc = OneHotEncoder()

ufc_cat = cat_enc.fit_transform(ufc_cat).toarray()

In [None]:
ufc_cat = pd.DataFrame(ufc_cat, columns=cat_enc.get_feature_names_out())
ufc_cat.head()

Unnamed: 0,title_bout_False,title_bout_True,weight_class_Bantamweight,weight_class_CatchWeight,weight_class_Featherweight,weight_class_Flyweight,weight_class_Heavyweight,weight_class_LightHeavyweight,weight_class_Lightweight,weight_class_Middleweight,weight_class_OpenWeight,weight_class_Welterweight,weight_class_WomenBantamweight,weight_class_WomenFeatherweight,weight_class_WomenFlyweight,weight_class_WomenStrawweight,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Sideways,B_Stance_Southpaw,B_Stance_Switch,B_Stance_nan,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Sideways,R_Stance_Southpaw,R_Stance_Switch,R_Stance_nan
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [None]:
ufc_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 28 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   title_bout_False                 5410 non-null   float64
 1   title_bout_True                  5410 non-null   float64
 2   weight_class_Bantamweight        5410 non-null   float64
 3   weight_class_CatchWeight         5410 non-null   float64
 4   weight_class_Featherweight       5410 non-null   float64
 5   weight_class_Flyweight           5410 non-null   float64
 6   weight_class_Heavyweight         5410 non-null   float64
 7   weight_class_LightHeavyweight    5410 non-null   float64
 8   weight_class_Lightweight         5410 non-null   float64
 9   weight_class_Middleweight        5410 non-null   float64
 10  weight_class_OpenWeight          5410 non-null   float64
 11  weight_class_Welterweight        5410 non-null   float64
 12  weight_class_WomenBa

In [None]:
# Turn boolean columns into the string labels "True"/"False".
# Presumably this gives every categorical column the same object dtype for
# the mode imputer below — TODO confirm.
bool_cols = ufc_cat.select_dtypes(include=["bool"]).columns
ufc_cat[bool_cols] = ufc_cat[bool_cols].replace({True: "True", False: "False"})

### Mode Imputation

In [None]:
# Inspect dtypes and non-null counts to see which categorical columns still
# contain missing values.
ufc_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title_bout    5410 non-null   object
 1   weight_class  5410 non-null   object
 2   B_Stance      5346 non-null   object
 3   R_Stance      5383 non-null   object
dtypes: object(4)
memory usage: 169.2+ KB


This time, let's impute with mode.

In [None]:
# Replace missing categorical entries with each column's most frequent value.
# fit_transform returns a bare array, so the result is wrapped back into a
# DataFrame with the original column names.
cat_imputer = SimpleImputer(strategy="most_frequent")
ufc_cat = pd.DataFrame(
    data=cat_imputer.fit_transform(ufc_cat),
    columns=ufc_cat.columns,
)
# Confirm the non-null counts after imputation.
ufc_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5410 entries, 0 to 5409
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title_bout    5410 non-null   object
 1   weight_class  5410 non-null   object
 2   B_Stance      5410 non-null   object
 3   R_Stance      5410 non-null   object
dtypes: object(4)
memory usage: 169.2+ KB


## Splitting between features and target

In [None]:
# Combine the imputed numeric features with the imputed categorical features.
# `assign` works on a copy of `ufc_num` and adds each categorical column in
# order, aligned on the index.
ufc_prepped = ufc_num.assign(
    **{column: ufc_cat[column] for column in ufc_cat.columns}
)

In [None]:
# Display the combined numeric + categorical feature table.
ufc_prepped

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_
avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,title_bout,weight_class,B_Stance,R_Stance
0,0.400000,0.200000,0.387000,0.354000,0.194000,0.160000,0.000000,1.200000,0.000000,0.000000,71.300000,32.250000,40.200000,14.000000,101.950000,57.950000,59.750000,32.000000,2.650000,1.050000,1.650000,0.550000,58.500000,24.000000,31.950000,8.100000,7.850000,4.100000,3.650000,1.950000,4.950000,4.150000,4.600000,3.950000,45.500000,16.900000,30.000000,7.750000,5.200000,4.100000,2.550000,1.900000,20.600000,11.250000,7.650000,4.350000,156.150000,98.100000,538.300000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.88,185.0,0.500000,0.200000,0.599000,0.413000,0.800000,0.232000,0.800000,0.600000,0.000000,0.200000,19.100000,10.600000,21.200000,12.300000,24.500000,15.400000,49.100000,38.400000,1.100000,1.100000,1.500000,0.900000,15.800000,7.800000,18.600000,10.500000,2.600000,2.400000,1.800000,1.200000,0.700000,0.400000,0.800000,0.600000,9.000000,4.000000,7.000000,1.600000,3.100000,2.300000,3.500000,2.900000,7.000000,4.300000,10.700000,7.800000,95.500000,176.500000,327.900000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.88,177.80,185.0,26.0,21.0,False,Middleweight,Orthodox,Southpaw
1,1.156250,0.000000,0.394141,0.352422,0.239219,0.011484,0.156250,0.132812,0.000000,0.000000,137.460938,53.234375,89.953125,31.296875,141.210938,56.718750,91.882812,33.078125,2.000000,0.625000,1.835938,0.085938,117.312500,39.976562,83.210938,27.132812,13.625000,7.765625,3.960938,2.070312,6.523438,5.492188,2.781250,2.093750,127.031250,46.812500,88.695312,30.882812,5.671875,3.679688,0.710938,0.054688,4.757812,2.742188,0.546875,0.359375,35.945312,14.601562,522.835938,19.0,1.0,0.0,1.0,3.0,6.0,2.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,177.80,155.0,0.500000,0.343750,0.498750,0.339062,0.554688,0.207500,0.562500,0.312500,0.500000,0.031250,124.500000,60.562500,117.812500,41.843750,156.031250,87.593750,129.593750,53.093750,3.500000,1.875000,5.812500,0.937500,93.468750,37.781250,92.250000,27.656250,21.656250,17.468750,17.593750,8.312500,9.375000,5.312500,7.968750,5.875000,98.250000,41.562500,110.250000,38.406250,14.281250,11.750000,4.562500,2.343750,11.968750,7.250000,3.000000,1.093750,252.312500,115.000000,890.593750,17.0,0.0,2.0,0.0,3.0,5.0,1.0,0.0,0.0,0.0,4.0,0.0,1.0,0.0,180.34,187.96,170.0,27.0,30.0,False,Lightweight,Orthodox,Orthodox
2,0.000000,1.000000,0.492000,0.458000,0.516000,0.274000,0.200000,0.400000,0.000000,0.000000,81.000000,33.200000,71.000000,34.200000,110.200000,56.800000,86.200000,48.200000,4.000000,1.400000,2.000000,0.800000,72.800000,27.200000,56.000000,22.800000,2.600000,2.200000,8.800000,6.600000,5.600000,3.800000,6.200000,4.800000,65.400000,23.800000,54.000000,23.000000,6.000000,3.600000,5.000000,3.600000,9.600000,5.800000,12.000000,7.600000,147.600000,75.200000,475.800000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.88,170.0,0.400000,0.000000,0.599000,0.311000,0.200000,0.206000,0.000000,0.000000,0.000000,0.000000,81.300000,33.900000,40.500000,13.700000,103.600000,55.400000,54.900000,27.500000,0.800000,0.800000,3.400000,1.100000,57.600000,18.000000,29.700000,5.800000,15.300000,10.500000,3.800000,2.200000,8.400000,5.400000,7.000000,5.700000,60.500000,17.200000,37.400000,11.700000,8.700000,6.900000,3.100000,2.000000,12.100000,9.800000,0.000000,0.000000,150.500000,108.000000,541.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,185.42,187.96,170.0,28.0,31.0,False,Welterweight,Orthodox,Orthodox
3,0.695312,0.000000,0.783359,0.185547,0.088281,0.104375,0.093750,0.062500,0.000000,0.000000,39.218750,19.015625,37.226562,14.531250,51.500000,30.742188,52.046875,28.671875,2.578125,0.804688,1.625000,0.445312,30.367188,11.804688,28.773438,8.367188,5.429688,4.546875,6.656250,4.453125,3.421875,2.664062,1.796875,1.710938,25.703125,8.875000,24.234375,6.359375,11.867188,8.617188,12.179688,7.359375,1.648438,1.523438,0.812500,0.812500,68.281250,67.156250,331.070312,17.0,0.0,4.0,0.0,4.0,7.0,1.0,0.0,1.0,1.0,1.0,4.0,0.0,0.0,175.26,185.0,0.020050,0.011723,0.373518,0.325567,0.419368,0.155259,0.093866,0.007238,0.000015,0.000137,81.387877,27.139767,109.998363,39.224997,96.130762,41.379938,125.441042,53.259472,2.766176,1.440809,3.660690,1.000504,72.988626,19.695154,92.600458,27.688076,8.181122,7.289246,8.554874,4.713242,0.218128,0.155367,8.843031,6.823679,67.441225,14.901989,102.755116,33.409025,8.778824,7.393555,6.983452,5.589394,5.167828,4.844223,0.259796,0.226578,145.693754,116.292816,793.567753,44.0,1.0,2.0,0.0,5.0,15.0,5.0,0.0,0.0,1.0,5.0,6.0,3.0,0.0,177.80,185.42,170.0,28.0,34.0,False,Welterweight,Southpaw,Orthodox
4,0.500000,0.266602,0.381462,0.456558,0.429614,0.469570,1.377930,0.000244,0.125000,0.500000,74.522461,29.986572,93.299805,42.413086,102.170410,51.280762,125.859863,71.359619,9.037354,3.588867,3.682373,1.380859,58.093994,18.651855,76.181396,26.925781,8.149902,5.452637,10.418213,9.236328,8.278564,5.882080,6.700195,6.250977,56.636475,20.501221,69.770264,26.547607,9.979004,6.316650,17.725586,12.022461,7.906982,3.168701,5.803955,3.843018,209.931152,181.141357,822.403564,31.0,1.0,0.0,1.0,2.0,6.0,7.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,165.10,135.0,0.320312,0.000000,0.416328,0.420000,0.466719,0.160312,0.000000,0.000000,0.000000,0.000000,105.070312,44.851562,126.031250,59.617188,134.648438,72.429688,168.796875,99.320312,2.625000,1.171875,11.648438,2.156250,68.460938,15.812500,89.789062,35.625000,25.195312,19.101562,24.078125,15.140625,11.414062,9.937500,12.164062,8.851562,88.000000,33.382812,102.640625,43.398438,15.484375,10.640625,23.234375,16.125000,1.585938,0.828125,0.156250,0.093750,110.148438,162.796875,1129.593750,22.0,3.0,3.0,0.0,3.0,6.0,2.0,0.0,0.0,0.0,2.0,3.0,0.0,1.0,160.02,167.64,135.0,34.0,31.0,False,Bantamweight,Orthodox,Orthodox
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5405,0.125000,0.000000,0.481250,0.545000,0.721250,0.435000,0.625000,0.375000,1.250000,0.125000,115.500000,60.375000,55.875000,27.750000,126.750000,70.750000,65.375000,37.000000,2.500000,1.000000,4.125000,1.875000,108.750000,55.250000,33.875000,8.250000,6.500000,4.875000,7.125000,5.125000,0.250000,0.250000,14.875000,14.375000,55.750000,21.500000,48.000000,23.750000,12.625000,7.750000,4.625000,2.875000,47.125000,31.125000,3.250000,1.125000,346.875000,113.500000,701.000000,10.0,0.0,1.0,0.0,2.0,3.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,190.50,155.0,0.000000,1.000000,0.367500,0.365000,0.232500,0.050000,1.250000,0.750000,1.250000,0.500000,90.000000,34.250000,108.750000,38.750000,99.250000,43.250000,116.250000,45.750000,8.000000,2.500000,1.250000,0.250000,65.000000,21.000000,91.000000,27.250000,16.500000,6.000000,13.750000,8.250000,8.500000,7.250000,4.000000,3.250000,82.000000,28.750000,101.000000,35.250000,4.000000,2.000000,4.750000,1.250000,4.000000,3.500000,3.000000,2.250000,164.000000,38.000000,690.000000,7.0,0.0,0.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,175.26,180.34,155.0,26.0,29.0,False,Lightweight,Southpaw,Orthodox
5406,0.015625,0.000000,0.384980,0.313633,0.613965,0.201641,0.554688,0.332031,0.009766,0.126953,128.417969,50.142578,80.093750,23.673828,149.867188,70.507812,95.468750,38.597656,5.433594,3.404297,4.937500,1.652344,91.095703,20.906250,70.052734,18.005859,29.732422,21.935547,8.562500,4.578125,7.589844,7.300781,1.478516,1.089844,100.585938,29.765625,71.501953,19.650391,13.421875,8.462891,6.703125,2.392578,14.410156,11.914062,1.888672,1.630859,232.138672,187.236328,890.759766,28.0,0.0,0.0,1.0,6.0,7.0,3.0,0.0,0.0,2.0,3.0,1.0,1.0,0.0,175.26,170.0,0.000000,0.009766,0.674354,0.679751,0.235942,0.191372,0.174561,0.063477,0.000488,0.031616,59.981201,20.242920,61.599731,24.849487,78.031616,35.113525,70.011597,31.948486,5.714111,1.252319,2.063110,1.187622,51.024902,13.841919,52.012451,17.431274,7.016113,5.072388,5.266968,3.630249,1.940186,1.328613,4.320312,3.787964,47.510498,12.865234,50.864624,16.935059,8.517944,4.500732,6.739258,4.336914,3.952759,2.876953,3.995850,3.577515,122.895996,50.394043,475.191772,33.0,1.0,5.0,0.0,5.0,10.0,4.0,0.0,0.0,0.0,4.0,1.0,5.0,0.0,185.42,182.88,170.0,28.0,34.0,False,Welterweight,Southpaw,Southpaw
5407,0.250000,0.250000,0.602500,0.515000,0.330000,0.330000,0.250000,0.000000,0.000000,0.000000,44.750000,16.750000,63.500000,39.500000,45.750000,17.250000,75.000000,50.000000,1.500000,1.000000,3.250000,0.750000,31.000000,7.000000,41.000000,23.250000,7.750000,5.250000,21.000000,15.500000,6.000000,4.500000,1.500000,0.750000,39.750000,13.250000,38.250000,20.750000,4.500000,3.000000,11.000000,8.000000,0.500000,0.500000,14.250000,10.750000,8.000000,185.000000,466.250000,5.0,0.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,180.34,145.0,0.300000,0.550000,0.446000,0.350000,0.160000,0.132000,0.100000,0.000000,0.200000,0.000000,83.050000,34.950000,78.650000,30.400000,90.500000,41.400000,87.250000,38.800000,2.600000,0.500000,1.550000,0.400000,63.500000,21.950000,67.250000,21.400000,10.750000,6.400000,8.750000,6.750000,8.800000,6.600000,2.650000,2.250000,72.100000,28.250000,69.150000,24.800000,7.400000,3.700000,5.750000,2.850000,3.550000,3.000000,3.750000,2.750000,67.850000,28.400000,423.350000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,175.26,177.80,145.0,24.0,30.0,False,Featherweight,Southpaw,Orthodox
5408,0.875000,0.125000,0.543125,0.426250,0.031250,0.161250,0.000000,0.015625,0.000000,0.000000,75.312500,38.421875,49.140625,19.812500,76.359375,39.218750,52.968750,22.468750,0.093750,0.062500,3.406250,0.390625,39.171875,10.812500,40.625000,13.953125,15.312500,10.765625,6.703125,4.546875,20.828125,16.843750,1.812500,1.312500,72.437500,36.453125,46.625000,18.437500,0.578125,0.468750,0.265625,0.187500,2.296875,1.500000,2.250000,1.187500,14.671875,12.687500,363.968750,16.0,0.0,4.0,0.0,4.0,6.0,1.0,0.0,0.0,1.0,1.0,4.0,0.0,0.0,180.34,145.0,0.125000,0.125000,0.412969,0.352891,0.592031,0.017266,0.125000,1.359375,0.007812,0.531250,72.914062,29.351562,65.601562,25.500000,104.179688,57.304688,118.875000,75.828125,6.539062,4.343750,0.523438,0.101562,63.234375,21.671875,50.406250,13.085938,5.515625,3.703125,11.148438,8.625000,4.164062,3.976562,4.046875,3.789062,51.359375,15.367188,61.367188,21.460938,2.250000,1.992188,2.671875,2.476562,19.304688,11.992188,1.562500,1.562500,416.273438,30.046875,823.750000,21.0,0.0,1.0,0.0,3.0,6.0,2.0,0.0,0.0,1.0,4.0,1.0,0.0,0.0,175.26,180.34,155.0,27.0,34.0,False,Lightweight,Orthodox,Orthodox


In [None]:
# Features: the prepared table. Target: the "Winner" column, kept as a
# single-column DataFrame (note the double brackets).
# NOTE(review): X was rebuilt with a fresh index during imputation, so this
# assumes `ufc` rows are in the same order — confirm.
X = ufc_prepped
y = ufc[["Winner"]]

### Oversampling using SMOTE-NC

Since the features are all either nominal or continuous/numerical, and we have a pretty severe class imbalance, we opted for a variation of SMOTE for oversampling that can handle both nominal categorical and numeric data.

In [None]:
# Class distribution of the target before oversampling.
y.value_counts()

Winner
Red       3581
Blue      1730
Draw        99
Name: count, dtype: int64

In [None]:
# Oversample the minority classes with SMOTE-NC, telling it which columns are
# categorical (by name).
# A fixed seed makes the synthetic samples reproducible across runs, in line
# with the random_state=42 used for the train/validation split.
# NOTE(review): resampling is applied to the whole dataset before the
# train/validation split further down, so synthetic rows derived from
# validation-side samples can land in the training set (and vice versa).
# Validation scores may therefore be optimistic — consider oversampling only
# the training portion.
smote = SMOTENC(
    categorical_features=ufc_cat.columns.tolist(),
    random_state=42,
)
X_smote, y_smote = smote.fit_resample(X, y)

In [None]:
# Class distribution of the target after oversampling.
y_smote.value_counts()

Winner
Blue      3581
Draw      3581
Red       3581
Name: count, dtype: int64

### Train test split

In [None]:
# Hold out 20% of the resampled data for validation. The split is stratified
# on the resampled target and seeded so it is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_smote,
    y_smote,
    test_size=0.2,
    stratify=y_smote,
    random_state=42,
)

In [None]:
# Display the training features produced by the split.
X_train

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_
avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,title_bout,weight_class,B_Stance,R_Stance
10615,0.042269,0.042269,0.510275,0.554311,0.162082,0.309519,0.554895,0.985956,0.121134,0.439433,71.845919,35.988656,103.300618,53.313305,97.817288,59.078458,146.794071,91.288595,4.036597,0.814044,3.687374,1.073329,52.066314,22.143958,72.495353,30.846055,12.413366,8.492503,19.534907,13.202565,7.366239,5.352195,11.270358,9.264685,53.896289,23.582984,71.922628,30.876437,14.045241,9.467657,21.094660,15.142737,3.904390,2.938015,10.283331,7.294132,180.145694,221.912128,755.359089,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.0,166.029558,121.340323,0.000000,0.000000,0.441172,0.419357,0.243634,0.197642,0.269016,0.214391,0.464787,0.189711,78.419699,33.846464,92.823386,37.571582,114.772265,65.622013,113.580555,55.220927,7.735514,1.636909,1.848389,0.647046,65.571751,23.179416,67.895294,18.904783,5.497123,4.648009,8.930164,4.976476,7.350825,6.019039,15.997928,13.690324,53.583723,16.070754,77.918310,28.132993,10.867009,8.553799,9.117870,5.815261,13.968966,9.221911,5.787206,3.623329,317.385101,114.569071,846.117862,19.716398,0.000000,0.000000,0.788656,0.788656,3.154624,3.943280,0.0,0.0,0.788656,2.365968,0.000000,0.000000,0.0,164.170442,163.777698,121.340323,30.577312,31.097903,False,Bantamweight,Orthodox,Southpaw
8366,0.000000,0.000000,0.230874,0.181651,0.000000,0.480582,0.000000,0.038836,0.000000,0.000000,18.310688,4.349523,17.233016,3.310688,18.582539,4.621375,19.165078,5.203914,0.038836,0.000000,1.922328,0.961164,12.349523,1.271852,13.349523,2.310688,3.961164,1.077672,2.922328,0.038836,2.000000,2.000000,0.961164,0.961164,18.116508,4.194180,16.883492,3.077672,0.194180,0.155344,0.233016,0.116508,0.000000,0.000000,0.116508,0.116508,0.582539,18.941797,152.194180,1.000000,0.000000,0.000000,1.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.0,177.997287,155.000000,0.184098,0.015018,0.405683,0.296925,0.115918,0.066244,0.078598,0.153598,0.000464,0.039238,109.708290,44.794564,147.403645,46.195544,114.763778,47.706073,164.092742,60.513217,1.144156,0.296120,0.910701,0.102859,74.432736,17.453193,110.701812,22.765019,16.867122,11.111360,29.011276,17.356260,18.408432,16.230011,7.690557,6.074266,102.447261,40.040003,135.028346,39.191218,6.168019,3.857897,11.994101,6.760653,1.093010,0.896663,0.381198,0.243673,52.266951,42.364503,771.081988,40.728148,0.000000,0.077672,1.922328,6.038836,10.038836,7.805820,0.0,0.0,0.961164,2.116508,4.883492,2.077672,0.0,175.161357,185.222713,155.000000,26.116508,36.689312,False,Lightweight,Orthodox,Orthodox
3058,0.250000,0.000000,0.457500,0.387500,0.500000,0.000000,0.500000,0.000000,0.000000,0.000000,95.750000,36.500000,80.750000,20.000000,102.500000,42.500000,83.000000,21.750000,0.500000,0.500000,2.750000,0.000000,75.500000,23.750000,67.250000,12.250000,8.250000,4.000000,6.250000,1.500000,12.000000,8.750000,7.250000,6.250000,85.250000,28.000000,78.000000,19.750000,1.250000,1.000000,2.500000,0.250000,9.250000,7.500000,0.250000,0.000000,72.000000,47.250000,586.250000,6.000000,0.000000,3.000000,0.000000,3.000000,3.000000,0.000000,0.0,0.0,0.000000,1.000000,2.00000,0.000000,0.0,177.800000,155.000000,0.125000,0.250000,0.522500,0.522500,0.440000,0.165000,0.500000,0.500000,0.000000,0.000000,50.125000,23.750000,42.875000,21.750000,105.250000,70.375000,64.250000,41.625000,6.875000,2.875000,1.625000,0.500000,40.125000,17.250000,31.500000,14.375000,7.000000,3.750000,8.750000,5.375000,3.000000,2.750000,2.625000,2.000000,33.000000,12.250000,28.875000,13.375000,3.000000,1.875000,4.750000,2.375000,14.125000,9.625000,9.250000,6.000000,395.375000,22.500000,689.250000,9.000000,0.000000,2.000000,0.000000,2.000000,3.000000,1.000000,0.0,0.0,0.000000,2.000000,1.000000,0.000000,0.0,180.340000,187.960000,155.000000,26.000000,33.000000,False,Lightweight,Southpaw,Southpaw
6136,1.243047,0.366234,0.531594,0.467138,0.325086,0.395253,0.482467,0.044589,0.042397,0.064692,127.434551,68.842505,91.610690,41.999839,135.234578,75.615855,97.690642,47.595510,3.470075,1.047159,3.587743,1.498187,93.987012,45.504923,69.655223,23.943857,20.819212,13.810147,14.867507,11.414666,12.628328,9.527435,7.087961,6.641316,99.904385,48.998485,81.233877,34.395699,10.042180,7.004842,9.506196,6.800406,17.487987,12.839178,0.870617,0.803734,138.609652,53.231766,708.534371,15.000000,0.000000,3.859740,0.356710,4.573160,4.929870,0.713420,0.0,0.0,0.000000,2.286580,2.64329,0.000000,0.0,169.452087,141.432899,0.227629,0.613190,0.445600,0.391042,0.003642,0.115073,0.005183,0.000000,0.000000,0.000000,86.434590,38.209071,118.209974,46.798482,87.314302,38.686999,121.977515,50.241161,0.094389,0.012093,1.542880,0.418768,67.359262,25.271638,99.926862,34.708604,10.038114,6.161058,11.370901,6.927203,9.037214,6.776374,6.912212,5.162676,79.085962,32.853863,105.813063,37.956719,3.889845,3.363773,6.791838,4.926231,3.458783,1.991435,5.605073,3.915533,9.566216,33.203068,491.634114,34.298698,7.076189,5.929870,0.000000,5.929870,8.216450,3.000000,0.0,0.0,0.000000,4.216450,4.000000,0.000000,0.0,170.180000,177.800000,138.567101,28.573160,30.929870,False,Lightweight,Orthodox,Orthodox
3408,0.000000,0.000000,0.472000,0.630000,0.400000,0.800000,1.200000,0.800000,0.400000,0.200000,72.400000,34.200000,104.800000,48.800000,116.200000,70.200000,147.200000,84.200000,2.800000,1.800000,3.400000,2.600000,54.400000,24.000000,88.000000,36.800000,15.400000,7.600000,14.400000,9.800000,2.600000,2.600000,2.400000,2.200000,42.200000,15.200000,66.600000,23.000000,17.000000,9.400000,23.200000,14.600000,13.200000,9.600000,15.000000,11.200000,192.600000,188.400000,592.800000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.00000,0.000000,0.0,160.020000,115.000000,0.000000,0.400000,0.526000,0.518000,0.366000,0.480000,0.000000,0.200000,0.400000,0.400000,61.800000,25.400000,80.000000,38.800000,83.400000,44.400000,119.200000,72.800000,3.800000,1.000000,4.200000,2.000000,45.800000,14.200000,59.200000,24.200000,7.800000,4.200000,16.000000,11.000000,8.200000,7.000000,4.800000,3.600000,52.400000,19.000000,62.800000,26.400000,5.800000,3.600000,9.200000,6.800000,3.600000,2.800000,8.000000,5.600000,111.200000,197.000000,643.800000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,160.020000,164.592000,115.000000,29.000000,33.000000,False,WomenStrawweight,Orthodox,Orthodox
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8702,0.312954,0.000000,0.435723,0.373658,0.104132,0.286887,0.968089,0.625909,0.000000,0.005845,143.251073,55.400030,160.133658,52.022779,160.590769,71.738502,185.155467,74.620813,2.893600,0.841705,6.873105,2.314416,115.574657,34.640595,128.702138,29.123408,17.797191,12.605013,22.555967,15.352372,9.879224,8.154422,8.875553,7.546999,126.530180,45.165986,145.560155,41.428581,10.676780,6.816589,10.098856,6.329978,6.044113,3.417455,4.474646,4.264220,48.230868,175.301750,842.729354,13.855917,0.374091,0.374091,0.625909,2.122274,2.870457,1.748183,0.0,0.0,0.374091,2.496365,0.00000,0.000000,0.0,171.769808,138.740913,0.312954,0.374091,0.560442,0.496665,0.237592,0.525616,1.593212,0.029226,0.365846,0.329211,69.271536,30.349553,59.690204,29.369045,97.952295,56.033623,77.193917,42.832052,5.420259,1.126011,1.647102,1.129201,52.008789,15.591664,49.475164,20.862860,11.785881,10.316555,7.635389,5.938372,5.476866,4.441334,2.579652,2.567813,60.535761,22.321907,46.803569,18.867714,5.132530,4.755610,5.687760,4.731304,3.603245,3.272036,7.198876,5.770027,74.001088,185.565940,523.320647,17.093196,0.000000,1.625909,0.000000,2.748183,5.366822,1.870457,0.0,0.0,0.374091,1.496365,0.000000,3.496365,0.0,172.409424,176.539232,135.000000,30.000000,30.618639,False,Lightweight,Orthodox,Orthodox
245,0.000000,0.000000,0.400000,0.395000,0.540000,0.200000,1.500000,0.000000,1.000000,0.500000,28.500000,12.000000,40.500000,17.000000,62.000000,42.000000,83.500000,57.000000,6.500000,3.000000,5.000000,2.000000,24.000000,8.500000,35.000000,12.000000,4.500000,3.500000,5.000000,4.500000,0.000000,0.000000,0.500000,0.500000,23.500000,9.000000,28.500000,9.500000,4.500000,2.500000,7.500000,5.000000,0.500000,0.500000,4.500000,2.500000,225.000000,178.500000,638.000000,5.000000,0.000000,0.000000,1.000000,1.000000,1.000000,1.000000,0.0,0.0,0.000000,0.000000,0.00000,1.000000,0.0,160.020000,115.000000,0.000000,0.000000,0.443281,0.591406,0.251563,0.008281,0.109375,0.039062,0.023438,0.460938,151.195312,61.796875,156.585938,86.507812,174.273438,80.703125,190.476562,119.593750,3.992188,0.835938,2.937500,0.031250,109.960938,32.734375,119.398438,56.031250,28.398438,20.406250,26.968750,22.164062,12.835938,8.656250,10.218750,8.312500,118.953125,37.773438,107.492188,43.585938,28.625000,21.484375,33.281250,28.429688,3.617188,2.539062,15.812500,14.492188,258.062500,116.289062,884.554688,21.000000,0.000000,1.000000,0.000000,4.000000,5.000000,3.000000,0.0,0.0,1.000000,2.000000,0.000000,2.000000,0.0,162.560000,165.100000,115.000000,32.000000,35.000000,False,WomenStrawweight,Orthodox,Orthodox
1574,0.000000,0.000000,0.482500,0.415000,0.185000,0.500000,0.500000,0.000000,0.000000,0.000000,24.500000,11.750000,64.500000,32.000000,26.500000,13.250000,69.750000,36.500000,4.500000,1.500000,0.500000,0.500000,21.500000,10.250000,57.000000,25.250000,2.750000,1.250000,3.750000,3.750000,0.250000,0.250000,3.750000,3.000000,17.000000,6.250000,28.750000,11.000000,4.750000,2.750000,2.000000,1.500000,2.750000,2.750000,33.750000,19.500000,51.250000,95.750000,298.500000,4.000000,0.000000,2.000000,0.000000,2.000000,2.000000,1.000000,0.0,0.0,0.000000,0.000000,0.00000,2.000000,0.0,185.420000,185.000000,0.000000,0.000000,0.581250,0.478750,0.395000,0.500000,0.375000,1.125000,0.000000,0.000000,15.875000,10.250000,15.000000,7.500000,28.750000,22.625000,32.375000,20.875000,2.500000,0.875000,0.500000,0.500000,15.375000,9.875000,12.625000,5.625000,0.375000,0.250000,1.875000,1.375000,0.125000,0.125000,0.500000,0.500000,4.625000,1.375000,6.250000,1.500000,0.750000,0.375000,1.000000,0.375000,10.500000,8.500000,7.750000,5.625000,49.750000,166.125000,340.625000,5.000000,0.000000,0.000000,1.000000,2.000000,2.000000,2.000000,0.0,0.0,0.000000,0.000000,0.000000,2.000000,0.0,185.420000,185.420000,185.000000,29.000000,31.000000,False,Middleweight,Southpaw,Orthodox
5480,0.331256,0.000000,0.445731,0.414578,0.231737,0.075971,0.554443,0.115181,0.327668,0.115622,69.830752,29.728472,52.043862,20.519949,87.514792,46.134378,60.900482,28.254716,1.374119,0.614741,2.877202,0.703549,54.198640,18.459716,41.651867,13.496724,8.127202,5.250440,5.823264,3.153575,7.504909,6.018316,4.568731,3.869650,58.405208,20.288201,47.162255,17.232501,6.842461,5.783989,2.858005,1.692850,4.583083,3.656282,2.023601,1.594599,85.210705,86.986905,485.365042,14.228601,0.000000,1.385700,0.000000,3.228601,4.614300,1.000000,0.0,0.0,1.000000,2.228601,0.61430,0.771399,0.0,177.582585,161.858026,0.028964,0.089450,0.396381,0.461290,0.321232,0.062184,0.616112,0.120644,0.001695,0.224747,80.657387,29.755824,95.599080,42.171912,99.704933,44.317947,107.312088,52.753923,10.646660,3.927729,0.203115,0.130697,69.138182,22.446675,79.776467,29.190513,4.964615,1.809499,7.816905,6.026908,6.554590,5.499651,8.005708,6.954492,63.395094,17.276935,86.823101,35.317662,8.871124,5.449729,4.473776,3.583505,8.391170,7.029161,4.302204,3.270745,360.608862,61.754589,841.623841,37.757305,1.000000,0.385700,0.614300,4.228601,9.457202,5.000000,0.0,0.0,0.614300,3.771399,1.228601,3.842901,0.0,176.602908,185.384200,161.858026,27.385700,34.614300,False,Flyweight,Orthodox,Orthodox


In [None]:
X_val

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_
avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,title_bout,weight_class,B_Stance,R_Stance
9571,0.613178,0.022804,0.468202,0.335091,0.459752,0.194978,0.027079,0.640257,0.410211,0.051308,118.862825,53.853202,78.265309,30.466259,152.313430,82.647616,125.242273,71.821621,2.474473,1.464497,3.619464,1.295480,92.615954,32.493407,67.736242,24.139447,21.229259,16.918432,6.422835,3.376515,5.017613,4.441363,4.106232,2.950297,85.912303,30.336864,72.638869,26.550880,8.925633,7.347958,3.620687,2.310115,24.024889,16.168379,2.005753,1.605264,205.979086,175.949848,740.725725,11.283725,0.0,2.182429,0.000000,2.182429,2.729717,1.364858,0.0,0.0,0.182429,0.547288,1.817571,0.000000,0.0,179.190110,141.385021,0.364858,0.000000,0.439372,0.524375,0.401376,0.514307,2.043927,0.182429,0.638727,0.408785,51.232612,24.285510,26.659340,15.072161,97.245169,67.962344,62.470640,48.598836,4.759302,1.865083,3.802778,2.437920,40.033605,16.385471,20.159115,9.571936,5.134016,3.294932,4.839534,4.171672,6.064990,4.605107,1.660691,1.328553,36.890167,13.359022,15.097260,6.426263,4.357237,2.915731,1.149709,1.021964,9.985208,8.010757,10.412371,7.623935,280.303950,360.142988,863.975786,13.628421,0.000000,1.000000,0.000000,2.635142,3.452712,1.635142,0.0,0.0,0.182429,2.452712,0.000000,0.817571,0.0,169.493480,171.570110,141.385021,30.087854,30.277004,False,Bantamweight,Orthodox,Orthodox
7124,0.154484,0.308969,0.469369,0.518588,0.142126,0.071578,0.051495,0.514948,0.102990,0.154484,61.277361,26.455156,75.554853,38.760832,73.375346,37.677730,85.915317,48.760832,3.742526,0.463453,2.514948,0.411958,32.177664,9.068089,57.996707,25.584749,13.282366,6.244172,7.978401,5.154484,15.817331,11.142895,9.579744,8.021599,52.076386,20.931911,68.868826,34.134597,3.794021,2.691031,3.081391,2.411958,5.406954,2.832214,3.604636,2.214276,107.715792,59.508232,615.182799,1.455156,0.0,0.000000,0.485052,0.000000,0.000000,0.485052,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,182.652193,206.827340,0.397010,0.371263,0.597093,0.433738,0.512874,0.123588,0.360464,0.154484,0.102990,0.000000,19.558146,12.105492,19.443501,9.795667,29.895364,20.998354,34.887001,24.048137,1.665284,0.731727,0.720927,0.205979,12.931055,6.787370,16.368761,7.313117,5.534900,4.715132,2.199328,1.787370,1.092190,0.602990,0.875412,0.695180,6.459305,2.650336,8.890360,3.298169,8.182734,6.259976,5.321415,3.126235,4.916107,3.195180,5.231727,3.371263,90.537272,65.076386,240.728238,0.970104,0.000000,0.485052,0.000000,0.485052,0.485052,0.485052,0.0,0.0,0.000000,0.000000,0.485052,0.000000,0.0,186.576096,194.499840,194.701040,24.395364,29.940208,False,LightHeavyweight,Southpaw,Orthodox
6790,0.544393,0.303738,0.352290,0.306121,0.249416,0.310981,0.000000,0.196262,0.000000,0.000000,147.890175,53.621502,150.551406,46.497651,154.948594,59.591135,180.322428,72.572411,3.018690,0.955607,8.063083,3.626166,124.841118,37.869170,135.051406,35.560734,19.934568,12.834105,14.000000,9.588786,3.114489,2.918227,1.500000,1.348131,126.226629,41.754681,130.775720,33.366821,17.175223,9.796725,14.864472,9.612140,4.488323,2.070096,4.911214,3.518690,104.123783,250.443896,900.000000,7.822428,0.0,0.000000,2.214952,0.392524,0.392524,2.214952,0.0,0.0,0.000000,0.392524,0.000000,0.000000,0.0,171.628044,136.775720,0.098131,0.426402,0.433598,0.497921,0.500234,0.358703,0.803738,0.049065,0.480140,0.759345,75.682228,37.307232,97.221957,46.502332,87.936892,48.601616,116.418235,64.511694,3.866821,2.000000,3.918227,1.029205,59.346952,26.207939,75.735974,28.100463,10.089948,6.987144,11.525703,9.191589,6.245327,4.112149,9.960280,9.210280,63.844612,28.759337,84.626158,36.504664,8.346961,5.778035,4.898367,3.398367,3.490655,2.769860,7.697432,6.599301,161.010549,100.792044,593.955590,7.785048,0.000000,0.000000,1.785048,1.000000,1.000000,2.785048,0.0,0.0,0.000000,0.607476,0.392524,0.000000,0.0,171.628044,178.251033,136.775720,27.252332,31.607476,False,Lightweight,Orthodox,Orthodox
9005,0.262350,0.225300,0.592763,0.416468,0.182043,0.019760,0.470361,0.088918,0.470361,0.049399,52.782644,27.339988,49.593858,19.591388,79.459286,52.115428,58.060803,27.011404,1.814219,0.574099,0.197596,0.081508,38.600198,18.700601,36.492055,9.185246,8.599868,4.002470,11.899802,9.342458,5.582578,4.636917,1.202001,1.063684,35.395192,13.219826,33.551334,9.437716,1.467356,1.368558,11.922567,8.010950,15.920097,12.751605,4.119957,2.142722,164.969818,49.807388,570.950682,5.407212,0.0,0.000000,1.802404,0.901202,0.901202,1.802404,0.0,0.0,0.000000,0.901202,0.000000,0.000000,0.0,180.590947,185.000000,0.000000,0.000000,0.440067,0.414665,0.546006,0.113391,0.901202,0.049399,0.000000,0.049399,63.564754,28.484810,61.022684,28.732135,88.546229,50.460111,114.364977,76.910877,6.253170,3.308579,2.632677,0.907377,51.666392,18.604973,48.917216,21.031039,8.941256,7.277704,5.883172,3.447926,2.957106,2.602133,6.222296,4.253170,48.197431,16.842788,37.318583,10.697761,6.034209,5.386012,2.519184,1.972790,9.333113,6.256010,21.184916,16.061584,339.290293,261.749377,869.431985,9.493990,0.901202,1.000000,0.000000,1.098798,2.098798,1.098798,0.0,0.0,0.000000,1.197596,0.000000,0.901202,0.0,187.709053,199.656212,186.975960,33.110818,26.086778,False,Bantamweight,Orthodox,Orthodox
1864,0.126495,0.550781,0.258311,0.405086,0.049115,0.885844,0.816478,0.031988,0.000099,0.000490,129.744170,34.378013,141.884630,58.007129,134.605519,38.844597,145.851747,61.203506,1.981268,0.286905,1.666071,1.447796,100.648781,18.453363,115.064181,38.761467,23.037772,12.781339,21.897161,15.671740,6.057617,3.143311,4.923287,3.573922,122.817514,31.026652,131.414897,52.398046,5.918076,2.710762,7.203486,3.607727,1.008580,0.640598,3.266247,2.001355,29.993358,60.053073,900.791032,58.000000,2.0,6.000000,0.000000,6.000000,14.000000,7.000000,0.0,0.0,3.000000,6.000000,5.000000,0.000000,0.0,177.800000,170.000000,0.257812,1.000000,0.412969,0.588828,0.046797,0.132812,0.445312,0.132812,0.390625,0.000000,41.500000,15.945312,42.835938,24.132812,46.750000,21.039062,52.914062,34.078125,2.070312,0.398438,1.093750,0.445312,26.914062,4.695312,32.640625,16.921875,5.140625,4.195312,4.570312,2.804688,9.445312,7.054688,5.625000,4.406250,34.257812,10.054688,27.773438,10.976562,4.359375,4.039062,5.890625,4.609375,2.882812,1.851562,9.171875,8.546875,78.492188,89.531250,372.039062,18.000000,1.000000,4.000000,0.000000,4.000000,6.000000,2.000000,0.0,0.0,0.000000,2.000000,2.000000,2.000000,0.0,172.720000,180.340000,145.000000,33.000000,33.000000,False,Featherweight,Southpaw,Orthodox
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8285,0.000000,0.000000,0.499714,0.564360,0.109006,0.000000,0.872048,0.872048,0.000000,0.000000,30.099398,16.715542,28.099398,15.895663,36.767711,22.511807,29.663373,17.459639,2.872048,0.436024,1.691928,0.000000,21.023614,9.767711,11.819880,3.436024,4.947831,3.255904,0.563976,0.563976,4.127952,3.691928,15.715542,11.895663,22.151566,11.331687,14.947831,4.436024,1.000000,0.436024,13.151566,11.459639,6.947831,4.947831,0.000000,0.000000,111.782530,67.549156,341.624940,1.563976,0.0,1.000000,0.000000,1.000000,1.000000,0.000000,0.0,0.0,0.000000,0.000000,0.563976,0.436024,0.0,182.880000,178.459639,0.000000,0.000000,0.366304,0.443081,0.338386,0.071944,0.218012,0.000000,0.000000,0.563976,146.194398,56.696868,123.805602,55.037892,161.554639,70.275121,131.075783,62.090060,6.293795,3.383855,1.781988,0.218012,114.516747,33.839096,87.686988,29.151566,15.075783,9.819880,25.066446,16.924217,16.601867,13.037892,11.052169,8.962108,122.720482,42.914880,107.459639,44.218012,20.127952,11.781988,10.947831,7.075783,3.345964,2.000000,5.398133,3.744096,169.411325,65.867651,900.000000,4.308072,0.000000,1.436024,0.000000,1.436024,1.436024,0.000000,0.0,0.0,1.000000,0.436024,0.000000,0.000000,0.0,180.340000,185.744998,170.000000,24.180120,28.255904,False,Heavyweight,Orthodox,Orthodox
1464,0.130859,0.000000,0.423472,0.403914,0.639290,0.176678,0.501465,0.500000,0.001953,0.025513,48.987610,20.044922,51.503845,30.167908,51.715698,22.186035,67.691895,43.745361,1.101318,0.802368,1.364990,0.572937,37.758850,12.666138,42.611877,25.418335,8.621521,5.436890,5.640991,3.170105,2.607239,1.941895,3.250977,1.579468,42.897522,14.777283,26.096191,8.510193,2.378723,2.046692,3.336731,1.967529,3.711365,3.220947,22.070923,19.690186,15.428650,86.406250,300.653564,34.000000,0.0,2.000000,0.000000,3.000000,9.000000,6.000000,0.0,0.0,1.000000,4.000000,2.000000,2.000000,0.0,175.260000,135.000000,0.000000,1.531250,0.369062,0.329375,0.178125,0.044062,0.031250,0.218750,0.000000,0.031250,130.406250,46.875000,117.656250,43.187500,165.500000,80.343750,119.093750,44.500000,3.843750,0.781250,0.875000,0.218750,104.468750,29.531250,89.781250,23.125000,5.968750,4.218750,9.281250,6.593750,19.968750,13.125000,18.593750,13.468750,110.093750,35.375000,110.531250,41.312500,6.375000,2.625000,5.906250,1.281250,13.937500,8.875000,1.218750,0.593750,95.562500,18.687500,769.718750,13.000000,1.000000,1.000000,0.000000,2.000000,4.000000,2.000000,0.0,0.0,1.000000,1.000000,2.000000,0.000000,0.0,167.640000,170.180000,145.000000,37.000000,28.000000,False,Bantamweight,Southpaw,Orthodox
6390,0.000000,0.083072,0.446160,0.340862,0.152021,0.127665,0.000000,0.000000,0.000000,0.083072,189.586341,87.269753,175.874672,59.938906,207.951465,103.807299,183.868393,66.680272,3.987441,0.913789,7.178702,1.169283,129.461014,44.691391,150.094190,42.813579,28.335426,18.769623,13.802460,8.974883,31.789901,23.808739,11.978022,8.150445,163.989141,72.924907,165.896650,53.117608,20.197540,11.603480,9.313449,6.156724,5.399660,2.741366,0.664574,0.664574,70.205000,75.514388,900.000000,8.981162,0.0,0.335426,1.993721,1.000000,1.000000,1.993721,0.0,0.0,0.000000,1.000000,0.000000,0.000000,0.0,166.803966,131.708529,0.041634,0.489027,0.349482,0.437677,0.536450,0.055397,0.342671,0.662276,0.211477,0.230508,123.431659,43.356321,115.635041,47.974363,155.248873,71.435293,136.297852,66.077896,4.909431,2.765331,2.215177,0.309389,100.756922,27.166761,98.062112,36.068894,13.647162,8.447299,13.800926,8.425983,9.027575,7.742261,3.772002,3.479485,101.851072,32.090038,97.415613,36.860962,11.404594,7.773752,10.891324,6.633254,10.175992,3.492531,7.328104,4.480148,296.579528,67.060702,768.044884,16.341706,0.000000,0.000000,1.000000,2.335426,4.335426,3.000000,0.0,0.0,1.670853,1.335426,0.664574,0.664574,0.0,168.491983,172.720000,135.000000,27.652015,35.000000,False,Bantamweight,Orthodox,Orthodox
6647,0.227365,0.021654,0.449162,0.379122,0.299927,0.257498,0.002707,0.380904,0.048721,0.005413,149.577713,64.048274,108.100422,41.473008,177.015619,87.611944,132.280246,60.573256,5.803998,2.431351,7.284914,2.523243,127.850211,46.290390,83.802135,23.520810,9.999628,7.864527,15.364055,10.599404,11.727874,9.893357,8.934232,7.352794,132.254904,51.390713,98.227775,34.486069,8.252061,5.759809,4.354072,3.267085,9.070747,6.897753,5.518575,3.719853,206.252606,84.652520,877.604518,13.157537,0.0,2.307077,0.000000,2.307077,3.692923,1.385846,0.0,0.0,0.000000,3.000000,0.692923,0.000000,0.0,173.700048,145.000000,0.045197,0.278005,0.416771,0.486294,0.192698,0.066322,0.172594,0.529398,0.054280,0.190776,120.890990,47.488876,102.181423,48.914288,138.868647,63.227810,115.371186,60.373483,7.483054,1.343522,2.415658,0.592274,95.457111,27.215717,86.182876,37.216074,16.605157,12.254330,13.824444,9.784109,8.828722,8.018830,2.174103,1.914105,101.706497,33.414573,86.335179,36.726182,13.949116,9.923437,12.837471,9.213525,5.235376,4.150866,3.008773,2.974581,144.603996,104.456332,723.061614,53.259696,0.346461,0.653539,0.346461,5.039384,12.267693,8.267693,0.0,0.0,2.000000,5.960616,2.307077,2.000000,0.0,170.959976,170.959976,148.464614,26.346461,34.267693,False,Welterweight,Orthodox,Orthodox


### Normalization using MinMax

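Scale the numeric features to the [0, 1] range. The scaler is fitted on the training split only and then reused, unchanged, on the validation split.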

In [None]:
# Fit the min-max scaler on every column of the training split that is neither
# object nor bool, then scale those same columns.
train_numeric = X_train.select_dtypes(exclude=["object", "bool"])
num_scaler = MinMaxScaler().fit(train_numeric)

X_train_num_scaled = num_scaler.transform(train_numeric)

In [None]:
# Scale the validation split with the scaler already fitted on the training
# split (transform only, no refit).
X_val_num_scaled = num_scaler.transform(X_val.select_dtypes(exclude=["object", "bool"]))

In [None]:
# Write the scaled values back into the numeric columns. The target columns are
# selected with the same dtype filter that produced the arrays handed to the
# scaler, so each scaled column lands on the column it came from regardless of
# how ufc_num.columns happens to be ordered.
_NON_NUMERIC_DTYPES = ["object", "bool"]

X_train[X_train.select_dtypes(exclude=_NON_NUMERIC_DTYPES).columns] = X_train_num_scaled
X_val[X_val.select_dtypes(exclude=_NON_NUMERIC_DTYPES).columns] = X_val_num_scaled

## Training

We will test out the predictions of these models:

1. Random Forest Classifier
2. Naive Bayes
3. Support Vector Machine
4. CatBoost

### Attempting Random Forest Classifier

Since scikit-learn's random forest cannot handle raw categorical (string) values, we first need to encode them using one-hot encoding.

In [None]:
# One-hot encoder for the categorical columns, fitted on the training split.
# handle_unknown="ignore" makes transform() encode a category that was not seen
# during fit as an all-zero row instead of raising a ValueError, so validation
# or test data with a rare, unseen category does not crash the pipeline.
cat_enc = OneHotEncoder(handle_unknown="ignore")

encoded_cats = cat_enc.fit_transform(X_train[ufc_cat.columns])

In [None]:
# Work on copies so the one-hot encoding below leaves X_train / X_val untouched.
X_train_rf, X_val_rf = X_train.copy(), X_val.copy()

In [None]:
# Replace the raw categorical columns with their one-hot encoding.
#
# The encoded block is built as a single DataFrame and attached with one
# pd.concat call. Assigning many new columns through `df[cols] = array` inserts
# them one at a time, which makes pandas emit a "DataFrame is highly fragmented"
# PerformanceWarning for each inserted column.
encoded_feature_names = cat_enc.get_feature_names_out()

X_train_rf = pd.concat(
    [
        X_train_rf.drop(columns=ufc_cat.columns, errors="ignore"),
        # Reuse the source index so the rows line up during the column-wise concat.
        pd.DataFrame(
            encoded_cats.toarray(),
            columns=encoded_feature_names,
            index=X_train_rf.index,
        ),
    ],
    axis=1,
)

X_val_rf = pd.concat(
    [
        X_val.drop(columns=ufc_cat.columns, errors="ignore"),
        pd.DataFrame(
            cat_enc.transform(X_val[ufc_cat.columns]).toarray(),
            columns=encoded_feature_names,
            index=X_val.index,
        ),
    ],
    axis=1,
)

  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.get_feature_names_out()] = cat_enc.transform(X_val[ufc_cat.columns]).toarray()
  X_val_rf[cat_enc.g

In [None]:
# Random forest on the one-hot encoded features, using all CPU cores.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)
# Flatten the target to shape (n_samples,): passing it as a column vector makes
# scikit-learn emit a DataConversionWarning on every fit.
rf.fit(X_train_rf, np.ravel(y_train))

  return fit_method(estimator, *args, **kwargs)


In [None]:
# Predict the validation labels with the fitted random forest.
y_preds_rf = rf.predict(X_val_rf)

In [None]:
# Print the classification report, confusion matrix and averaged metrics for
# the random forest predictions against the validation "Winner" labels.
evaluate_classifier_performance(y_preds_rf, y_val["Winner"].to_numpy())

Hasil Evaluasi berdasarkan classification report 

              precision    recall  f1-score   support

        Blue       0.76      0.78      0.77       716
        Draw       1.00      1.00      1.00       716
         Red       0.77      0.76      0.77       717

    accuracy                           0.85      2149
   macro avg       0.85      0.85      0.85      2149
weighted avg       0.84      0.85      0.84      2149



Confusion Matrix



prediction,Blue,Draw,Red
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blue,556,2,158
Draw,0,715,1
Red,172,0,545




Butuh informasi lebih lengkap? silakan simak di bawah ini : 
Accuracy Average: 0.8450442066077245
F1 Macro Average: 0.8450183464142818
F1 Micro Average: 0.8450442066077245
Precision Macro Average: 0.8450315302433502
Precision Micro Average: 0.8450442066077245
Recall Macro Average: 0.8450837469385423
Recall Micro Average: 0.8450442066077245



### Attempting Naive Bayes

Naive Bayes is a solid baseline classifier. We use the Complement variant (`ComplementNB`) because it is particularly suited to imbalanced class distributions, which the original dataset has (even though the imbalance was already addressed with SMOTE-NC and stratified sampling).

In [None]:
# Naive Bayes reuses the one-hot encoded frames prepared for the random forest.
X_train_nb, X_val_nb = X_train_rf.copy(), X_val_rf.copy()

In [None]:
# Complement Naive Bayes on the one-hot encoded, min-max scaled features.
nb = ComplementNB()
# Flatten the target to shape (n_samples,): passing it as a column vector makes
# scikit-learn emit a DataConversionWarning on every fit.
nb.fit(X_train_nb, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


In [None]:
# Predict the validation labels with the fitted Naive Bayes model.
y_preds_nb = nb.predict(X_val_nb)

In [None]:
# Print the classification report, confusion matrix and averaged metrics for
# the Naive Bayes predictions against the validation "Winner" labels.
evaluate_classifier_performance(y_preds_nb, y_val["Winner"].to_numpy())

Hasil Evaluasi berdasarkan classification report 

              precision    recall  f1-score   support

        Blue       0.46      0.36      0.40       716
        Draw       0.48      0.60      0.53       716
         Red       0.54      0.51      0.52       717

    accuracy                           0.49      2149
   macro avg       0.49      0.49      0.49      2149
weighted avg       0.49      0.49      0.49      2149



Confusion Matrix



prediction,Blue,Draw,Red
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blue,260,268,188
Draw,161,430,125
Red,149,204,364




Butuh informasi lebih lengkap? silakan simak di bawah ini : 
Accuracy Average: 0.4904606793857608
F1 Macro Average: 0.4860377156591768
F1 Micro Average: 0.4904606793857608
Precision Macro Average: 0.4901749762410799
Precision Micro Average: 0.4904606793857608
Recall Macro Average: 0.4904526672016913
Recall Micro Average: 0.4904606793857608



### Attempting Support Vector Machine

Another noteworthy model not used in class is the Support Vector Machine so we'd like to test it out.

In [None]:
# The SVM reuses the same encoded frames; copy them so the models stay independent.
X_train_svc = X_train_nb.copy()
X_val_svc = X_val_nb.copy()

In [None]:
# Support vector classifier with scikit-learn's default settings.
svc = SVC(random_state=42)
# Flatten the target to shape (n_samples,): passing it as a column vector makes
# scikit-learn emit a DataConversionWarning on every fit.
svc.fit(X_train_svc, np.ravel(y_train))

  y = column_or_1d(y, warn=True)


In [None]:
# Predict the validation labels with the fitted SVM.
y_preds_svc = svc.predict(X_val_svc)

In [None]:
# Print the classification report, confusion matrix and averaged metrics for
# the SVM predictions against the validation "Winner" labels.
evaluate_classifier_performance(y_preds_svc, y_val["Winner"].to_numpy())

Hasil Evaluasi berdasarkan classification report 

              precision    recall  f1-score   support

        Blue       0.66      0.68      0.67       716
        Draw       0.85      0.97      0.91       716
         Red       0.74      0.62      0.67       717

    accuracy                           0.76      2149
   macro avg       0.75      0.76      0.75      2149
weighted avg       0.75      0.76      0.75      2149



Confusion Matrix



prediction,Blue,Draw,Red
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blue,485,78,153
Draw,17,691,8
Red,229,41,447




Butuh informasi lebih lengkap? silakan simak di bawah ini : 
Accuracy Average: 0.7552349930200093
F1 Macro Average: 0.750235027746251
F1 Micro Average: 0.7552349930200093
Precision Macro Average: 0.7505861601255338
Precision Micro Average: 0.7552349930200093
Recall Macro Average: 0.7552963543005852
Recall Micro Average: 0.7552349930200093



### Attempting CatBoost

CatBoost is a model known for strong classification performance. It builds on ensemble-learning principles such as gradient boosting and is good at handling categorical features and targets directly.

In [None]:
# CatBoost is trained on X_train directly (not the one-hot encoded copy); the
# categorical columns are declared by name through cat_features.
cb = CatBoostClassifier(random_state=42)
cb.fit(
    X_train,
    y_train,
    cat_features=list(ufc_cat.columns),
)

Learning rate set to 0.088301
0:	learn: 1.0716897	total: 216ms	remaining: 3m 36s
1:	learn: 1.0491047	total: 404ms	remaining: 3m 21s
2:	learn: 1.0328687	total: 575ms	remaining: 3m 11s
3:	learn: 1.0140927	total: 783ms	remaining: 3m 15s
4:	learn: 1.0009840	total: 937ms	remaining: 3m 6s
5:	learn: 0.9880883	total: 1.12s	remaining: 3m 5s
6:	learn: 0.9712823	total: 1.45s	remaining: 3m 25s
7:	learn: 0.9547270	total: 1.64s	remaining: 3m 22s
8:	learn: 0.9402818	total: 1.82s	remaining: 3m 20s
9:	learn: 0.9288957	total: 2.11s	remaining: 3m 28s
10:	learn: 0.9179077	total: 2.28s	remaining: 3m 25s
11:	learn: 0.9095211	total: 2.44s	remaining: 3m 21s
12:	learn: 0.8976387	total: 2.63s	remaining: 3m 19s
13:	learn: 0.8875509	total: 2.79s	remaining: 3m 16s
14:	learn: 0.8794517	total: 2.93s	remaining: 3m 12s
15:	learn: 0.8704161	total: 3.21s	remaining: 3m 17s
16:	learn: 0.8586275	total: 3.53s	remaining: 3m 24s
17:	learn: 0.8486899	total: 3.69s	remaining: 3m 21s
18:	learn: 0.8432614	total: 3.9s	remaining: 3m

<catboost.core.CatBoostClassifier at 0x22790640e90>

In [None]:
# Predict the validation labels with the fitted CatBoost model.
y_preds = cb.predict(X_val)

In [None]:
# Flatten the CatBoost predictions to 1-D before comparing them with the
# validation "Winner" labels.
evaluate_classifier_performance(np.ravel(y_preds), y_val["Winner"].to_numpy())

Hasil Evaluasi berdasarkan classification report 

              precision    recall  f1-score   support

        Blue       0.80      0.75      0.77       716
        Draw       0.99      0.99      0.99       716
         Red       0.76      0.81      0.78       717

    accuracy                           0.85      2149
   macro avg       0.85      0.85      0.85      2149
weighted avg       0.85      0.85      0.85      2149



Confusion Matrix



prediction,Blue,Draw,Red
actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Blue,535,4,177
Draw,0,711,5
Red,137,2,578




Butuh informasi lebih lengkap? silakan simak di bawah ini : 
Accuracy Average: 0.8487668683108422
F1 Macro Average: 0.8486282457956781
F1 Micro Average: 0.8487668683108422
Precision Macro Average: 0.849429689111202
Precision Micro Average: 0.8487668683108422
Recall Macro Average: 0.8487867147669396
Recall Micro Average: 0.8487668683108422



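Since CatBoost achieved the best validation scores (accuracy and macro F1 of about 0.849, narrowly ahead of Random Forest at about 0.845), that's the model we'll go with.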

## Custom Transformers

For the test set, we created a transformer that will make the code more readable

### Categorical - Remove Identifier or Non-Impactful Features

In [None]:
class RemoveLessUsefulCategoricalFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that drops a fixed set of columns from a DataFrame.

    Parameters
    ----------
    columns : sequence of str, optional
        Names of the columns to drop. When ``None`` (the default), the columns
        listed in ``DEFAULT_COLUMNS`` are dropped. Names that are not present
        in the input are silently skipped.
    """

    # Columns removed when no explicit list is given.
    DEFAULT_COLUMNS = ("R_fighter", "B_fighter", "location", "date", "Referee")

    def __init__(self, columns=None):
        # Stored unmodified, following the scikit-learn convention for
        # constructor parameters (keeps get_params / clone working).
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X):
        """Return ``X`` without the configured columns.

        ``X`` itself is not modified; ``DataFrame.drop`` returns a new frame.
        """
        selected = self.DEFAULT_COLUMNS if self.columns is None else self.columns
        return X.drop(columns=list(selected), errors="ignore")

## Test Set on Kaggle

We ran through basically the same steps.

For the categorical columns we:

1. Remove the less useful features
2. Transform booleans into strings for CatBoost
3. Impute missing values using the most frequent value (mode)
4. Apply one-hot encoding (if necessary)

For the numerical features we:

1. Imputed missing values using KNN imputation
2. Scaled using MinMax scaling

In [None]:
# Load the test features from the CSV file next to the notebook.
ufc_test = pd.read_csv("./UFC_Test_Classif_X.csv")

In [None]:
# Split the test set by dtype: object/bool columns form the categorical part,
# everything else except the "id" column forms the numeric part.
ufc_test_cat = ufc_test.select_dtypes(include=["object", "bool"])
ufc_test_num = ufc_test.drop(columns=[*ufc_test_cat.columns, "id"])

In [None]:
# Drop the identifier-like categorical columns from the test set.
rem_tf = RemoveLessUsefulCategoricalFeatures()
ufc_test_cat = rem_tf.fit(ufc_test_cat).transform(ufc_test_cat)

In [None]:
# Turn boolean columns into the strings "True" / "False".
test_bool_cols = ufc_test_cat.select_dtypes(include=["bool"]).columns
ufc_test_cat[test_bool_cols] = ufc_test_cat[test_bool_cols].replace(
    {True: "True", False: "False"}
)

In [None]:
# Fill missing categorical values with the already-fitted imputer (transform
# only) and rebuild a DataFrame with the original column names.
ufc_test_cat = pd.DataFrame(
    data=cat_imputer.transform(ufc_test_cat),
    columns=ufc_test_cat.columns,
)

In [None]:
# Show column dtypes and non-null counts to check the result of the imputation.
ufc_test_cat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 602 entries, 0 to 601
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title_bout    602 non-null    object
 1   weight_class  602 non-null    object
 2   B_Stance      602 non-null    object
 3   R_Stance      602 non-null    object
dtypes: object(4)
memory usage: 18.9+ KB


In [None]:
# Fill missing numeric values with the already-fitted imputer (transform only)
# and rebuild a DataFrame with the original column names.
ufc_test_num = pd.DataFrame(
    data=num_imputer.transform(ufc_test_num),
    columns=ufc_test_num.columns,
)
ufc_test_num.head()

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_
avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,0.0,0.0,0.49375,0.44875,0.475,0.1775,0.0,0.125,0.5625,0.0,88.9375,42.0625,68.8125,28.4375,131.625,80.3125,145.4375,98.75,5.25,2.5,2.0625,1.25,60.3125,19.375,49.8125,13.5,15.125,13.0,18.375,14.625,13.5,9.6875,0.625,0.3125,58.125,17.6875,44.1875,9.6875,17.9375,15.0625,15.1875,11.1875,12.875,9.3125,9.4375,7.5625,367.625,181.6875,900.0,15.0,0.0,0.0,1.0,2.0,3.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,165.1,115.0,0.0625,0.0,0.46125,0.26625,0.3225,0.375,0.0,0.0,0.0,0.0,158.625,73.375,141.0625,37.625,200.25,106.5,152.125,45.625,2.5625,0.6875,2.375,1.0,117.0625,40.3125,112.5625,20.5625,19.0625,13.5625,23.5625,13.1875,22.5,19.5,4.9375,3.875,147.8125,66.0625,130.8125,32.625,7.4375,5.375,6.8125,2.875,3.375,1.9375,3.4375,2.125,215.0,94.9375,900.0,15.0,0.0,3.0,0.0,3.0,4.0,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,154.94,152.4,115.0,35.0,27.0
1,0.0,0.25,0.473125,0.371875,0.0,0.20375,0.0,0.5,0.1875,0.0,54.3125,23.75,53.875,20.125,63.6875,32.25,109.875,70.375,0.0,0.0,7.8125,2.375,37.125,10.9375,37.75,11.375,10.4375,7.6875,8.0,3.625,6.75,5.125,8.125,5.125,42.6875,16.0625,47.25,14.5625,11.5,7.5625,3.375,2.3125,0.125,0.125,3.25,3.25,9.6875,390.375,731.75,13.0,0.0,0.0,1.0,1.0,1.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,180.34,170.0,0.251953,0.015625,0.610156,0.536279,0.441553,0.370762,0.073242,0.104492,0.033203,0.760742,64.634766,39.113281,96.371094,51.285156,119.251953,88.878906,146.732422,95.954102,5.15625,1.925781,5.234375,1.925781,40.663086,18.320312,58.008789,22.541016,10.712891,8.336914,20.206055,14.579102,13.258789,12.456055,18.15625,14.165039,39.666992,23.71875,67.904297,29.001953,23.037109,13.837891,21.053711,15.53418,1.930664,1.556641,7.413086,6.749023,222.675781,273.668945,895.705078,31.0,0.0,4.0,0.0,4.0,6.0,5.0,0.0,0.0,3.0,1.0,2.0,0.0,0.0,170.18,182.88,170.0,28.0,31.0
2,0.0,0.5,0.5,0.48,0.615,0.0,0.0,0.0,0.0,0.0,72.5,36.5,48.0,23.0,118.5,74.0,117.0,89.0,11.5,6.5,1.5,0.0,61.0,30.0,27.5,10.5,8.5,4.5,16.5,9.5,3.0,2.0,4.0,3.0,40.0,14.0,31.5,12.0,13.5,6.5,11.0,7.5,19.0,16.0,5.5,3.5,533.5,160.5,900.0,6.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,180.34,145.0,0.5,0.0,0.345,0.36,0.265,0.115,0.0,0.0,0.0,0.0,135.0,46.5,198.5,73.5,136.0,47.0,202.5,75.5,4.0,1.0,8.5,1.0,108.0,26.0,153.5,42.5,9.5,5.0,24.0,15.0,17.5,15.5,21.0,16.0,126.0,42.0,190.0,67.0,7.0,3.5,8.5,6.5,2.0,1.0,0.0,0.0,35.5,40.5,900.0,6.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,170.18,177.8,145.0,24.0,30.0
3,0.0,0.0,0.66,0.69,0.85,0.0,0.0,2.0,0.0,1.0,12.0,8.0,36.0,25.0,104.0,84.0,109.0,89.0,7.0,6.0,0.0,0.0,11.0,7.0,23.0,15.0,0.0,0.0,9.0,6.0,1.0,1.0,4.0,4.0,2.0,0.0,17.0,8.0,1.0,1.0,6.0,5.0,9.0,7.0,13.0,12.0,835.0,5.0,900.0,3.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,170.18,135.0,0.0,0.5,0.419,0.507,0.385,0.279,0.2,0.5,0.3,0.1,52.2,25.6,67.3,36.5,86.0,54.7,96.1,61.6,3.4,1.7,2.5,1.0,39.7,15.6,48.1,22.0,5.7,4.7,11.6,8.4,6.8,5.3,7.6,6.1,30.5,9.5,38.0,13.8,7.4,6.0,10.7,8.6,14.3,10.1,18.6,14.1,311.9,142.8,698.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165.1,162.56,125.0,27.0,23.0
4,0.0,0.0,0.505312,0.439375,0.815937,0.067187,1.28125,0.640625,0.015625,0.046875,29.890625,14.53125,13.90625,5.171875,54.515625,39.078125,22.6875,13.53125,2.46875,2.21875,2.625,0.171875,24.25,11.265625,8.453125,0.875,2.328125,1.4375,2.40625,2.15625,3.3125,1.828125,3.046875,2.140625,18.984375,5.71875,9.984375,2.546875,3.203125,2.421875,3.15625,2.328125,7.703125,6.390625,0.765625,0.296875,219.75,1.0,499.65625,15.0,6.0,7.0,0.0,7.0,7.0,0.0,0.0,0.0,2.0,1.0,0.0,3.0,1.0,177.8,170.0,1.0,0.0,0.81,0.33,0.83,0.5,2.0,0.0,0.0,0.0,16.5,10.5,21.5,14.0,22.0,15.5,40.5,32.5,2.0,1.5,1.5,1.5,11.0,5.5,20.5,13.0,1.0,1.0,1.0,1.0,4.5,4.0,0.0,0.0,14.0,9.5,5.0,2.5,2.0,0.5,7.0,5.0,0.5,0.5,9.5,6.5,0.0,0.0,476.0,3.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,175.26,186.944,170.0,33.0,24.0


In [None]:
# Run the numeric test features through `num_scaler` and wrap the result back
# into a DataFrame that keeps the original column names.
# NOTE(review): `num_scaler` is presumably the scaler fitted on the training
# numeric features earlier in the notebook -- confirm it is not refit here.
# No `index` is passed, so the rebuilt frame does not reuse the input's index.
ufc_test_num = pd.DataFrame(
    data=num_scaler.transform(ufc_test_num),
    columns=ufc_test_num.columns,
)

In [None]:
# Place the numeric and categorical test frames side by side (column-wise)
# to form the final feature table; rows are aligned on the frames' indexes.
ufc_test_prepped = pd.concat(
    objs=[ufc_test_num, ufc_test_cat],
    axis="columns",
)
# Preview the first rows as a sanity check on the combined frame.
ufc_test_prepped.head()

Unnamed: 0,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,B_avg_opp_REV,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_TD_att,B_avg_TD_landed,B_avg_opp_TD_att,B_avg_opp_TD_landed,B_avg_HEAD_att,B_avg_HEAD_landed,B_avg_opp_HEAD_att,B_avg_opp_HEAD_landed,B_avg_BODY_att,B_avg_BODY_landed,B_avg_opp_BODY_att,B_avg_opp_BODY_landed,B_avg_LEG_att,B_avg_LEG_landed,B_avg_opp_LEG_att,B_avg_opp_LEG_landed,B_avg_DISTANCE_att,B_avg_DISTANCE_landed,B_avg_opp_DISTANCE_att,B_avg_opp_DISTANCE_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,B_avg_opp_CLINCH_att,B_avg_opp_CLINCH_landed,B_avg_GROUND_att,B_avg_GROUND_landed,B_avg_opp_GROUND_att,B_avg_opp_GROUND_landed,B_avg_CTRL_time(seconds),B_avg_opp_CTRL_time(seconds),B_total_time_fought(seconds),B_total_rounds_fought,B_total_title_bouts,B_current_win_streak,B_current_lose_streak,B_longest_win_streak,B_wins,B_losses,B_draw,B_win_by_Decision_Majority,B_win_by_Decision_Split,B_win_by_Decision_Unanimous,B_win_by_KO/TKO,B_win_by_Submission,B_win_by_TKO_Doctor_Stoppage,B_Height_cms,B_Weight_lbs,R_avg_KD,R_avg_opp_KD,R_avg_SIG_STR_pct,R_avg_opp_SIG_STR_pct,R_avg_TD_pct,R_avg_opp_TD_pct,R_avg_SUB_ATT,R_avg_opp_SUB_ATT,R_avg_REV,R_avg_opp_REV,R_avg_SIG_STR_att,R_avg_SIG_STR_landed,R_avg_opp_SIG_STR_att,R_avg_opp_SIG_STR_landed,R_avg_TOTAL_STR_att,R_avg_TOTAL_STR_landed,R_avg_opp_TOTAL_STR_att,R_avg_opp_TOTAL_STR_landed,R_avg_TD_att,R_avg_TD_landed,R_avg_opp_TD_att,R_avg_opp_TD_landed,R_avg_HEAD_att,R_avg_HEAD_landed,R_avg_opp_HEAD_att,R_avg_opp_HEAD_landed,R_avg_BODY_att,R_avg_BODY_landed,R_avg_opp_BODY_att,R_avg_opp_BODY_landed,R_avg_LEG_att,R_avg_LEG_landed,R_avg_opp_LEG_att,R_avg_opp_LEG_landed,R_avg_DISTANCE_att,R_avg_DISTANCE_landed,R_avg_opp_DISTANCE_att,R_avg_opp_DISTANCE_landed,R_avg_CLINCH_att,R_avg_CLINCH_landed,R_
avg_opp_CLINCH_att,R_avg_opp_CLINCH_landed,R_avg_GROUND_att,R_avg_GROUND_landed,R_avg_opp_GROUND_att,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_total_rounds_fought,R_total_title_bouts,R_current_win_streak,R_current_lose_streak,R_longest_win_streak,R_wins,R_losses,R_draw,R_win_by_Decision_Majority,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,title_bout,weight_class,B_Stance,R_Stance
0,0.0,0.0,0.49375,0.44875,0.475,0.1775,0.0,0.017857,0.1875,0.0,0.232054,0.254924,0.171602,0.14078,0.313075,0.349185,0.359994,0.425647,0.276316,0.22792,0.107426,0.108696,0.18369,0.158811,0.148694,0.107143,0.213028,0.203125,0.30123,0.291237,0.214864,0.187611,0.009058,0.00601,0.163852,0.111946,0.122403,0.060928,0.222826,0.239087,0.144643,0.133185,0.146307,0.19401,0.066933,0.106514,0.360348,0.21759,0.598125,0.168539,0.0,0.0,0.2,0.125,0.130435,0.125,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.217391,0.0,0.015625,0.0,0.46125,0.26625,0.3225,0.375,0.0,0.0,0.0,0.0,0.37829,0.349034,0.31071,0.215,0.45476,0.429223,0.329989,0.225866,0.085417,0.0625,0.148438,0.090909,0.317946,0.235718,0.281406,0.147932,0.268486,0.251157,0.314167,0.304211,0.314376,0.312743,0.063803,0.069912,0.373162,0.347383,0.297301,0.193047,0.090701,0.088115,0.083079,0.056373,0.023936,0.02964,0.033053,0.03125,0.215701,0.116918,0.598125,0.170455,0.0,0.1875,0.0,0.1875,0.173913,0.071429,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.043478,0.0,0.0,0.607143,0.285714,False,WomenStrawweight,Orthodox,Orthodox
1,0.0,0.087887,0.473125,0.371875,0.0,0.20375,0.0,0.071429,0.0625,0.0,0.141711,0.143939,0.134352,0.099629,0.151483,0.140217,0.271968,0.303341,0.0,0.0,0.406916,0.206522,0.113069,0.089652,0.112687,0.090278,0.147007,0.120117,0.131148,0.072187,0.107432,0.099252,0.117754,0.098558,0.120334,0.101661,0.130886,0.091588,0.142857,0.12004,0.032143,0.02753,0.00142,0.002604,0.02305,0.045775,0.009496,0.467515,0.485432,0.146067,0.0,0.0,0.2,0.0625,0.043478,0.25,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.478261,0.083969,0.062988,0.005208,0.610156,0.536279,0.441553,0.370762,0.012207,0.013062,0.011068,0.190186,0.154141,0.186056,0.212271,0.293058,0.270817,0.358205,0.318292,0.47502,0.171875,0.175071,0.327148,0.175071,0.110442,0.107124,0.145022,0.162166,0.150886,0.154387,0.269414,0.336313,0.185255,0.199771,0.234616,0.255562,0.100142,0.124723,0.154328,0.171609,0.28094,0.226851,0.256753,0.304592,0.013693,0.023814,0.07128,0.09925,0.223402,0.337031,0.595248,0.352273,0.0,0.25,0.0,0.25,0.26087,0.357143,0.0,0.0,0.617675,0.1,0.181818,0.0,0.0,0.304348,0.5,0.23913,0.357143,0.428571,False,Welterweight,Orthodox,Orthodox
2,0.0,0.175773,0.5,0.48,0.615,0.0,0.0,0.0,0.0,0.0,0.189165,0.221212,0.119701,0.113861,0.281856,0.321739,0.289604,0.383621,0.605263,0.592593,0.078128,0.0,0.185784,0.245902,0.08209,0.083333,0.119718,0.070312,0.270492,0.18918,0.047748,0.038733,0.057971,0.057692,0.112758,0.088608,0.087258,0.075472,0.167702,0.103175,0.104762,0.089286,0.215909,0.333333,0.039007,0.049296,0.522939,0.192216,0.598125,0.067416,0.0,0.125,0.0,0.125,0.086957,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.478261,0.045802,0.125,0.0,0.345,0.36,0.265,0.115,0.0,0.0,0.0,0.0,0.321949,0.221194,0.437225,0.42,0.308851,0.189422,0.439262,0.373762,0.133333,0.090909,0.53125,0.090909,0.293332,0.152029,0.38375,0.305755,0.133803,0.092593,0.32,0.346022,0.244515,0.24859,0.271363,0.288668,0.318095,0.220853,0.431818,0.39645,0.085366,0.057377,0.103659,0.127451,0.014184,0.015298,0.0,0.0,0.035616,0.049877,0.598125,0.068182,0.0,0.0,0.25,0.0625,0.043478,0.071429,0.0,0.0,0.205892,0.0,0.0,0.0,0.0,0.304348,0.416667,0.130435,0.214286,0.392857,False,Featherweight,Orthodox,Orthodox
3,0.0,0.0,0.66,0.69,0.85,0.0,0.0,0.285714,0.0,0.333333,0.03131,0.048485,0.089776,0.123762,0.247368,0.365217,0.269802,0.383621,0.368421,0.547009,0.0,0.0,0.033502,0.057377,0.068657,0.119048,0.0,0.0,0.147541,0.119482,0.015916,0.019366,0.057971,0.076923,0.005638,0.0,0.047091,0.050314,0.012422,0.015873,0.057143,0.059524,0.102273,0.145833,0.092199,0.169014,0.818471,0.005988,0.598125,0.033708,0.0,0.0625,0.0,0.0625,0.043478,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.304348,0.030534,0.0,0.166667,0.419,0.507,0.385,0.279,0.033333,0.0625,0.1,0.025,0.124487,0.121776,0.148238,0.208571,0.195303,0.220455,0.20846,0.30495,0.113333,0.154545,0.15625,0.090909,0.107826,0.091217,0.12025,0.158273,0.080282,0.087037,0.154667,0.193772,0.095011,0.085002,0.098208,0.110055,0.076999,0.049955,0.086364,0.081657,0.090244,0.098361,0.130488,0.168627,0.101418,0.154512,0.178846,0.207353,0.312917,0.175862,0.463161,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.217391,0.166667,0.043478,0.321429,0.142857,False,Bantamweight,Orthodox,Orthodox
4,0.0,0.0,0.505312,0.439375,0.815937,0.067187,0.160156,0.091518,0.005208,0.015625,0.07799,0.088068,0.034679,0.025603,0.129667,0.169905,0.056157,0.058324,0.129934,0.202279,0.136724,0.014946,0.073857,0.092341,0.025233,0.006944,0.03279,0.022461,0.039447,0.042939,0.052721,0.035404,0.044158,0.041166,0.053516,0.036195,0.027658,0.016018,0.03979,0.038442,0.03006,0.027716,0.087536,0.133138,0.00543,0.004181,0.2154,0.001198,0.329977,0.168539,0.375,0.4375,0.0,0.4375,0.304348,0.0,0.0,0.0,0.4,0.1,0.0,0.214286,0.5,0.434783,0.083969,0.25,0.0,0.81,0.33,0.83,0.5,0.333333,0.0,0.0,0.0,0.039349,0.049947,0.047357,0.08,0.049961,0.062469,0.087852,0.160891,0.066667,0.136364,0.09375,0.136364,0.029876,0.03216,0.05125,0.093525,0.014085,0.018519,0.013333,0.023068,0.062875,0.064152,0.0,0.0,0.035344,0.049955,0.011364,0.014793,0.02439,0.008197,0.085366,0.098039,0.003546,0.007649,0.091346,0.095588,0.0,0.0,0.314133,0.034091,0.0625,0.0625,0.0,0.0625,0.043478,0.071429,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.391304,0.566667,0.23913,0.535714,0.178571,True,Welterweight,Orthodox,Orthodox


In [None]:
# Predict on the prepared test feature table with the model bound to `cb`.
# NOTE(review): `cb` is presumably the classifier trained earlier in the
# notebook (likely the CatBoost model) -- confirm against the training cell.
y_test_preds = cb.predict(ufc_test_prepped)

In [None]:
# Build the submission table: one row per test fight, holding its `id` and the
# predicted `Winner`, then write it to disk without the DataFrame index.
# Take an explicit copy of the `id` column selection so that adding a column
# operates on an independent frame and does not raise SettingWithCopyWarning.
submissions = ufc_test[["id"]].copy()
# Flatten the predictions to 1-D so they fit a single column.
submissions["Winner"] = y_test_preds.reshape(-1)
submissions.to_csv("ufc_classification.csv", index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submissions["Winner"] = y_test_preds.reshape(-1)
