---
## Crawling UFC Data 

In [1]:
from src.data_crawler.crawler import StatsCrawler
from src.data_crawler.crawler import FightsCrawler

### Crawling Stats of all UFC Fighters

In [2]:
stats = StatsCrawler()

In [3]:
stats.crawl_stats()

In [4]:
stats_df = stats.return_stats()
stats_df.to_csv("src/data/stats_fighters.csv")

In [5]:
stats_df

Unnamed: 0,Name,Record,Wins,Losses,Draws,Height_cm,Weight_lbs,Reach_inch,Stance,Debut,SLpM,StrAcc,SApM,StrDef,TD_Avg,TD_Acc,TD_Def,Sub_Avg
0,Brian Johnston,Record: 5-5-0,5.0,5.0,0.0,190.502317,230.0,,Orthodox,,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
1,Shamil Gamzatov,Record: 14-0-0,14.0,0.0,0.0,187.962286,205.0,76.0,Orthodox,1990.0,4.27,0.46,2.13,0.58,0.00,0.00,0.50,0.0
2,Mario Bautista,Record: 8-2-0,8.0,2.0,0.0,175.262131,135.0,69.0,Switch,1993.0,5.71,0.45,4.78,0.54,0.48,0.33,0.70,0.0
3,Ludovit Klein,Record: 17-2-0,17.0,2.0,0.0,170.182069,155.0,72.0,Southpaw,1995.0,7.11,0.69,3.16,0.42,0.00,0.00,0.00,0.0
4,Pat Benson,Record: 3-5-3,3.0,5.0,3.0,175.262131,155.0,,,,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2644,Kamuela Kirk,Record: 9-3-0,9.0,3.0,0.0,177.802162,145.0,75.0,Switch,1994.0,5.31,0.57,12.25,0.21,2.89,0.40,1.00,0.0
2645,Johny Hendricks,Record: 18-8-0,18.0,8.0,0.0,175.262131,185.0,69.0,Southpaw,1983.0,3.49,0.45,3.99,0.53,3.83,0.46,0.63,0.3
2646,Timur Valiev,Record: 17-2-0 (1 NC),17.0,2.0,0.0,167.642039,145.0,67.0,Orthodox,1990.0,5.50,0.71,1.55,0.56,2.73,0.66,0.50,0.0
2647,Anthony Hamilton,Record: 15-9-0,15.0,9.0,0.0,195.582378,260.0,76.0,Orthodox,1980.0,3.91,0.65,3.02,0.45,2.62,0.42,0.78,0.0


### Crawling Fights and corresponding results of all UFC Fighters

In [6]:
fights = FightsCrawler()

In [7]:
fights.crawl_fights()

In [8]:
fights_df = fights.return_fights()
fights_df.to_csv("src/data/fight_results.csv")

In [9]:
fights_df

Unnamed: 0,Fighter,Opponent,Result,Fighters_Win,Opponents_Win
0,Brian Johnston,Dan Bobish,loss,0,1
1,Brian Johnston,Ken Shamrock,loss,0,1
2,Brian Johnston,Mark Coleman,loss,0,1
3,Brian Johnston,Reza Nasri,win,1,0
4,Brian Johnston,Don Frye,loss,0,1
...,...,...,...,...,...
12045,Anthony Hamilton,Daniel Omielanczuk,win,1,0
12046,Anthony Hamilton,Todd Duffee,loss,0,1
12047,Anthony Hamilton,Ruan Potts,win,1,0
12048,Anthony Hamilton,Aleksei Oleinik,loss,0,1


### Merging and preparing data

In [1]:
import pandas as pd

In [2]:
stats_df = pd.read_csv("src/data/stats_fighters.csv", index_col=0)
fights_df = pd.read_csv("src/data/fight_results.csv", index_col=0)

In [3]:
stats_df_copy = stats_df.copy()
fights_df_copy = fights_df.copy()

stats_df_copy["Fighter"] = stats_df_copy.Name

In [4]:
pd.set_option("display.max_columns", None)

In [5]:
merged_df = pd.merge(fights_df_copy, stats_df_copy, on=["Fighter"])

In [6]:
stats_df_copy.drop("Fighter", axis=1, inplace=True)
stats_df_copy.rename(columns={"Name": "Opponent"}, inplace=True)
merged_df.drop("Name", axis=1, inplace=True)

In [7]:
final_df = pd.merge(merged_df, stats_df_copy, on=["Opponent"])
final_df.drop(["Record_x", "Record_y"], axis=1, inplace=True)
final_df.dropna(axis=0, inplace=True)

In [8]:
def convert_stance(stance):

    num_stance = []
    for i in final_df.Stance_x:
        if i == "Southpaw":
            num_stance.append(0)
        elif i == "Orthodox":
            num_stance.append(1)
        elif i == "Switch":
            num_stance.append(2)
        elif i == "Open Stance":
            num_stance.append(3)
        elif i == "Sideways":
            num_stance.append(4)

    return num_stance

In [9]:
final_df["Stance_x_num"] = convert_stance(final_df.Stance_x)
final_df["Stance_y_num"] = convert_stance(final_df.Stance_y)

In [10]:
X = final_df[
    [
        "Wins_x",
        "Losses_x",
        "Draws_x",
        "Height_cm_x",
        "Weight_lbs_x",
        "Reach_inch_x",
        "Debut_x",
        "SLpM_x",
        "StrAcc_x",
        "SApM_x",
        "StrDef_x",
        "TD_Avg_x",
        "TD_Acc_x",
        "TD_Def_x",
        "Sub_Avg_x",
        "Stance_x_num",
        "Wins_y",
        "Losses_y",
        "Draws_y",
        "Height_cm_y",
        "Weight_lbs_y",
        "Reach_inch_y",
        "Debut_y",
        "SLpM_y",
        "StrAcc_y",
        "SApM_y",
        "StrDef_y",
        "TD_Avg_y",
        "TD_Acc_y",
        "TD_Def_y",
        "Sub_Avg_y",
        "Stance_y_num",
    ]
]

y = final_df[["Fighters_Win"]]

## Creating model and testing its accuracy

In [11]:
from src.model.mlp_classifier import *
from sklearn.metrics import classification_report

In [12]:
X_stand = standardization(X)

In [13]:
X_train, X_test, y_train, y_test = splitting_data(X_stand, y, testing_size=0.2, random_state=1)

In [14]:
prediction = return_prediction(X_train, y_train, X_test, hidden_layer_sizes=(16, 10, 8))

In [15]:
print(classification_report(y_test, prediction))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00        12
           0       0.67      0.63      0.65       585
           1       0.65      0.70      0.67       591

    accuracy                           0.66      1188
   macro avg       0.44      0.44      0.44      1188
weighted avg       0.65      0.66      0.66      1188



  _warn_prf(average, modifier, msg_start, len(result))


## Predict upcoming Event UFC 261 Main Event

In [16]:
# Predicting the fight between Leon Edwards and Nate Diaz
who_wins_the_upcoming_fight(
    favorite_name="Leon Edwards", 
    underdog_name="Nate Diaz",
    stats_data=X_stand,
    outcome=y,
    names_data=final_df
)

The favorite 'Leon Edwards' will win the fight!


In [17]:
# Some fights of UFC 261
favorites_list = ["Uriah Hall", "Kamaru Usman"]  
underdogs_list = ["Chris Weidman", "Jorge Masvidal"]

In [18]:
def ufc_event(favorites_list, underdogs_list):
    
    assert len(favorites_list) == len(underdogs_list)
    for f, u in zip(favorites_list, underdogs_list):
        who_wins_the_upcoming_fight(
            favorite_name=f,
            underdog_name=u,
            stats_data=X_stand,
            outcome=y,
            names_data=final_df,
        )

In [19]:
ufc_event(favorites_list, underdogs_list)

The underdog 'Chris Weidman' will win the fight!
The favorite 'Kamaru Usman' will win the fight!


In [20]:
# If one fighter is not included in the data set because of nan values,
# the following error occurs => dropped and not useable 
who_wins_the_upcoming_fight(
    favorite_name="Weili Zhang",
    underdog_name="Rose Namajunas",
    stats_data=X_stand,
    outcome=y,
    names_data=final_df,
)

IndexError: Weili Zhang was dropped because of nan values!