---
## Crawling UFC Data 

In [1]:
from src.data_crawler.crawler import StatsCrawler
from src.data_crawler.crawler import FightsCrawler

### Crawling Stats of all UFC Fighters

In [2]:
stats = StatsCrawler()

In [3]:
stats.crawl_stats()

In [4]:
stats_df = stats.return_stats()
stats_df.to_csv("src/data/stats_fighters.csv")

In [5]:
stats_df

Unnamed: 0,Name,Record,Wins,Losses,Draws,Height_cm,Weight_lbs,Reach_inch,Stance,Debut,SLpM,StrAcc,SApM,StrDef,TD_Avg,TD_Acc,TD_Def,Sub_Avg
0,Maki Pitolo,Record: 13-8-0,13.0,8.0,0.0,177.802162,185.0,75.0,Orthodox,1990.0,4.20,0.44,3.71,0.56,2.42,0.52,0.55,0.5
1,Marc-Andre Barriault,Record: 12-4-0 (1 NC),12.0,4.0,0.0,185.422255,185.0,74.0,Orthodox,1990.0,5.36,0.48,4.79,0.55,0.43,0.25,0.72,0.0
2,Dan Lauzon,Record: 17-6-0,17.0,6.0,0.0,177.802162,155.0,,Orthodox,1988.0,1.34,0.25,4.09,0.61,1.63,0.33,0.80,2.7
3,Junior Dos Santos,Record: 21-9-0,21.0,9.0,0.0,193.042347,238.0,77.0,Orthodox,1984.0,4.49,0.47,3.33,0.56,0.30,0.50,0.81,0.1
4,Sanghoon Yoo,Record: 5-1-0,5.0,1.0,0.0,182.882224,155.0,74.0,Orthodox,1990.0,5.00,0.66,7.27,0.27,1.00,1.00,0.75,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2644,Dino Pezao,Record: 1-3-0,1.0,3.0,0.0,,205.0,,Orthodox,,0.66,0.30,3.28,0.31,0.00,0.00,0.00,0.0
2645,Drew Dimanlig,Record: 3-4-0,3.0,4.0,0.0,,185.0,,,,3.70,0.73,3.17,0.20,0.00,0.00,0.00,0.0
2646,Pablo Villaseca,Record: 12-3-0,12.0,3.0,0.0,170.182069,155.0,,,1987.0,2.79,0.42,3.25,0.65,2.50,0.50,0.33,0.0
2647,Noe Hernandez,Record: 5-5-0,5.0,5.0,0.0,,,,Orthodox,,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0


### Crawling Fights and corresponding results of all UFC Fighters

In [6]:
fights = FightsCrawler()

In [8]:
fights.crawl_fights()

In [9]:
fights_df = fights.return_fights()
fights_df.to_csv("src/data/fight_results.csv")

In [10]:
fights_df

Unnamed: 0,Fighter,Opponent,Result,Fighters_Win,Opponents_Win
0,Maki Pitolo,Julian Marquez,loss,0,1
1,Maki Pitolo,Impa Kasanganay,loss,0,1
2,Maki Pitolo,Darren Stewart,loss,0,1
3,Maki Pitolo,Charles Byrd,win,1,0
4,Maki Pitolo,Callan Potter,loss,0,1
...,...,...,...,...,...
15305,Jason Lambert,Renato Sobral,win,1,0
15306,Jason Lambert,Rashad Evans,loss,0,1
15307,Jason Lambert,Branden Lee Hinkle,win,1,0
15308,Jason Lambert,Terry Martin,win,1,0


### Merging Data

In [11]:
import pandas as pd

In [12]:
stats_df = pd.read_csv("src/data/stats_fighters.csv", index_col=0)
fights_df = pd.read_csv("src/data/fight_results.csv", index_col=0)

In [13]:
stats_df_copy = stats_df.copy()
fights_df_copy = fights_df.copy()

stats_df_copy["Fighter"] = stats_df_copy.Name

In [14]:
pd.set_option("display.max_columns", None)

In [15]:
merged_df = pd.merge(fights_df_copy, stats_df_copy, on=["Fighter"])

In [16]:
stats_df_copy.drop("Fighter", axis=1, inplace=True)
stats_df_copy.rename(columns={"Name": "Opponent"}, inplace=True)
merged_df.drop("Name", axis=1, inplace=True)

In [17]:
final_df = pd.merge(merged_df, stats_df_copy, on=["Opponent"])
final_df.drop(["Record_x", "Record_y"], axis=1, inplace=True)
final_df.dropna(axis=0, inplace=True)

In [18]:
def convert_stance(stance):

    num_stance = []
    for i in final_df.Stance_x:
        if i == "Southpaw":
            num_stance.append(0)
        elif i == "Orthodox":
            num_stance.append(1)
        elif i == "Switch":
            num_stance.append(2)
        elif i == "Open Stance":
            num_stance.append(3)
        elif i == "Sideways":
            num_stance.append(4)

    return num_stance

In [19]:
final_df["Stance_x_num"] = convert_stance(final_df.Stance_x)
final_df["Stance_y_num"] = convert_stance(final_df.Stance_y)

In [20]:
X = final_df[
    [
        "Wins_x",
        "Losses_x",
        "Draws_x",
        "Height_cm_x",
        "Weight_lbs_x",
        "Reach_inch_x",
        "Debut_x",
        "SLpM_x",
        "StrAcc_x",
        "SApM_x",
        "StrDef_x",
        "TD_Avg_x",
        "TD_Acc_x",
        "TD_Def_x",
        "Sub_Avg_x",
        "Stance_x_num",
        "Wins_y",
        "Losses_y",
        "Draws_y",
        "Height_cm_y",
        "Weight_lbs_y",
        "Reach_inch_y",
        "Debut_y",
        "SLpM_y",
        "StrAcc_y",
        "SApM_y",
        "StrDef_y",
        "TD_Avg_y",
        "TD_Acc_y",
        "TD_Def_y",
        "Sub_Avg_y",
        "Stance_y_num",
    ]
]

y = final_df[["Fighters_Win"]]

## Creating model and testing its accuracy

In [26]:
from src.model.mlp_classifier import *
from sklearn.metrics import classification_report

In [22]:
X_stand = standardization(X)

In [23]:
X_train, X_test, y_train, y_test = splitting_data(X_stand, y)

In [24]:
prediction = return_prediction(X_train, y_train, X_test)

In [27]:
print(classification_report(y_test, prediction))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00        16
           0       0.67      0.71      0.69       750
           1       0.69      0.67      0.68       741

    accuracy                           0.68      1507
   macro avg       0.45      0.46      0.46      1507
weighted avg       0.67      0.68      0.68      1507



  _warn_prf(average, modifier, msg_start, len(result))


## Predict upcoming Event UFC 261 Main Event

In [28]:
clf_all = prepare_model(X_stand, y)

In [29]:
def choose_fighters(final_df):

    #left = input("Favorite's full name (e.g. Israel Adesanya): ")
    #right = input("Underdogs's full name: ")
    # Uncomment left and right to use input function!

    left = "Kamaru Usman"
    right = "Jorge Masvidal"

    favorite = final_df[final_df["Fighter"] == left][
        [
            "Wins_x",
            "Losses_x",
            "Draws_x",
            "Height_cm_x",
            "Weight_lbs_x",
            "Reach_inch_x",
            "Debut_x",
            "SLpM_x",
            "StrAcc_x",
            "SApM_x",
            "StrDef_x",
            "TD_Avg_x",
            "TD_Acc_x",
            "TD_Def_x",
            "Sub_Avg_x",
            "Stance_x_num",
        ]
    ].iloc[0, :]
    underdog = final_df[final_df["Opponent"] == right][
        [
            "Wins_y",
            "Losses_y",
            "Draws_y",
            "Height_cm_y",
            "Weight_lbs_y",
            "Reach_inch_y",
            "Debut_y",
            "SLpM_y",
            "StrAcc_y",
            "SApM_y",
            "StrDef_y",
            "TD_Avg_y",
            "TD_Acc_y",
            "TD_Def_y",
            "Sub_Avg_y",
            "Stance_y_num",
        ]
    ].iloc[0, :]
    up_fighter_stats = pd.DataFrame(pd.concat([favorite, underdog])).T
    return up_fighter_stats

In [31]:
up_fighter_stats = choose_fighters(final_df) #Usman vs. Masvidal

Favorite's full name (e.g. Israel Adesanya):  Kamaru Usman
Underdogs's full name:  Jorge Masvidal


In [32]:
fight_prediction = clf_all.predict(up_fighter_stats)

if fight_prediction[0] == 1:
    print("Favorite will win!")
elif fight_prediction[0] == 0:
    print("Underdog will win!")
else:
    print("Draw Decision!")

Favorite will win!
