# BT4014 Project

## 0. Imports

In [1]:
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt

## 1. Loading the dataset

In [2]:
# Using the basic stats as outlined in the dataset description (except urls)
masterMLpublic = pd.read_csv("masterMLpublic.csv",
                            usecols = [
                                "date", "result", "fighter", "opponent", "division",
                                "stance", "dob", "method", "total_comp_time", "round", 
                                "time", "referee", "time_format", "reach", "height",
                                "age", "knockdowns", "sub_attempts", "reversals", "control",
                                "takedowns_landed", "takedowns_attempts",
                                "sig_strikes_landed", "sig_strikes_attempts",
                                "total_strikes_landed", "total_strikes_attempts",
                                "head_strikes_landed", "head_strikes_attempts",
                                "body_strikes_landed", "body_strikes_attempts",
                                "leg_strikes_landed", "leg_strikes_attempts",
                                "distance_strikes_landed", "distance_strikes_attempts",
                                "clinch_strikes_landed", "clinch_strikes_attempts",
                                "ground_strikes_landed", "ground_strikes_attempts",
                                "KO_losses", "days_since_last_comp", "lose_streak",
                                "win_streak", "win_loss_ratio", "total_comp_time", "stamina",
                                "num_fights", "trueskill", "elo"
                            ])
# Convert the date column from object into datetime
masterMLpublic["date"] = pd.to_datetime(masterMLpublic["date"]).dt.date
masterMLpublic

Unnamed: 0,date,result,fighter,opponent,stance,dob,division,method,round,time,...,ground_strikes_attempts,num_fights,win_streak,lose_streak,win_loss_ratio,KO_losses,total_comp_time,stamina,elo,trueskill
0,1994-03-11,0,sean daugherty,scott morris,,1975-12-04,Open Weight,SUB,1,0:20,...,0,1.0,0,1,0.000000,0,20.0,0.000000,1147.756618,20.794729
1,1994-03-11,1,scott morris,sean daugherty,Orthodox,,Open Weight,SUB,1,0:20,...,0,1.0,1,0,1.000000,0,20.0,0.000000,1252.243382,29.205271
2,1994-03-11,0,ray wizard,patrick smith,,,Open Weight,SUB,1,0:58,...,0,1.0,0,1,0.000000,0,58.0,0.000000,1147.164966,20.794729
3,1994-03-11,1,patrick smith,ray wizard,Orthodox,1963-08-28,Open Weight,SUB,1,0:58,...,0,1.0,1,0,1.000000,0,58.0,0.000000,1249.442138,29.205271
4,1994-03-11,0,david levicki,johnny rhodes,,,Open Weight,KO/TKO,1,12:13,...,1,1.0,0,1,0.000000,1,733.0,0.000000,1141.838344,20.794729
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14125,2023-04-15,1,azamat murzakanov,dustin jacoby,Southpaw,1989-04-12,Light Heavyweight,U-DEC,3,5:00,...,0,3.0,3,0,1.000000,0,2222.0,1.000000,1341.922237,35.760448
14126,2023-04-15,0,billy quarantillo,edson barboza,Orthodox,1988-12-08,Featherweight,KO/TKO,1,2:37,...,0,8.0,0,1,0.625000,1,5352.0,0.600000,1264.744398,31.425332
14127,2023-04-15,1,edson barboza,billy quarantillo,Orthodox,1986-01-21,Featherweight,KO/TKO,1,2:37,...,1,28.0,1,0,0.607143,4,18947.0,0.631579,1350.043973,34.795952
14128,2023-04-15,0,arnold allen,max holloway,Southpaw,1994-01-22,Featherweight,U-DEC,5,5:00,...,0,11.0,0,1,0.909091,0,8823.0,0.888889,1489.695647,39.967508


## 2. Exploring the dataset

In [3]:
masterMLpublic.dropna(subset=["age", "height"], inplace=True)

In [4]:
masterMLpublic.isna().sum()

date                            0
result                          0
fighter                         0
opponent                        0
stance                         68
dob                             0
division                        0
method                          0
round                           0
time                            0
referee                        64
time_format                     0
reach                        1058
age                             0
height                          0
knockdowns                      0
days_since_last_comp            0
sub_attempts                    0
reversals                       0
control                         0
takedowns_landed                0
takedowns_attempts              0
sig_strikes_landed              0
sig_strikes_attempts            0
total_strikes_landed            0
total_strikes_attempts          0
head_strikes_landed             0
head_strikes_attempts           0
body_strikes_landed             0
body_strikes_a

In [5]:
# masterMLpublic.iloc[0]

In [6]:
# masterMLpublic.to_csv("new_masterMLpublic.csv")

In [7]:
# for col in masterMLpublic.columns:
#     print(col)

In [8]:
# masterdataframe = pd.read_csv("masterdataframe.csv")
# masterdataframe

In [9]:
# for col in masterdataframe.columns:
#     print(col)

In [10]:
# fix days_since_last_comp variable
specific_value = 216.1080438291005
masterMLpublic.loc[masterMLpublic["days_since_last_comp"] == specific_value, "days_since_last_comp"] = masterMLpublic["age"]

In [11]:
# removing draws because nobody wins
masterMLpublic = masterMLpublic[masterMLpublic["method"] != "DRAW"]
# removing dqs because they are not standard wins
masterMLpublic = masterMLpublic[masterMLpublic["method"] != "DQ"]

In [12]:
# masterMLpublic.iloc[0]["days_since_last_comp"]

In [13]:
masterMLpublic["method"].value_counts()

U-DEC     4908
KO/TKO    4545
SUB       2722
S-DEC     1317
M-DEC      113
Name: method, dtype: int64

In [14]:
masterMLpublic["division"].value_counts()

Lightweight              2384
Welterweight             2353
Middleweight             1844
Featherweight            1306
Heavyweight              1244
Light Heavyweight        1235
Bantamweight             1162
Flyweight                 574
Women's Strawweight       510
Women's Flyweight         376
Women's Bantamweight      358
Catch Weight              112
Open Weight                93
Women's Featherweight      52
Super Heavyweight           2
Name: division, dtype: int64

## 3. Filtering out the lightweight division for analysis

In [15]:
lightweight = masterMLpublic[masterMLpublic["division"] == "Lightweight"]
lightweight

Unnamed: 0,date,result,fighter,opponent,stance,dob,division,method,round,time,...,ground_strikes_attempts,num_fights,win_streak,lose_streak,win_loss_ratio,KO_losses,total_comp_time,stamina,elo,trueskill
199,1997-02-07,1,jerry bohlander,rainy martinez,Orthodox,1974-02-12,Lightweight,SUB,1,1:24,...,0,4.0,2,0,0.750000,1,1858.0,0.000000,1292.341803,29.965180
200,1997-02-07,0,wallid ismail,yoshiki takahashi,Orthodox,1968-02-23,Lightweight,U-DEC,2,3:00,...,0,1.0,0,1,0.000000,0,900.0,0.000000,1147.756618,20.794729
201,1997-02-07,1,yoshiki takahashi,wallid ismail,Southpaw,1969-03-13,Lightweight,U-DEC,2,3:00,...,10,1.0,1,0,1.000000,0,900.0,0.000000,1252.243382,29.205271
207,1997-02-07,1,jerry bohlander,nick sanzo,Orthodox,1974-02-12,Lightweight,SUB,1,0:39,...,0,5.0,3,0,0.800000,1,1897.0,0.000000,1329.459875,31.301881
213,1997-05-30,1,tito ortiz,wes albritton,Orthodox,1975-01-23,Lightweight,KO/TKO,1,0:31,...,23,1.0,1,0,1.000000,0,31.0,0.000000,1263.165486,29.205271
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14055,2023-03-18,1,justin gaethje,rafael fiziev,Orthodox,1988-11-14,Lightweight,M-DEC,3,5:00,...,0,11.0,1,0,0.636364,2,6670.0,0.600000,1491.524082,38.809578
14108,2023-04-15,0,lando vannata,daniel zellhuber,Orthodox,1992-03-14,Lightweight,U-DEC,3,5:00,...,2,13.0,0,2,0.307692,0,9089.0,0.222222,1148.578353,26.265970
14109,2023-04-15,1,daniel zellhuber,lando vannata,Switch,1999-07-07,Lightweight,U-DEC,3,5:00,...,51,2.0,1,0,0.500000,0,1800.0,0.500000,1200.212327,26.476804
14118,2023-04-15,0,clay guida,rafa garcia,Orthodox,1981-12-08,Lightweight,U-DEC,3,5:00,...,0,35.0,0,1,0.514286,2,24276.0,0.521739,1178.957220,31.316986


In [16]:
average_reach_lightweight = lightweight["reach"].mean()
lightweight["reach"].fillna(average_reach_lightweight, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lightweight["reach"].fillna(average_reach_lightweight, inplace=True)


In [17]:
lightweight["fighter"].nunique()

493

In [18]:
# date_threshold = datetime.date(2021, 1, 1)
# early_lightweight = lightweight[lightweight["date"] < date_threshold]
# late_lightweight = lightweight[lightweight["date"] >= date_threshold]

In [19]:
# early_lightweight

In [20]:
# late_lightweight

In [21]:
# for index, row in lightweight.iterrows():
#     print(row["fighter"], row["opponent"])

In [22]:
# lightweight.iloc[0]

In [23]:
class ModifiedLinUCB:
    def __init__(self, num_arms, num_features, alpha=0.1):
        self.num_arms = num_arms
        self.num_features = num_features
        self.alpha = alpha
        self.A = [np.identity(num_features) for _ in range(num_arms)]
        self.b = [np.zeros((num_features, 1)) for _ in range(num_arms)]
        
    def recommend(self, context, available_arms):
        chosen_arm = None
        max_ucb = float('-inf')

        for arm in available_arms:
            x = context.reshape((self.num_features, 1))
            A_inv = np.linalg.inv(self.A[arm])
            theta = np.dot(A_inv, self.b[arm])
            ucb = np.dot(theta.T, x) + self.alpha * np.sqrt(np.dot(x.T, np.dot(A_inv, x)))

            if ucb > max_ucb:
                max_ucb = ucb
                chosen_arm = arm

        return chosen_arm
        
    def update(self, arm, context, reward):
        # Ideally should use new context
        x = context.reshape((self.num_features, 1))
        self.A[arm] += np.dot(x, x.T)
        self.b[arm] += reward * x

In [24]:
chosen_features = [
    # "date", "result", "fighter", "opponent", "division",
    # "stance", 
    # "dob", "method", 
    "total_comp_time", 
    # "round", "time", "referee", "time_format", 
    "reach", "height", "age", 
    "knockdowns", "sub_attempts", "reversals", "control",
    "takedowns_landed", "takedowns_attempts",
    "sig_strikes_landed", "sig_strikes_attempts",
    "total_strikes_landed", "total_strikes_attempts",
    "head_strikes_landed", "head_strikes_attempts",
    "body_strikes_landed", "body_strikes_attempts",
    "leg_strikes_landed", "leg_strikes_attempts",
    "distance_strikes_landed", "distance_strikes_attempts",
    "clinch_strikes_landed", "clinch_strikes_attempts",
    "ground_strikes_landed", "ground_strikes_attempts",
    "KO_losses", "days_since_last_comp", "lose_streak",
    "win_streak", "win_loss_ratio", 
    # "total_comp_time", 
    "stamina", "num_fights", "trueskill", "elo"
]

In [33]:
num_arms = lightweight["fighter"].nunique()
num_features = len(chosen_features) * 2
bandit = ModifiedLinUCB(num_arms, num_features)

num_lightweight_fights = len(lightweight.index) // 2

fighter_list = []

total_reward = 0

for i in range(num_lightweight_fights):
    i_fighter1 = i * 2
    i_fighter2 = i_fighter1 + 1
    
    fighter1_values = lightweight.iloc[i_fighter1]
    fighter1_name = fighter1_values["fighter"]
    fighter1_stats = fighter1_values[chosen_features].tolist()

    fighter2_values = lightweight.iloc[i_fighter2]
    fighter2_name = fighter2_values["fighter"]
    fighter2_stats = fighter2_values[chosen_features].tolist()
    
    context = fighter1_stats + fighter2_stats
    context = np.array(context)

    if fighter1_name not in fighter_list:
        fighter_list.append(fighter1_name)
    if fighter2_name not in fighter_list:
        fighter_list.append(fighter2_name)
        
    arm1 = fighter_list.index(fighter1_name)
    arm2 = fighter_list.index(fighter2_name)
        
    available_arms = [arm1, arm2]

    chosen_arm = bandit.recommend(context, available_arms)
    
    print(f"chosen_arm: {chosen_arm}, arm1: {arm1}, arm2: {arm2}")
    print(f"fighter1_result: {fighter1_values['result']}, fighter2_result: {fighter2_values['result']}")

    if chosen_arm == arm1 and fighter1_values["result"] == 1:
        reward = 1
    elif chosen_arm == arm2 and fighter2_values["result"] == 1:
        reward = 1
    else:
        reward = 0
        
    print(f"reward: {reward}")

    total_reward += reward
    bandit.update(chosen_arm, context, reward)
    
print(total_reward)

chosen_arm: 0, arm1: 0, arm2: 1
fighter1_result: 1, fighter2_result: 0
reward: 1
chosen_arm: 2, arm1: 2, arm2: 0
fighter1_result: 1, fighter2_result: 1
reward: 1
chosen_arm: 3, arm1: 3, arm2: 4
fighter1_result: 1, fighter2_result: 1
reward: 1
chosen_arm: 5, arm1: 5, arm2: 3
fighter1_result: 1, fighter2_result: 0
reward: 1
chosen_arm: 4, arm1: 4, arm2: 6
fighter1_result: 1, fighter2_result: 1
reward: 1
chosen_arm: 6, arm1: 6, arm2: 7
fighter1_result: 0, fighter2_result: 1
reward: 0
chosen_arm: 8, arm1: 8, arm2: 9
fighter1_result: 1, fighter2_result: 1
reward: 1
chosen_arm: 10, arm1: 10, arm2: 9
fighter1_result: 0, fighter2_result: 1
reward: 0
chosen_arm: 9, arm1: 8, arm2: 9
fighter1_result: 0, fighter2_result: 1
reward: 1
chosen_arm: 11, arm1: 11, arm2: 8
fighter1_result: 1, fighter2_result: 1
reward: 1
chosen_arm: 12, arm1: 12, arm2: 9
fighter1_result: 0, fighter2_result: 1
reward: 0
chosen_arm: 13, arm1: 13, arm2: 11
fighter1_result: 0, fighter2_result: 1
reward: 0
chosen_arm: 14, arm

In [34]:
total_reward / num_lightweight_fights

0.4303691275167785

In [29]:
import numpy as np

class LinUCB:
    def __init__(self, num_arms, num_features, alpha=0.1):
        self.num_arms = num_arms
        self.num_features = num_features
        self.alpha = alpha
        self.A = [np.identity(num_features) for _ in range(num_arms)]
        self.b = [np.zeros((num_features, 1)) for _ in range(num_arms)]

    def recommend(self, context):
        p_values = []
        for arm in range(self.num_arms):
            theta = np.linalg.inv(self.A[arm]).dot(self.b[arm])
            x = np.array(context).reshape((-1, 1))
            p = theta.T.dot(x) + self.alpha * np.sqrt(x.T.dot(np.linalg.inv(self.A[arm])).dot(x))
            p_values.append(p[0, 0])

        chosen_arm = np.argmax(p_values)
        return chosen_arm
        
    def update(self, arm, context, reward):
        x = context.reshape((self.num_features, 1))
        self.A[arm] += np.dot(x, x.T)
        self.b[arm] += reward * x


In [36]:
num_arms = 2
num_features = len(chosen_features)
bandit = LinUCB(num_arms, num_features)

num_lightweight_povs = len(lightweight.index)

total_reward = 0

for i in range(num_lightweight_povs):
    fighter_values = lightweight.iloc[i]
    fighter_name = fighter_values["fighter"]
    fighter_stats = fighter_values[chosen_features].tolist()
    
    context = fighter_stats
    context = np.array(context)

    chosen_arm = bandit.recommend(context)

    if chosen_arm == fighter_values["result"]:
        reward = 1
    else:
        reward = 0
        
    print(f"result: {fighter_values['result']}, chosen_arm: {chosen_arm}")
    print(f"reward: {reward}")

    total_reward += reward
    bandit.update(chosen_arm, context, reward)
    
print(total_reward)

result: 1, chosen_arm: 0
reward: 0
result: 0, chosen_arm: 1
reward: 0
result: 1, chosen_arm: 0
reward: 0
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 0
reward: 0
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 0
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 0
reward: 0
result: 0, chosen_arm: 0
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 0
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 1
reward: 0
result: 1, chosen_arm: 0
reward: 0
result: 1, chosen_arm: 1
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 0
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 0
reward: 1
result: 1, chosen_arm: 1
reward: 1
result: 0, chosen_arm: 0
reward: 1
result: 0, chosen_arm: 1
reward: 0
result: 1, chosen_arm: 0
reward: 0
result: 1, chosen_arm: 0
reward: 0
result: 1, chosen_ar

In [37]:
total_reward / num_lightweight_povs

0.9911912751677853