In [22]:
import numpy as np
import pandas as pd

In [45]:
# Import the data from csv.
ufc = pd.read_csv('data/data.csv')
ufc.head()

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,...,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_wins,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Henry Cejudo,Marlon Moraes,Marc Goddard,2019-06-08,"Chicago, Illinois, USA",Red,True,Bantamweight,5,0.0,...,2.0,0.0,0.0,8.0,Orthodox,162.56,162.56,135.0,31.0,32.0
1,Valentina Shevchenko,Jessica Eye,Robert Madrigal,2019-06-08,"Chicago, Illinois, USA",Red,True,Women's Flyweight,5,0.0,...,0.0,2.0,0.0,5.0,Southpaw,165.1,167.64,125.0,32.0,31.0
2,Tony Ferguson,Donald Cerrone,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Red,False,Lightweight,3,0.0,...,3.0,6.0,1.0,14.0,Orthodox,180.34,193.04,155.0,36.0,35.0
3,Jimmie Rivera,Petr Yan,Kevin MacDonald,2019-06-08,"Chicago, Illinois, USA",Blue,False,Bantamweight,3,0.0,...,1.0,0.0,0.0,6.0,Orthodox,162.56,172.72,135.0,26.0,29.0
4,Tai Tuivasa,Blagoy Ivanov,Dan Miragliotta,2019-06-08,"Chicago, Illinois, USA",Blue,False,Heavyweight,3,0.0,...,2.0,0.0,0.0,3.0,Southpaw,187.96,190.5,264.0,32.0,26.0


In [13]:
# Check for duplicate values.
ufc.duplicated().sum()

0

In [14]:
# Check for null values.
ufc.isnull().sum()

R_fighter         0
B_fighter         0
Referee          23
date              0
location          0
               ... 
R_Height_cms      4
R_Reach_cms     316
R_Weight_lbs      3
B_age           172
R_age            64
Length: 145, dtype: int64

In [15]:
# We must remove all rows that have null values!
ufc = ufc.dropna(how='any',axis=0) 

In [16]:
# The number of rows was descovered to be 5144 before removing all null value rows using len(ufc.index)
len(ufc.index)

3202

In [17]:
# We must split the 3202 rows into testing and learning.
ufc_train = ufc.sample(frac = 0.75, random_state = 99)
ufc_test = ufc.loc[~ufc.index.isin(ufc_train.index), :]

In [18]:
from sklearn import tree

# Initializing Variables
features = ufc_train[[
    'B_wins', 
    'R_wins',  
    'B_losses',
    'R_losses',
    'B_win_by_KO/TKO',
    'R_win_by_KO/TKO',
    'B_win_by_Submission',
    'R_win_by_Submission',
    'B_current_win_streak',  
    'R_current_win_streak',
    'B_current_lose_streak',
    'R_current_lose_streak',
    'B_Height_cms',
    'R_Height_cms',
    'B_Weight_lbs',
    'R_Weight_lbs',
    'B_Reach_cms',
    'R_Reach_cms']].values

labels = ufc_train['Winner'].values

# Learning Code
clf = tree.DecisionTreeClassifier() 
cls = clf.fit(features, labels) 

# Prediction Code
test_features = ufc_test[[
    'B_wins', 
    'R_wins',  
    'B_losses',
    'R_losses',
    'B_win_by_KO/TKO',
    'R_win_by_KO/TKO',
    'B_win_by_Submission',
    'R_win_by_Submission',
    'B_current_win_streak',  
    'R_current_win_streak',
    'B_current_lose_streak',
    'R_current_lose_streak',
    'B_Height_cms',
    'R_Height_cms',
    'B_Weight_lbs',
    'R_Weight_lbs',
    'B_Reach_cms',
    'R_Reach_cms']].values

predicted_values = clf.predict(test_features)
print(predicted_values)

['Blue' 'Red' 'Red' 'Blue' 'Blue' 'Blue' 'Red' 'Red' 'Red' 'Blue' 'Red'
 'Blue' 'Red' 'Red' 'Blue' 'Red' 'Blue' 'Red' 'Blue' 'Blue' 'Red' 'Red'
 'Blue' 'Red' 'Red' 'Blue' 'Red' 'Red' 'Blue' 'Red' 'Blue' 'Blue' 'Red'
 'Red' 'Red' 'Blue' 'Red' 'Red' 'Red' 'Blue' 'Blue' 'Red' 'Red' 'Red'
 'Red' 'Blue' 'Blue' 'Red' 'Red' 'Red' 'Red' 'Blue' 'Red' 'Blue' 'Blue'
 'Red' 'Red' 'Red' 'Red' 'Red' 'Red' 'Blue' 'Red' 'Red' 'Red' 'Red' 'Red'
 'Red' 'Red' 'Blue' 'Red' 'Blue' 'Blue' 'Blue' 'Blue' 'Blue' 'Draw' 'Red'
 'Red' 'Blue' 'Blue' 'Blue' 'Red' 'Red' 'Blue' 'Blue' 'Red' 'Blue' 'Red'
 'Red' 'Blue' 'Red' 'Red' 'Blue' 'Red' 'Red' 'Red' 'Blue' 'Red' 'Blue'
 'Blue' 'Blue' 'Red' 'Red' 'Red' 'Red' 'Red' 'Red' 'Red' 'Blue' 'Red'
 'Red' 'Red' 'Red' 'Red' 'Red' 'Red' 'Red' 'Blue' 'Red' 'Draw' 'Blue'
 'Red' 'Red' 'Blue' 'Red' 'Red' 'Blue' 'Red' 'Blue' 'Red' 'Blue' 'Blue'
 'Blue' 'Draw' 'Blue' 'Blue' 'Blue' 'Blue' 'Red' 'Red' 'Red' 'Red' 'Blue'
 'Blue' 'Blue' 'Blue' 'Red' 'Red' 'Blue' 'Blue' 'Blue' 'Red' 'Bl

In [20]:
# Saving real values below and predicted values above.
real_values = ufc_test['Winner'].values

In [23]:
# Check if the predicted is equal to the real values.
np.array_equal(real_values,predicted_values)

False

In [27]:
# Check all values one by one. True = prediction was correct and False = prediction failed.
check = (real_values == predicted_values)
check

array([False, False,  True, False,  True,  True,  True, False, False,
        True, False,  True,  True,  True, False,  True,  True,  True,
       False,  True, False, False,  True, False, False, False, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True, False,  True,  True, False,
       False, False, False, False,  True,  True, False,  True, False,
       False,  True,  True,  True, False, False, False,  True, False,
        True, False,  True, False,  True,  True,  True,  True, False,
       False,  True, False, False, False,  True,  True, False, False,
        True, False, False,  True, False,  True,  True, False,  True,
        True, False, False,  True, False, False,  True, False,  True,
       False, False, False,  True,  True, False, False,  True, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
       False, False,  True, False, False,  True, False, False, False,
        True, False,

In [25]:
# Total amount of values.
check.size

800

In [26]:
# Amount of values predicted correctly out of 800.
np.count_nonzero(check)

437