### This notebook contains the Result Evaluation.

#### Notebook 1: Extract-Transform-Load
#### Notebook 2: Data Visualization
#### Notebook 3: Feature Engineering, Hyperparameter tuning and Modelling
#### Notebook 4: Result Evaluation

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
%matplotlib inline 
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the tab-separated results table written by the
# "Feature Engineering, Hyperparameter tuning and Modelling" notebook.
# NOTE(review): the path is a machine-specific placeholder and must be adapted before running.
Results_data = pd.read_csv(
    r"C:\Users\### LOCAL PATH ###\Result_evaluation.txt",
    sep="\t",
)

In [None]:
# Preview the first rows of the imported results table.
Results_data.head()

In [None]:
# (number of rows, number of columns) of the imported results table.
Results_data.shape

### Accuracy

In [None]:
# Per-game hit indicator for every strategy:
#   1.0 -> the strategy's pick equals the "Actual" result
#   0.0 -> the strategy's pick differs from the "Actual" result
#   NaN -> the strategy made no pick for that game
# Built in one vectorised step so the columns are numeric (float) rather than
# object columns filled cell by cell.
strategy_columns = ["Tree Prediction", "SVM Prediction", "NN Prediction",
                    "Democracy", "Unanimous", "Fav Strategy", "Pick Home"]

strategy_picks = Results_data[strategy_columns]
Accuracy = (
    strategy_picks.eq(Results_data["Actual"], axis=0)  # row-wise comparison against the actual result
    .astype(float)
    .where(strategy_picks.notna())                     # keep "no pick" as NaN instead of counting it as a miss
)

In [None]:
# Preview the first rows of the per-game hit indicators.
Accuracy.head()

In [None]:
# Print the share of correct picks per strategy (games without a pick are
# excluded from the denominator). Each entry: label, Accuracy column,
# right-align width of the value field, text appended after the number.
accuracy_layout = [
    ("Tree: ", "Tree Prediction", 13, "% \n"),
    ("SVM: ", "SVM Prediction", 13, "% \n"),
    ("NN: ", "NN Prediction", 13, "% \n"),
    ("Democracy: ", "Democracy", 12, "% \n"),
    ("Unanimous: ", "Unanimous", 12, "% \n"),
    ("Fav Strategy: ", "Fav Strategy", 9, "% \n"),
    ("Pick Home: ", "Pick Home", 10, "%"),
]

accuracy_lines = []
for label, column, width, tail in accuracy_layout:
    hits = Accuracy[column]
    share = hits.sum() / hits.dropna().shape[0]
    value_text = str(round(share * 100, 2)) + tail
    accuracy_lines.append('{:10}'.format(label) + '{:>{w}}'.format(value_text, w=width))

print("Accuracy by approach: \n \n", *accuracy_lines)

In [None]:
# Ranking of all strategies by accuracy, shown as a percentage string.
# Built directly from the numeric mean Series (instead of enlarging an empty,
# object-typed frame) so that rounding operates on floats.
accuracy_measured = (
    Accuracy.mean()
    .astype(float)
    .sort_values(ascending=False)
    .mul(100)
    .round(2)
    .astype(str)
    .to_frame("Accuracy %")
)
accuracy_measured

##### t-tests to determine whether the best strategy is in fact better

In [None]:
# Determine the most, second-most and least accurate strategy from the data.
# All columns of `Accuracy` are ranked, i.e. the benchmark strategies
# ("Fav Strategy", "Pick Home") are included; drop them from `Accuracy.mean()`
# before sorting if only the model-based strategies should compete.
# The result keeps a single column labelled 0 (mean accuracy as a fraction).
accuracy_measured = (
    Accuracy.mean()
    .astype(float)
    .sort_values(ascending=False)
    .to_frame(0)
)
most_accurate = accuracy_measured.index[0]
sec_most_accurate = accuracy_measured.index[1]
least_accurate = accuracy_measured.index[-1]
print("Most/Least accurate: \n \n","1. "+ most_accurate +'\n', "2. "+sec_most_accurate +'\n', "Last: "+least_accurate)

In [None]:
# Most accurate strategy vs. every other strategy.
# For each comparison a two-sample t-statistic with unpooled variances is computed:
#   t = (mean_best - mean_other) / sqrt(var_best / n_best + var_other / n_other)
# and classified against the cut-offs 1.65 / 1.96 / 2.58.
# NOTE(review): all strategies are scored on the same games, so the samples are
# not independent; a paired test may be more appropriate - confirm.
# NOTE(review): the printed wording says "confidence level" where "significance
# level" is presumably meant; the text is left unchanged here.
print("The most accurate (model based) strategy is "+most_accurate+" with "+str(round(accuracy_measured.loc[most_accurate,0]*100,2))+"% accuracy. \n")

# Quantities of the reference strategy do not change inside the loop.
best_hits = Accuracy[most_accurate].dropna()
best_mean = accuracy_measured.loc[most_accurate,0]
best_var_term = best_hits.var(ddof=1) / best_hits.shape[0]

for model_2 in list(Accuracy.columns):
    if model_2 == most_accurate:
        continue  # never compare the strategy with itself

    other_hits = Accuracy[model_2].dropna()
    s = np.sqrt(best_var_term + other_hits.var(ddof=1) / other_hits.shape[0])
    t = (best_mean - accuracy_measured.loc[model_2,0]) / s

    if model_2 == "Fav Strategy":
        print()  # blank line before the benchmark strategies
    if np.isnan(t):
        continue  # statistic undefined (e.g. no picks at all): nothing to report

    t_text = " (t-value: "+str(round(t,3))+")."
    # Half-open intervals: every finite t falls into exactly one class,
    # including values that hit a cut-off exactly.
    if t < 1.65:
        print(most_accurate + " is not significantly more accurate than "+model_2+"."+t_text)
    elif t < 1.96:
        print(most_accurate + " is more accurate than "+model_2 +" at the 10% confidence level."+t_text)
    elif t < 2.58:
        print(most_accurate + " is more accurate than "+model_2 +" at the 5% confidence level."+t_text)
    else:
        print(most_accurate + " is more accurate than "+model_2 +" at the 1% confidence level."+t_text)

In [None]:
# NN strategy vs. every other strategy.
# For each comparison a two-sample t-statistic with unpooled variances is computed:
#   t = (mean_NN - mean_other) / sqrt(var_NN / n_NN + var_other / n_other)
# and classified against the cut-offs 1.65 / 1.96 / 2.58.
# NOTE(review): all strategies are scored on the same games, so the samples are
# not independent; a paired test may be more appropriate - confirm.

print("The NN strategy has "+str(round(accuracy_measured.loc["NN Prediction",0]*100,2))+"% accuracy. \n")

# Quantities of the NN strategy do not change inside the loop.
nn_hits = Accuracy["NN Prediction"].dropna()
nn_mean = accuracy_measured.loc["NN Prediction",0]
nn_var_term = nn_hits.var(ddof=1) / nn_hits.shape[0]

for model_2 in list(Accuracy.columns):
    if model_2 == "NN Prediction":
        continue  # never compare the strategy with itself

    other_hits = Accuracy[model_2].dropna()
    s = np.sqrt(nn_var_term + other_hits.var(ddof=1) / other_hits.shape[0])
    t = (nn_mean - accuracy_measured.loc[model_2,0]) / s

    if model_2 == "Fav Strategy":
        print()  # blank line before the benchmark strategies
    if np.isnan(t):
        continue  # statistic undefined (e.g. no picks at all): nothing to report

    t_text = " (t-value: "+str(round(t,3))+")."
    # Half-open intervals: every finite t falls into exactly one class,
    # including values that hit a cut-off exactly.
    if t < 1.65:
        print("NN strategy" + " is not significantly more accurate than "+model_2+"."+t_text)
    elif t < 1.96:
        print("NN strategy" + " is more accurate than "+model_2 +" at the 10% confidence level."+t_text)
    elif t < 2.58:
        print("NN strategy" + " is more accurate than "+model_2 +" at the 5% confidence level."+t_text)
    else:
        print("NN strategy" + " is more accurate than "+model_2 +" at the 1% confidence level."+t_text)

### Confusion Matrix

In [None]:
def plot_confusion_matrix(df_confusion, title, cmap=plt.cm.gray_r):
    """Draw a confusion table as a shaded matrix with a colour bar.

    Parameters
    ----------
    df_confusion : pandas.DataFrame
        Table of counts; its index labels become the y ticks and its column
        labels the x ticks. The index/columns names are used as axis labels.
    title : str
        Figure title.
    cmap : matplotlib colormap, optional
        Colour map passed to ``plt.matshow`` (default: reversed grey scale).
    """
    plt.matshow(df_confusion, cmap=cmap)
    plt.title(title, y=1.2, size=15)
    plt.colorbar()
    # Tick positions are derived per axis so that a non-square table
    # (different number of rows and columns) is labelled correctly as well.
    plt.xticks(np.arange(len(df_confusion.columns)), df_confusion.columns)
    plt.yticks(np.arange(len(df_confusion.index)), df_confusion.index)
    plt.ylabel(df_confusion.index.name, labelpad=15)
    plt.xlabel(df_confusion.columns.name, labelpad=15)

#### Decision Tree

In [None]:
# Confusion table of actual result (rows) vs. Decision Tree pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["Tree Prediction"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - Tree")

In [None]:
# Display the Decision Tree confusion counts as a table.
df_confusion

#### Support-Vector-Machine

In [None]:
# Confusion table of actual result (rows) vs. SVM pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["SVM Prediction"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - SVM")

In [None]:
# Display the SVM confusion counts as a table.
df_confusion

#### Neural Network

In [None]:
# Confusion table of actual result (rows) vs. Neural Network pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["NN Prediction"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - NN")

In [None]:
# Display the Neural Network confusion counts as a table.
df_confusion

#### Democracy

In [None]:
# Confusion table of actual result (rows) vs. "Democracy" pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["Democracy"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - Democracy")

In [None]:
# Display the "Democracy" confusion counts as a table.
df_confusion

#### Unanimous

In [None]:
# Confusion table of actual result (rows) vs. "Unanimous" pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["Unanimous"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - Unanimous")

In [None]:
# Display the "Unanimous" confusion counts as a table.
df_confusion

#### Fav Strategy

In [None]:
# Confusion table of actual result (rows) vs. "Fav Strategy" pick (columns).
y_actual = Results_data["Actual"]
y_predicted = Results_data["Fav Strategy"]

# Force the full A / D / H grid on both axes: an outcome that never occurs as a
# pick (or as an actual result) gets a row/column of zeros instead of being absent.
# NOTE(review): labels other than A/D/H would be dropped here - confirm none exist.
outcome_labels = ["A", "D", "H"]
df_confusion = (
    pd.crosstab(y_actual, y_predicted, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=outcome_labels, columns=outcome_labels, fill_value=0)
    .rename_axis(index="Actual", columns="Predicted")
)

plot_confusion_matrix(df_confusion, "Confusion Matrix - Favorites")

In [None]:
# Display the "Fav Strategy" confusion counts as a table.
df_confusion

### Return

#### Note that the model is neither optimized nor designed to exploit discrepancies between the estimated game-result probabilities and those implied by the odds.

##### Assuming bets of 1 unit, every game, on the most likely outcome according to the respective model / strategy

In [None]:
def return_calc(Dataframe):
    """Return the total payoff and the per-bet return of a 1-unit-per-game strategy.

    Parameters
    ----------
    Dataframe : pandas.DataFrame
        Must contain the columns "Actual", "Odds A", "Odds D" and "Odds H";
        the column at position 1 is read as the strategy's pick.
        Rows with any missing value are ignored (no bet is placed).

    Returns
    -------
    (absolute_return, ROI)
        absolute_return: sum of the payoffs over all remaining rows, where a
        correct pick pays the odds of the actual outcome minus 1 and a wrong
        pick pays -1.
        ROI: absolute_return divided by the number of remaining rows
        (NaN when there are none).
    """
    bets = Dataframe.dropna().reset_index(drop=True)
    actual = bets["Actual"]
    is_hit = actual == bets.iloc[:, 1]

    # Payoff stays NaN for rows whose actual result is none of A / D / H;
    # NaN is skipped by the sum below but the row still counts as a bet.
    payoff = pd.Series(np.nan, index=bets.index, dtype=float)
    for outcome in ("A", "D", "H"):
        rows = actual == outcome
        payoff.loc[rows] = np.where(is_hit[rows], bets.loc[rows, "Odds " + outcome] - 1, -1)

    # One "ERROR" line per row with an unexpected actual result.
    n_unknown = int((~actual.isin(["A", "D", "H"])).sum())
    for _ in range(n_unknown):
        print("ERROR")

    absolute_return = payoff.sum()
    n_bets = bets.shape[0]
    ROI = absolute_return / n_bets if n_bets else np.nan

    return absolute_return, ROI

##### Comparing absolute and relative returns by strategy

In [None]:
# Absolute return and ROI per strategy. Every call passes the actual result,
# the strategy's pick (second column) and the three odds columns.
odds_columns = ["Odds A", "Odds D", "Odds H"]

abs_Tree, ROI_Tree = return_calc(Results_data[["Actual", "Tree Prediction"] + odds_columns])
abs_SVM, ROI_SVM = return_calc(Results_data[["Actual", "SVM Prediction"] + odds_columns])
abs_NN, ROI_NN = return_calc(Results_data[["Actual", "NN Prediction"] + odds_columns])
abs_Democracy, ROI_Democracy = return_calc(Results_data[["Actual", "Democracy"] + odds_columns])
abs_Unanimous, ROI_Unanimous = return_calc(Results_data[["Actual", "Unanimous"] + odds_columns])
abs_Fav, ROI_Fav = return_calc(Results_data[["Actual", "Fav Strategy"] + odds_columns])
abs_HP, ROI_HP = return_calc(Results_data[["Actual", "Pick Home"] + odds_columns])

In [None]:
# Print absolute return and ROI per strategy as an aligned table.
# A single row template is used for the header and all rows so that every
# column lines up regardless of the label length.
return_rows = [
    ("Tree: ", abs_Tree, ROI_Tree),
    ("SVM: ", abs_SVM, ROI_SVM),
    ("NN: ", abs_NN, ROI_NN),
    ("Democracy: ", abs_Democracy, ROI_Democracy),
    ("Unanimous: ", abs_Unanimous, ROI_Unanimous),
    ("Fav Strategy: ", abs_Fav, ROI_Fav),
    ("Home Pick: ", abs_HP, ROI_HP),
]
row_template = '{:<14}{:>10}{:>10}'

table_lines = [row_template.format("", "Absolute", "ROI")]
for label, absolute, roi in return_rows:
    table_lines.append(
        row_template.format(label, str(round(absolute, 2)), str(round(roi * 100, 2)) + "%")
    )

print("Absolute and return on investment by approach: \n")
print("\n".join(table_lines))

##### Return ranking

In [None]:
# One row per strategy with its absolute return and its ROI in percent.
# The frame is created in a single constructor call so the value columns get a
# numeric dtype (instead of the object dtype of a frame grown row by row).
Returns = pd.DataFrame(
    [
        ["Tree", abs_Tree, ROI_Tree*100],
        ["SVM", abs_SVM, ROI_SVM*100],
        ["NN", abs_NN, ROI_NN*100],
        ["Democracy", abs_Democracy, ROI_Democracy*100],
        ["Unanimous", abs_Unanimous, ROI_Unanimous*100],
        ["Fav Strategy", abs_Fav, ROI_Fav*100],
        ["Home Pick", abs_HP, ROI_HP*100],
    ],
    columns=["Strategy", "Absolute", "ROI in %"],
)
# Displayed ranking only; `Returns` itself keeps the insertion order.
Returns.sort_values(["ROI in %","Absolute"], ascending = [False, False]).reset_index(drop=True)

In [None]:
# Re-index the returns by strategy name and bring them into the display order
# used by the chart. The values are cast to float so the frame is plottable.
Returns_grp = Returns.groupby("Strategy",as_index=True).sum()

strategy_order = ["NN", "Tree", "SVM", "Democracy", "Unanimous", "Fav Strategy", "Home Pick"]
Returns_df = (
    Returns_grp.loc[strategy_order, ["Absolute", "ROI in %"]]
    .astype(float)
    .rename_axis(None)  # plain, unnamed index of strategy labels
)
Returns_df

In [None]:
# Horizontal bar chart of absolute return and ROI per strategy, with the value
# written next to the zero line of every bar.
colors_list = ["silver", "gold"]

df = Returns_df.rename(columns={"ROI in %":"ROI"})

ax = df.plot(kind='barh', figsize =(10,7), color = colors_list, edgecolor='w')

ax.set_alpha(0.8)
ax.set_title("Absolute and relative return by strategy", size = 16)

ax.set_xlabel("Result", size = 13)
ax.set_ylabel("Strategy", size = 13)
ax.set_xlim(-100,100)

# One bar container per plotted column, in column order. Pairing the containers
# with the column names identifies the ROI bars (labelled with a "%" sign)
# without inspecting the bars' y positions.
for container, column in zip(ax.containers, df.columns):
    unit = "%" if column == "ROI" else ""
    for bar in container:
        value = bar.get_width()
        label = str(round(value, 2)) + unit
        if value < 0:
            # Negative bar extends to the left: write the label right of zero, in red.
            ax.text(2, bar.get_y()+0.18, label, fontsize=11, color='red')
        else:
            # Non-negative bar extends to the right: write the label left of zero, in green.
            ax.text(-13, bar.get_y()+0.18, label, fontsize=11, color='green')

ax.invert_yaxis()


### Team-Bias?

In [None]:
# Rename the three model columns from "<model> Prediction" to "<model> Acc" so they
# can sit next to the prediction columns of the same name in one table.
# NOTE: `Accuracy` is rebound here; earlier cells that read the old column names
# will not work when re-run after this cell.
model_acc_names = {name + " Prediction": name + " Acc" for name in ("Tree", "SVM", "NN")}
Accuracy = Accuracy.rename(columns=model_acc_names)


In [None]:
# Side-by-side table: teams and model picks from the results, plus the
# matching per-game hit indicators (joined column-wise on the index).
team_and_picks = Results_data[["Home Team","Away Team","Tree Prediction","SVM Prediction","NN Prediction"]]
model_hits = Accuracy[["Tree Acc","SVM Acc","NN Acc"]]

Data = pd.concat([team_and_picks, model_hits], axis=1)
Data.head()

In [None]:
# Every team that appears as home or away team, each listed once.
# Sorted so the order (and thus the row order of tables built from it) is the
# same on every run; a bare set has no reproducible order.
Teams_list = sorted(set(Data["Home Team"]) | set(Data["Away Team"]))


In [None]:
# Per-team tally, for each model, of
#   "<model> for"             - games in which the model picked the team to win
#                               (pick "H" in its home games, "A" in its away games)
#   "<model> for correct"     - how many of those picks were correct
#   "<model> against"         - games in which the model did not pick the team to win
#   "<model> against correct" - how many of those picks were correct
# Each model is scored with its own "<model> Acc" column.
bias_models = ["Tree", "SVM", "NN"]

bias_columns = ["Team"]
for model in bias_models:
    bias_columns += [model + " for", model + " for correct",
                     model + " against", model + " against correct"]

bias_rows = []
for Team in Teams_list:
    home_games = Data.loc[Data["Home Team"] == Team]
    away_games = Data.loc[Data["Away Team"] == Team]

    team_row = [Team]
    for model in bias_models:
        pred_col = model + " Prediction"
        acc_col = model + " Acc"

        # Boolean masks: did the model pick this team to win?
        picked_at_home = home_games[pred_col] == "H"
        picked_away = away_games[pred_col] == "A"

        n_for = int(picked_at_home.sum()) + int(picked_away.sum())
        n_for_corr = (home_games.loc[picked_at_home, acc_col].sum()
                      + away_games.loc[picked_away, acc_col].sum())

        # Everything that is not a pick for the team counts as "against"
        # (this includes games without a pick).
        n_against = int((~picked_at_home).sum()) + int((~picked_away).sum())
        n_against_corr = (home_games.loc[~picked_at_home, acc_col].sum()
                          + away_games.loc[~picked_away, acc_col].sum())

        team_row += [n_for, n_for_corr, n_against, n_against_corr]

    bias_rows.append(team_row)

Data_Team_Bias = pd.DataFrame(bias_rows, columns=bias_columns)

In [None]:
# Preview the first rows of the per-team tally.
Data_Team_Bias.head()

## Tree

#### When picking the Team to win

In [None]:
# Decision Tree: hit rate (in %) of the picks FOR each team.
Tree_for_data = (
    Data_Team_Bias.loc[:,["Team","Tree for","Tree for correct"]]
    .rename(columns={"Tree for":"pred Win","Tree for correct":"Correct"})
)
n_picks = Tree_for_data["pred Win"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
Tree_for_data["%"] = (
    Tree_for_data["Correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
Tree_for_data.loc[Tree_for_data["pred Win"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
Tree_for_data.loc[Tree_for_data["pred Win"] >= 5].dropna().sort_values("%", ascending = False).tail()

#### When picking against the Team

In [None]:
# Decision Tree: hit rate (in %) of the picks AGAINST each team.
# NOTE: the variable name `Tree_for_data` is reused for the "against" table
# because the following cells read it under that name.
Tree_for_data = (
    Data_Team_Bias.loc[:,["Team","Tree against","Tree against correct"]]
    .rename(columns={"Tree against":"pred not Win","Tree against correct":"Correct"})
)
n_picks = Tree_for_data["pred not Win"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
Tree_for_data["%"] = (
    Tree_for_data["Correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
Tree_for_data.loc[Tree_for_data["pred not Win"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
Tree_for_data.loc[Tree_for_data["pred not Win"] >= 5].dropna().sort_values("%", ascending = False).tail()

## SVM

#### When picking the Team to win

In [None]:
# SVM: hit rate (in %) of the picks FOR each team.
SVM_for_data = Data_Team_Bias.loc[:,["Team","SVM for","SVM for correct"]].copy()
n_picks = SVM_for_data["SVM for"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
SVM_for_data["%"] = (
    SVM_for_data["SVM for correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
SVM_for_data.loc[SVM_for_data["SVM for"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
SVM_for_data.loc[SVM_for_data["SVM for"] >= 5].dropna().sort_values("%", ascending = False).tail()

#### When picking against the Team

In [None]:
# SVM: hit rate (in %) of the picks AGAINST each team.
SVM_ag_data = Data_Team_Bias.loc[:,["Team","SVM against","SVM against correct"]].copy()
n_picks = SVM_ag_data["SVM against"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
SVM_ag_data["%"] = (
    SVM_ag_data["SVM against correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
SVM_ag_data.loc[SVM_ag_data["SVM against"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
SVM_ag_data.loc[SVM_ag_data["SVM against"] >= 5].dropna().sort_values("%", ascending = False).tail()

## NN

#### When picking the Team to win

In [None]:
# Neural Network: hit rate (in %) of the picks FOR each team.
NN_for_data = Data_Team_Bias.loc[:,["Team","NN for","NN for correct"]].copy()
n_picks = NN_for_data["NN for"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
NN_for_data["%"] = (
    NN_for_data["NN for correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
NN_for_data.loc[NN_for_data["NN for"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
NN_for_data.loc[NN_for_data["NN for"] >= 5].dropna().sort_values("%", ascending = False).tail()

#### When picking against the Team

In [None]:
# Neural Network: hit rate (in %) of the picks AGAINST each team.
NN_ag_data = Data_Team_Bias.loc[:,["Team","NN against","NN against correct"]].copy()
n_picks = NN_ag_data["NN against"].astype(float)
# Teams without any such pick get NaN (zero counts are masked before dividing).
NN_ag_data["%"] = (
    NN_ag_data["NN against correct"].astype(float) / n_picks.where(n_picks > 0) * 100
).round(2)

In [None]:
# Highest hit rates: teams with at least 5 such picks, sorted by "%" descending, first five rows.
NN_ag_data.loc[NN_ag_data["NN against"] >= 5].dropna().sort_values("%", ascending = False).head()

In [None]:
# Lowest hit rates: teams with at least 5 such picks, sorted by "%" descending, last five rows.
NN_ag_data.loc[NN_ag_data["NN against"] >= 5].dropna().sort_values("%", ascending = False).tail()