In [14]:
import pandas as pd
import numpy as np
import scipy 
import sys

In [15]:
# Read the result workbooks written by 'home_team_prediction.ipynb' and
# 'away_team_prediction.ipynb' into one DataFrame each.
df_home = pd.read_excel(io='df_all_seasons_home.xlsx')
df_away = pd.read_excel(io='df_all_seasons_away.xlsx')

In [16]:
# Column labels that appear in df_home but not in df_away.
cols_to_use = df_home.columns.difference(
    other=df_away.columns,
)

In [17]:
# Index-aligned outer merge: all columns of df_away plus the columns that
# only df_home provides.
# NOTE(review): this assumes both tables list the same matches in the same
# row order - confirm against the notebooks that produce them.
df_all = df_away.merge(
    df_home[cols_to_use],
    how='outer',
    left_index=True,
    right_index=True,
)

In [18]:
# Remove the 'Unnamed: 0' column (presumably the row index that an earlier
# to_excel() call wrote into the workbook - TODO confirm).
# drop(..., errors='ignore') with a rebind replaces the in-place `del`, so
# re-running this cell is a no-op instead of raising KeyError once the
# column is already gone.
df_all = df_all.drop(columns=['Unnamed: 0'], errors='ignore')

In [19]:
# Predicted goal difference seen from each side: home goals minus away goals
# for the home team, and away goals minus home goals for the away team.
df_all['pred_HTGDIFF'] = df_all['FTHG'].sub(df_all['FTAG'])
df_all['pred_ATGDIFF'] = df_all['FTAG'].sub(df_all['FTHG'])

In [20]:
# Prefix the goal-difference columns with test_ and the goal columns with
# pred_ so they can be told apart from the pred_*GDIFF columns.
df_all.rename(
    columns={
        'HTGDIFF': 'test_HTGDIFF',
        'ATGDIFF': 'test_ATGDIFF',
        'FTHG': 'pred_FTHG',
        'FTAG': 'pred_FTAG',
    },
    inplace=True,
)

In [21]:
# Keep only the columns listed below, in this order. reindex() does not raise
# for a label that is absent from df_all; it adds that column filled with NaN.
df_all = df_all.reindex(
    columns=[
        'Day', 'Month', 'Year',
        'HomeTeam', 'AwayTeam',
        'pred_FTHG', 'pred_FTAG',
        'test_HTGDIFF', 'pred_HTGDIFF',
        'test_ATGDIFF', 'pred_ATGDIFF',
        'AVGATGDIFF', 'AVGFTAG', 'AVGFTHG', 'AVGHTGDIFF',
    ]
)

In [22]:
# Write the combined table to 'both.xlsx' (relative path, default options).
df_all.to_excel(excel_writer='both.xlsx')

In [23]:
# Per-row absolute gap between predicted and test home goal difference.
# A gap of exactly 0 means the prediction matched the test value; accuracy is
# the number of such rows divided by the number of all rows, in percent.
errors = (df_all['pred_HTGDIFF'] - df_all['test_HTGDIFF']).abs()
exact_hits = errors.eq(0).sum()
accuracy = exact_hits / len(errors) * 100
mean_abs_error = np.mean(errors)
print('MAE:', round(mean_abs_error, 2), 'Goals.')
print('Accuracy:', round(accuracy, 2), '%.')

MAE: 1.36 Goals.
Accuracy: 24.41 %.


In [24]:
# Count rows by the sign of the predicted home goal difference:
# positive, zero and negative.
predicted_diff = df_all["pred_HTGDIFF"]
total_wins = predicted_diff.gt(0).sum()
total_draw = predicted_diff.eq(0).sum()
total_loss = predicted_diff.lt(0).sum()

In [25]:
# Count rows where the test and the predicted home goal difference have the
# same sign: both positive, both zero, both negative.
common_win = (df_all["test_HTGDIFF"].gt(0) & df_all["pred_HTGDIFF"].gt(0)).sum()
common_draw = (df_all["test_HTGDIFF"].eq(0) & df_all["pred_HTGDIFF"].eq(0)).sum()
common_lost = (df_all["test_HTGDIFF"].lt(0) & df_all["pred_HTGDIFF"].lt(0)).sum()

In [26]:
# Report agreement rates in percent, rounded to two decimals.
# The total divides all agreeing rows by the row count of df_all; each
# per-outcome share divides the agreeing rows by the number of rows that were
# *predicted* as that outcome (total_wins / total_draw / total_loss).
outcome_report = (
    ('Correct Prediction Total: {} %', common_win + common_draw + common_lost, df_all.shape[0]),
    ('Correct Prediction Share Wins: {} %', common_win, total_wins),
    ('Correct Prediction Share Draws: {} %', common_draw, total_draw),
    ('Correct Prediction Share Lost: {} %', common_lost, total_loss),
)
for template, hits, total in outcome_report:
    print(template.format(np.round((hits / total) * 100, 2)))

Correct Prediction Total: 52.35 %
Correct Prediction Share Wins: 64.39 %
Correct Prediction Share Draws: 24.64 %
Correct Prediction Share Lost: 68.67 %
