In [139]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
import copy
import glob

from keras.utils.vis_utils import plot_model


# Debug aid: raise NumPy's summarization threshold to sys.maxsize so arrays are
# printed in full instead of being abbreviated with "...".
# Temporary - delete later; full dumps of large arrays flood the cell output.
import sys
np.set_printoptions(threshold=sys.maxsize)


In [141]:
path = '../data'

# Columns read from every season file: the two teams, the full-time result,
# the full-time goals of each side and the three odds columns (WHH / WHD / WHA).
fields = ['HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG', 'WHH', 'WHD', 'WHA']


def load_season(csv_path):
    """Read one season CSV and return its complete rows with integer goal counts.

    Args:
        csv_path: path of a CSV file that contains at least the columns in ``fields``.

    Returns:
        A new DataFrame restricted to ``fields``. Rows with a missing value in any
        of those columns are dropped, and FTHG / FTAG are converted to ``int``
        (they are parsed as floats whenever a file contains empty cells).
    """
    try:
        # pandas >= 1.3 spelling of "skip malformed lines".
        season = pd.read_csv(csv_path, usecols=fields, on_bad_lines='skip')
    except TypeError:
        # Older pandas rejects the unknown keyword; fall back to the legacy flag
        # (removed again in pandas 2.0).
        season = pd.read_csv(csv_path, usecols=fields, error_bad_lines=False)
    season = season.dropna()
    return season.astype({'FTHG': int, 'FTAG': int})


# glob returns paths in arbitrary, OS-dependent order; sorting makes the row order
# of `matches` reproducible between runs.
# NOTE(review): later cells treat row order as match order - confirm that the
# file names sort chronologically.
files = sorted(glob.glob(path + "/*.csv"))

# Concatenate once instead of growing the frame inside the loop (DataFrame.append
# was removed in pandas 2.0, and the old sort='False' was a truthy string).
seasons = [load_season(f) for f in files]
matches = pd.concat(seasons, ignore_index=True, sort=False) if seasons else pd.DataFrame()


In [142]:
# Look-back window: a team's form is computed once it has more than this many
# recorded games.
last_n_games = 10

# match: row index -> per-match record; team: team name -> list of row indices.
# Both start empty and are filled by the cell that walks over `matches`.
match, team = dict(), dict()

In [143]:
def summarize_recent_form(matches, team_name, match_keys):
    """Aggregate one team's results over the given rows of ``matches``.

    Args:
        matches: DataFrame with the columns HomeTeam, AwayTeam, FTR, FTHG and FTAG.
        team_name: team from whose point of view wins, losses and goals are counted.
        match_keys: positional row indices (used with ``.iloc``) of the games to
            summarize.

    Returns:
        ``[games, wins, draws, losses, goals_scored, goals_conceded]``.
    """
    games = wins = draws = losses = goals_scored = goals_conceded = 0
    for key in match_keys:
        game = matches.iloc[key]
        result = game['FTR']
        played_home = game['HomeTeam'] == team_name
        played_away = game['AwayTeam'] == team_name

        games += 1
        # 'H' counts as a win for the home side and 'A' for the away side;
        # the mirrored combinations are losses and 'D' is a draw.
        if (result == 'H' and played_home) or (result == 'A' and played_away):
            wins += 1
        if (result == 'H' and played_away) or (result == 'A' and played_home):
            losses += 1
        if result == 'D':
            draws += 1

        if played_home:
            goals_scored += game['FTHG']
            goals_conceded += game['FTAG']
        elif played_away:
            goals_scored += game['FTAG']
            goals_conceded += game['FTHG']

    return [games, wins, draws, losses, goals_scored, goals_conceded]


# Walk the rows from the last to the first. Each team's list in `team` therefore
# holds row indices in descending order; once it exceeds `last_n_games` entries,
# its first element is the highest-index game and the rest are the `last_n_games`
# rows with lower indices (assuming `matches` is in chronological order, these are
# the games played just before it - TODO confirm). Their summary is stored on that
# highest-index game.
for match_index in range(len(matches) - 1, -1, -1):
    curr_match = matches.iloc[match_index]
    home_team, away_team = curr_match['HomeTeam'], curr_match['AwayTeam']

    # Record layout: [(home, away), home-side form, away-side form, [WHH, WHD, WHA]].
    # The two form slots stay all-zero until enough games have been collected.
    match[match_index] = [(home_team, away_team), [0] * 6, [0] * 6,
                          [curr_match['WHH'], curr_match['WHD'], curr_match['WHA']]]

    # Register the game for both sides before evaluating either of them.
    for side in (home_team, away_team):
        team[side] = team.get(side, []) + [match_index]

    for side in (home_team, away_team):
        if len(team[side]) > last_n_games:
            match_key = team[side].pop(0)
            # Slot 1 if the team was the home side in the game being described, else slot 2.
            home_or_away = 1 if matches.iloc[match_key]['HomeTeam'] == side else 2
            match[match_key][home_or_away] = summarize_recent_form(matches, side, team[side])


In [144]:
matches_nn_input = []
rows_to_drop = []

# Iterate in ascending row order. `match` was filled from the last row to the
# first, so plain dict iteration yields descending indices; that would put the
# feature rows in the opposite order to `matches['FTR']` below and pair every
# input with the wrong label.
for key in sorted(match):
    home_form, away_form = match[key][1], match[key][2]
    if np.count_nonzero(home_form) == 0 or np.count_nonzero(away_form) == 0:
        # At least one side has no form statistics (slot still all zeros): unusable row.
        rows_to_drop.append(key)
    else:
        # Alternative feature set (form statistics without the game count):
        # matches_nn_input.append(home_form[1:] + away_form[1:])
        matches_nn_input.append(match[key][3])

# Remove the unusable rows and renumber, so that row i of `matches` corresponds
# to row i of `matches_nn_input`.
matches = matches.drop(rows_to_drop)
matches.index = range(len(matches))
matches_nn_input = np.array(matches_nn_input)

# Guards against re-running this cell on an already filtered `matches`.
assert len(matches_nn_input) == len(matches), "features and labels are misaligned"

full_time_results = matches['FTR']

In [145]:
# Integer class ids used as network targets:
#   1 = 'H', 2 = 'A', 0 = every other result value (e.g. 'D').
output_class = [0, 1, 2]

result_to_class = {'H': 1, 'A': 2}
output_final_ints = np.array([result_to_class.get(res, 0) for res in full_time_results])

# shuffle=False keeps the row order: the first 70% of the rows become the
# training split and the remaining 30% the test split.
train_input, test_input, train_output, test_output = train_test_split(
    matches_nn_input, output_final_ints, test_size=0.3, shuffle=False)


In [146]:
# Number of units per hidden Dense layer. All four widths share the same value;
# hidden_layer_4 is currently not used by an active layer.
hidden_layer_1 = hidden_layer_2 = hidden_layer_3 = hidden_layer_4 = 10

In [153]:
# Assemble the layer stack in order: a Flatten over the 3-value input, three
# sigmoid hidden layers, and a softmax output with one unit per class id.
network_layers = [keras.layers.Flatten(input_shape=(3, ))]
for units in (hidden_layer_1, hidden_layer_2, hidden_layer_3):
    network_layers.append(keras.layers.Dense(units, activation=tf.nn.sigmoid))
# Disabled fourth hidden layer:
# network_layers.append(keras.layers.Dense(hidden_layer_4, activation=tf.nn.relu))
network_layers.append(keras.layers.Dense(len(output_class), activation=tf.nn.softmax))

model = keras.Sequential(network_layers)

# Write a diagram of the architecture (with shapes and layer names) to model_plot.png.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)


In [154]:
# Configure training: Adam optimizer, accuracy as the reported metric.
# The sparse variant of categorical cross-entropy takes integer class ids as
# targets; the commented-out non-sparse loss would need one-hot encoded targets.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
#               loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train for 10 epochs on the training split. No validation data is passed, so
# only training loss/accuracy is reported per epoch.
model.fit(train_input, train_output, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fb057797278>

In [155]:
# Loss and accuracy on the held-out split.
test_loss, test_acc = model.evaluate(test_input, test_output)
print('Test accuracy:', test_acc)

# One row of class probabilities (softmax output) per test sample.
prediction = model.predict(test_input)

# Size of the test split, then the number of true samples with class id 0, 1 and 2.
print(test_output.shape[0])
print(np.count_nonzero(test_output == 0))
print(np.count_nonzero(test_output == 1))
print(np.count_nonzero(test_output == 2))

# Most probable class for every test sample. All rows of `prediction` are used,
# rather than a hard-coded count of 2021, which raised IndexError or silently
# skipped samples whenever the test split had a different size.
predicted_class = np.argmax(prediction, axis=1)

# How often each class id was predicted ...
zeros = np.count_nonzero(predicted_class == 0)
ones = np.count_nonzero(predicted_class == 1)
twos = np.count_nonzero(predicted_class == 2)

# ... and how many of those predictions match the true label.
correct_zeros = np.count_nonzero((predicted_class == 0) & (test_output == 0))
correct_ones = np.count_nonzero((predicted_class == 1) & (test_output == 1))
correct_twos = np.count_nonzero((predicted_class == 2) & (test_output == 2))

print()
print(ones, zeros, twos)
print()
print(correct_ones, correct_zeros, correct_twos)

# TODO(review): scratch arithmetic on hard-coded numbers, unrelated to the values
# computed above - explain where these figures come from or delete.
print(1/2889)
print(1345/2889)
print(150/2889)
print(155/3000)
print(51/1000)

Test accuracy: 0.4774864
2021
505
965
551

2021 0 0

965 0 0
0.00034614053305642093
0.46555901696088614
0.05192107995846314
0.051666666666666666
0.051
