In [2]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
import copy
import glob
# Beautify print - delete later
import sys
np.set_printoptions(threshold=sys.maxsize)


In [3]:
path = '../data'

files = [f for f in glob.glob(path + "**/*.csv", recursive=True)]

matches = pd.DataFrame()

for f in files:
    fields = ['HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG']
    curr_season = pd.read_csv(f, error_bad_lines=False, usecols=fields)
    # Filter float values that were given in the data
    curr_season.dropna(inplace=True)
    curr_season['FTHG'] = curr_season['FTHG'].astype(int)
    curr_season['FTAG'] = curr_season['FTAG'].astype(int)
    matches = matches.append(curr_season, ignore_index=True, sort='False')


In [4]:
last_n_games = 10
match = {}
team = {}

In [6]:
for i in range(len(matches)):
    match_index = len(matches) - i - 1
    curr_match = matches.iloc[len(matches) - i - 1]
    match[match_index] = [(curr_match['HomeTeam'], curr_match['AwayTeam']), [0] * 6, [0] * 6]

    if curr_match['HomeTeam'] in team:
        team[curr_match['HomeTeam']] = team[curr_match['HomeTeam']] + [match_index]
    else:
        team[curr_match['HomeTeam']] = [match_index]

    if curr_match['AwayTeam'] in team:
        team[curr_match['AwayTeam']] = team[curr_match['AwayTeam']] + [match_index]
    else:
        team[curr_match['AwayTeam']] = [match_index]

    if len(team[curr_match['HomeTeam']]) > last_n_games:
        curr_team = copy.deepcopy(curr_match['HomeTeam'])
        match_key = copy.deepcopy(team[curr_team].pop(0))
        curr_team_matches = copy.deepcopy(team[curr_team])

        games_total = 0
        wins_total = 0
        draws_total = 0
        losses_total = 0
        goals_scored_total = 0
        goals_conceded_total = 0

        for key in curr_team_matches:
            games_total += 1
            match_calc = matches.iloc[key]
            if match_calc['FTR'] == 'H' and match_calc['HomeTeam'] == curr_team:
                wins_total += 1
            elif match_calc['FTR'] == 'A' and match_calc['AwayTeam'] == curr_team:
                wins_total += 1

            if match_calc['FTR'] == 'H' and match_calc['AwayTeam'] == curr_team:
                losses_total += 1
            elif match_calc['FTR'] == 'A' and match_calc['HomeTeam'] == curr_team:
                losses_total += 1

            if match_calc['FTR'] == 'D':
                draws_total += 1

            if match_calc['HomeTeam'] == curr_team:
                goals_scored_total += match_calc['FTHG']
                goals_conceded_total += match_calc['FTAG']
            elif match_calc['AwayTeam'] == curr_team:
                goals_scored_total += match_calc['FTAG']
                goals_conceded_total += match_calc['FTHG']

        home_or_away = 2
        if matches.iloc[match_key]['HomeTeam'] == curr_team:
            home_or_away = 1
        match[match_key][home_or_away] = [games_total, wins_total, draws_total,
                                          losses_total, goals_scored_total, goals_conceded_total]
    if len(team[curr_match['AwayTeam']]) > last_n_games:
        curr_team = copy.deepcopy(curr_match['AwayTeam'])
        match_key = copy.deepcopy(team[curr_team].pop(0))
        curr_team_matches = copy.deepcopy(team[curr_team])

        games_total = 0
        wins_total = 0
        draws_total = 0
        losses_total = 0
        goals_scored_total = 0
        goals_conceded_total = 0

        for key in curr_team_matches:
            games_total += 1
            match_calc = matches.iloc[key]

            if match_calc['FTR'] == 'H' and match_calc['HomeTeam'] == curr_team:
                wins_total += 1
            elif match_calc['FTR'] == 'A' and match_calc['AwayTeam'] == curr_team:
                wins_total += 1

            if match_calc['FTR'] == 'H' and match_calc['AwayTeam'] == curr_team:
                losses_total += 1
            elif match_calc['FTR'] == 'A' and match_calc['HomeTeam'] == curr_team:
                losses_total += 1

            if match_calc['FTR'] == 'D':
                draws_total += 1

            if match_calc['HomeTeam'] == curr_team:
                goals_scored_total += match_calc['FTHG']
                goals_conceded_total += match_calc['FTAG']
            elif match_calc['AwayTeam'] == curr_team:
                goals_scored_total += match_calc['FTAG']
                goals_conceded_total += match_calc['FTHG']

        home_or_away = 2
        if matches.iloc[match_key]['HomeTeam'] == curr_team:
            home_or_away = 1
        match[match_key][home_or_away] = [games_total, wins_total, draws_total,
                                          losses_total, goals_scored_total, goals_conceded_total]


In [7]:
matches_nn_input = []
rows_to_drop = []
for key, value in match.items():
    if np.count_nonzero(match[key][1]) == 0 or np.count_nonzero(match[key][2]) == 0:
        # print(key, value)
        rows_to_drop.append(key)
    else:
        matches_nn_input.append(match[key][1][1:] + match[key][2][1:])

matches = matches.drop(rows_to_drop)
matches.index = range(len(matches))
matches_nn_input = np.array(matches_nn_input)

full_time_results = matches['FTR']

In [38]:
# output_class = ['H', 'D', 'A']
output_class = [1, 0, 2]

output_final_ints = []
for res in full_time_results:
    if res == 'H':
        output_final_ints.append(1)
    elif res == 'A':
        output_final_ints.append(2)
    else:
        output_final_ints.append(0)
output_final_ints = np.array(output_final_ints)

train_input, test_input, train_output, test_output =\
    train_test_split(matches_nn_input, output_final_ints, test_size=0.25, shuffle=False)


In [62]:
hidden_layer_1 = 10
hidden_layer_2 = 10
hidden_layer_3 = 10
hidden_layer_4 = 10

In [63]:
model = keras.Sequential([keras.layers.Flatten(),
                          keras.layers.Dense(hidden_layer_1, activation=tf.nn.relu),
                          keras.layers.Dense(hidden_layer_2, activation=tf.nn.relu),
                          keras.layers.Dense(hidden_layer_3, activation=tf.nn.relu),
                          keras.layers.Dense(hidden_layer_4, activation=tf.nn.relu),
                          keras.layers.Dense(len(output_class), activation=tf.nn.softmax)])

In [64]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
#               loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_input, train_output, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f2546b02198>

In [65]:
test_loss, test_acc = model.evaluate(test_input, test_output)
print('Test accuracy:', test_acc)

prediction = model.predict(test_input)

Test accuracy: 0.46032405
