In [216]:
# Prepare the data
import pandas as pd

# Read the raw match results and split them into features (x) and targets (y).
# .copy() makes x an independent frame, so the column assignments below can
# never write into (or warn about) a view of `data`.
data = pd.read_csv('data.csv')
x = data.filter(['season', 'localTeam', 'visitorTeam']).copy()
y = data.filter(['localGoals', 'visitorGoals'])

# Encode team names as integers. IDs start at 1 and are handed out in order of
# first appearance: every local team first, then any team that only ever
# appears as a visitor. `teams` is reused later to encode prediction inputs.
team_names = pd.concat([x['localTeam'], x['visitorTeam']], ignore_index=True).unique()
teams = {name: team_id for team_id, name in enumerate(team_names, start=1)}
counter = len(teams) + 1  # next unused team ID

# Keep only the text before the first "-" of the season label and store it as
# an integer so the feature column is numeric rather than a string.
# NOTE(review): assumes every season label starts with a numeric year
# (e.g. "1970-71") -- confirm against data.csv.
x['season'] = x['season'].str.split('-').str[0].astype(int)
x['localTeam'] = x['localTeam'].map(teams)
x['visitorTeam'] = x['visitorTeam'].map(teams)

# Train and save the model
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib

# Fixed seed so the 90/10 split and the fitted tree are identical on every
# Restart & Run All; change it to sample a different split.
RANDOM_STATE = 42

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=RANDOM_STATE
)

# y holds two columns (localGoals, visitorGoals), so the tree is fitted as a
# multi-output classifier and predicts one value per column.
model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Persist the fitted model. Left as the cell's last expression so the list of
# written file names is still displayed as the cell output.
joblib.dump(model, 'model-data.joblib')



['model-data.joblib']

In [217]:
# Measure the accuracy on the held-out test split
import joblib
from sklearn.metrics import accuracy_score

# Reload the persisted model from disk and predict the test fixtures.
model = joblib.load('model-data.joblib')
predictions = model.predict(x_test)

# Column 0 of each prediction row is the local side, column 1 the visitor
# side; flatten predictions and ground truth into one list per side.
p_local = list(predictions[:, 0])
p_visitor = list(predictions[:, 1])
y_local = list(y_test['localGoals'])
y_visitor = list(y_test['visitorGoals'])

# Exact-match accuracy, scored independently for each side.
visitor_score = accuracy_score(y_visitor, p_visitor)
local_score = accuracy_score(y_local, p_local)

# Printed order: visitor accuracy first, then local accuracy.
print(visitor_score, local_score)

0.3364737550471063 0.2519515477792732


In [234]:
# Write the fitted decision tree to "tree.dot" in Graphviz format
from sklearn import tree

tree.export_graphviz(
    model,
    out_file="tree.dot",
    # Same column order as the feature frame the model was trained on.
    feature_names=["season", "localTeam", "visitorTeam"],
    label="all",
    filled=True,
    rounded=True,
)


In [241]:
def predict(year, local, visitor):
    """Print the predicted score line for a single fixture.

    Relies on two notebook globals defined in earlier cells: ``model`` (the
    fitted classifier) and ``teams`` (team name -> integer ID mapping).

    Args:
        year: Season year, passed to the model as the first feature.
        local: Name of the local team; must be a key of ``teams``.
        visitor: Name of the visitor team; must be a key of ``teams``.

    Returns:
        None. The prediction is only printed.

    Raises:
        KeyError: If ``local`` or ``visitor`` is not a known team name.
    """
    # Fail early with a readable message instead of an opaque KeyError('name').
    unknown = [name for name in (local, visitor) if name not in teams]
    if unknown:
        raise KeyError("Unknown team name(s): " + ", ".join(map(repr, unknown)))

    # One sample row, in the training feature order:
    # season, localTeam, visitorTeam.
    pre = model.predict([[year, teams[local], teams[visitor]]])
    local_goals, visitor_goals = pre[0][0], pre[0][1]

    print("Match in    :", str(year))
    print("LocalTeam   :", local)
    print("VisitorTeam :", visitor)
    print("Results     :", local_goals, "-", visitor_goals)
    print("#####################")
    print("")

    
# Run the example fixtures, each given as (year, local, visitor).
for year, local, visitor in (
    (2008, 'Atletico de Bilbao', 'Atletico de Madrid'),  # Test 1
    (2003, 'Barcelona', 'Real Madrid'),  # Test 2: a match that actually happened
    (2030, 'Barcelona', 'Real Madrid'),  # Test 3
):
    predict(year, local, visitor)


Match in    : 2008
LocalTeam   : Atletico de Bilbao
VisitorTeam : Atletico de Madrid
Results     : 1 - 4
#####################

Match in    : 2003
LocalTeam   : Barcelona
VisitorTeam : Real Madrid
Results     : 1 - 2
#####################

Match in    : 2030
LocalTeam   : Barcelona
VisitorTeam : Real Madrid
Results     : 2 - 2
#####################

