In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
from sklearn.model_selection import train_test_split

In [6]:
data = pd.read_csv("data/pl-data-cleaned.csv", index_col=0)
data.drop(columns=["season", "match_date", "home_team_score", "away_team_score"], inplace=True)

team_names = data["home_team"].unique()
team_names.sort()

team_mapping = {}
for i, team in enumerate(team_names):
    team_mapping[team] = i

data["home_team"] = data["home_team"].map(team_mapping)
data["away_team"] = data["away_team"].map(team_mapping)

X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [61]:
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [62]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [63]:
model.summary()

In [64]:
model.fit(X, y, batch_size=16, epochs=10, validation_split=0.2)

Epoch 1/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 44ms/step - accuracy: 0.6508 - loss: 1.6153 - val_accuracy: 0.5556 - val_loss: 1.0746
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5911 - loss: 0.8378 - val_accuracy: 0.5889 - val_loss: 0.8368
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5918 - loss: 0.7183 - val_accuracy: 0.5778 - val_loss: 0.7878
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5955 - loss: 0.7168 - val_accuracy: 0.5889 - val_loss: 0.7585
Epoch 5/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6124 - loss: 0.6783 - val_accuracy: 0.5778 - val_loss: 0.7410
Epoch 6/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6037 - loss: 0.6764 - val_accuracy: 0.6000 - val_loss: 0.7400
Epoch 7/10
[1m23/23[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f834fd84da0>

In [65]:
from sklearn.metrics import accuracy_score

In [73]:
y_pred_test = model.predict(X_test)
y_pred_train = model.predict(X_train)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step


In [74]:
y_pred_test = (y_pred_test > 0.5).astype("int64")
y_pred_train = (y_pred_train > 0.5).astype("int64")

In [76]:
print(accuracy_score(y_test, y_pred_test))
accuracy_score(y_train, y_pred_train)

0.6


0.6361111111111111

In [77]:
import pickle

In [78]:
with open("models/nn_predictor.pkl", "wb") as file:
    pickle.dump(model, file)