In [14]:
import pandas as pd
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Subtract
from tensorflow.keras.utils import plot_model

In [2]:
games_season = pd.read_csv('inputs/games_season.csv')
games_tourney = pd.read_csv('inputs/games_tourney.csv')
games_season.head()

Unnamed: 0,season,team_1,team_2,home,score_diff,score_1,score_2,won
0,1985,3745,6664,0,17,81,64,1
1,1985,126,7493,1,7,77,70,1
2,1985,288,3593,1,7,63,56,1
3,1985,1846,9881,1,16,70,54,1
4,1985,2675,10298,1,12,86,74,1


In [3]:
#Count the number of teams.
n_teams = np.unique(games_season[['team_1', 'team_2']]).shape[0]
n_teams

10888

In [4]:
#Create an embedding layer.
#The embedding layer maps each team ID to a single number representing the team's strngth.

team_lookup = Embedding(input_dim = n_teams, output_dim = 1, 
                       input_length = 1, name = 'Team-Strength')


### Define the team model.

In [5]:
#Create an input layer from the team ID.
teamid_in = Input(shape = (1, ))

#Look for the input in the team strength embedding layer.
strength_lookup = team_lookup(teamid_in)

#Flatten the output.
strength_lookup_flat = Flatten()(strength_lookup)

#Combine the operations into a single, re-usable model.
team_strength_model = Model(teamid_in, strength_lookup_flat, name = 'Team-Strength-Model')

2022-06-10 17:09:25.640855: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-06-10 17:09:25.640929: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-10 17:09:25.641027: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (debonair): /proc/driver/nvidia/version does not exist
2022-06-10 17:09:25.648562: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
team_strength_model.compile(loss = 'mae', 
                           optimizer = 'adam')

team_strength_model.summary()

Model: "Team-Strength-Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1)]               0         
                                                                 
 Team-Strength (Embedding)   (None, 1, 1)              10888     
                                                                 
 flatten (Flatten)           (None, 1)                 0         
                                                                 
Total params: 10,888
Trainable params: 10,888
Non-trainable params: 0
_________________________________________________________________


### Define 2 inputs.

In [8]:
team_1_in = Input(shape = (1, ), name = 'Team-1-In')
team_2_in = Input(shape = (1, ), name = 'Team-2-In')

### Lookup both inputs in the same model.

In [10]:
#Lookup team 1 in the team strength model.
team_1_strength = team_strength_model(team_1_in)

#Lookup team 2 in the team strength model.
team_2_strength = team_strength_model(team_2_in)

### Output layer using shared layers.

In [12]:
#Create a subtract layer from keras.
score_diff = Subtract()([team_1_strength, team_2_strength])

In [13]:
#Create the model.
model = Model([team_1_in, team_2_in], score_diff)

#Compile the model.
model.compile(optimizer = 'adam',
             loss = 'mean_absolute_error')

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Team-1-In (InputLayer)         [(None, 1)]          0           []                               
                                                                                                  
 Team-2-In (InputLayer)         [(None, 1)]          0           []                               
                                                                                                  
 Team-Strength-Model (Functiona  (None, 1)           10888       ['Team-1-In[0][0]',              
 l)                                                               'Team-2-In[0][0]']              
                                                                                                  
 subtract (Subtract)            (None, 1)            0           ['Team-Strength-Model[1][0]',

In [16]:
plot_model(model, to_file = 'inputs/model.svg')

ValueError: Cannot embed the 'svg' image format

### Fit the model to the regular season training data.'

In [17]:
#Get the colums for training.
input_1 = games_season['team_1']
input_2 = games_season['team_2']

model.fit([input_1, input_2], games_season['score_diff'], 
         epochs = 1, batch_size = 32, validation_split = 0.10, verbose = 1)



<keras.callbacks.History at 0x7f57a97c37c0>

### Evaluate the model on the test data.

In [18]:
#Get team for the tournament data.
input_1 = games_tourney['team_1']
input_2 = games_tourney['team_2']

#Evaluate model.
model.evaluate([input_1, input_2], games_tourney['score_diff'], verbose = 1)



11.617619514465332