In [18]:
# Notebook configuration: imports plus the dataset directory shared by every
# cell that reads or writes a file.
import os

import pandas as pd
import numpy as np

# Directory holding the course data files. Later cells build file names with
# `path_data + '<file name>'`, so the value must be a string that ends with a
# path separator.
# Set the ADL_KERAS_DATA_DIR environment variable to use another location
# without editing the notebook; the original location stays the default.
path_data = os.path.join(
    os.environ.get(
        'ADL_KERAS_DATA_DIR',
        '/home/nero/Documents/Estudos/DataCamp/Python/Advanced_Deep_Learning_with_Keras/datasets/',
    ),
    '',  # joining with '' appends a trailing separator only when one is missing
)

In [19]:
# Read the regular-season games table from the data directory.
games_season = pd.read_csv(filepath_or_buffer=path_data + 'games_season.csv')

# Preview the first five rows as the cell output.
games_season.head(n=5)

Unnamed: 0,season,team_1,team_2,home,score_diff,score_1,score_2,won
0,1985,3745,6664,0,17,81,64,1
1,1985,126,7493,1,7,77,70,1
2,1985,288,3593,1,7,63,56,1
3,1985,1846,9881,1,16,70,54,1
4,1985,2675,10298,1,12,86,74,1


In [20]:
# exercise 01

"""
Define team lookup

Shared layers allow a model to use the same weight matrix for multiple steps. In this exercise, you will build a "team strength" layer that represents each team by a single number. You will use this number for both teams in the model. The model will learn a number for each team that works well both when the team is team_1 and when the team is team_2 in the input data.

The games_season DataFrame is available in your workspace.
"""

# Instructions

"""

    Count the number of unique teams.
    Create an embedding layer that maps each team ID to a single number representing that team's strength.
    The output shape should be 1 dimension (as we want to represent the teams by a single number).
    The input length should be 1 dimension (as each team is represented by exactly one id).

"""

# solution

# Imports
from tensorflow.keras.layers import Embedding
from numpy import unique

# Collect every team ID that appears in either team column
team_ids = games_season[['team_1', 'team_2']].to_numpy()

# Count the unique number of teams
n_teams = unique(team_ids).shape[0]

# Create an embedding layer that maps each team ID to one learned number.
# An Embedding can only look up integer indices in [0, input_dim), so the table
# is sized from the largest team ID instead of from the number of distinct IDs.
# Both values are equal when the IDs are exactly 0..n_teams-1; using the max
# keeps every lookup in range when IDs are sparse or do not start at 0.
team_lookup = Embedding(input_dim=max(n_teams, int(team_ids.max()) + 1),
                        output_dim=1,
                        input_length=1,
                        name='Team-Strength')

#----------------------------------#

# Conclusion

"""
The embedding layer is a lot like a dictionary, but your model learns the values for each key.
"""

'\nThe embedding layer is a lot like a dictionary, but your model learns the values for each key.\n'

In [21]:
# exercise 02

"""
Define team model

The team strength lookup has three components: an input, an embedding layer, and a flatten layer that creates the output.

If you wrap these three layers in a model with an input and output, you can re-use that stack of three layers at multiple places.

Note again that the weights for all three layers will be shared everywhere we use them.
"""

# Instructions

"""

    Create a 1D input layer for the team ID (which will be an integer). Be sure to set the correct input shape!
    Pass this input to the team strength lookup layer you created previously.
    Flatten the output of the team strength lookup.
    Create a model that uses the 1D input as input and flattened team strength as output.

"""

# solution

# Imports
from tensorflow.keras.layers import Input, Embedding, Flatten
from tensorflow.keras.models import Model

# Single-value input: one team ID per example
teamid_in = Input(shape=(1,))

# Look the ID up in the shared team-strength embedding, then flatten the result
strength_lookup = team_lookup(teamid_in)
strength_lookup_flat = Flatten()(strength_lookup)

# Wrap input -> embedding -> flatten as one model so the stack can be re-used
team_strength_model = Model(inputs=teamid_in,
                            outputs=strength_lookup_flat,
                            name='Team-Strength-Model')

#----------------------------------#

# Conclusion

"""
The model will be reusable, so you can use it in two places in your final model.
"""

'\nThe model will be reusable, so you can use it in two places in your final model.\n'

In [24]:
# Attach the Adam optimizer and a mean-absolute-error loss to the standalone
# team-strength model.
team_strength_model.compile(loss='mae', optimizer='adam')

In [23]:
# Write the team-strength model to a .h5 file inside the data directory.
team_strength_model.save(filepath=path_data + 'team_strength_model.h5')



In [8]:
# exercise 03

"""
Defining two inputs

In this exercise, you will define two input layers for the two teams in your model. This allows you to specify later in the model how the data from each team will be used differently.
"""

# Instructions

"""

    Create an input layer to use for team 1. Recall that our input dimension is 1.
    Name the input "Team-1-In" so you can later distinguish it from team 2.
    Create an input layer to use for team 2, named "Team-2-In".

"""

# solution

# Load the input layer from tensorflow.keras.layers
from tensorflow.keras.layers import Input

# One named, single-value input per team so the two sides of a game can be
# told apart later in the model.
team_in_1 = Input(name="Team-1-In", shape=(1,))
team_in_2 = Input(name="Team-2-In", shape=(1,))

#----------------------------------#

# Conclusion

"""
These two inputs will be used later for the shared layer.
"""

'\nThese two inputs will be used later for the shared layer.\n'

In [9]:
# exercise 04

"""
Lookup both inputs in the same model

Now that you have a team strength model and an input layer for each team, you can lookup the team inputs in the shared team strength model. The two inputs will share the same weights.

In this dataset, you have 10,888 unique teams. You want to learn a strength rating for each team, such that if any pair of teams plays each other, you can predict the score, even if those two teams have never played before. Furthermore, you want the strength rating to be the same, regardless of whether the team is the home team or the away team.

To achieve this, you use a shared layer, defined by the re-usable model (team_strength_model) you built in exercise 02 and the two input layers (team_in_1 and team_in_2) from the previous exercise, all of which are available in your workspace.
"""

# Instructions

"""

    Lookup the first team ID in the team strength model.
    Lookup the second team ID in the team strength model.

"""

# solution

# Pass both team inputs through the same team-strength model (team 1 first,
# then team 2), so the two lookups share one set of weights.
team_1_strength, team_2_strength = [
    team_strength_model(team_input) for team_input in (team_in_1, team_in_2)
]


#----------------------------------#

# Conclusion

"""
Now your model knows how strong each team is.
"""

'\nNow your model knows how strong each team is.\n'

In [11]:
# exercise 05

"""
Output layer using shared layer

Now that you've looked up how "strong" each team is, subtract the team strengths to determine which team is expected to win the game.

This is a bit like the seeds that the tournament committee uses, which are also a measure of team strength. But rather than using seed differences to predict score differences, you'll use the difference of your own team strength model to predict score differences.

The subtract layer will combine the weights from the two layers by subtracting them.
"""

# Instructions

"""

    Import the Subtract layer from tensorflow.keras.layers.
    Combine the two-team strength lookups you did earlier.

"""

# solution

# Import the Subtract layer from tensorflow.keras
from tensorflow.keras.layers import Subtract

# Create a subtract layer using the inputs from the previous exercise.
# The layer takes a list of exactly two tensors and outputs first minus second,
# i.e. team 1's looked-up strength minus team 2's.
score_diff = Subtract()([team_1_strength, team_2_strength])

#----------------------------------#

# Conclusion

"""
This setup subtracts the team strength ratings to determine a winner.
"""

'\nThis setup subtracts the team strength ratings to determine a winner.\n'

In [12]:
# exercise 06

"""
Model using two inputs and one output

Now that you have your two inputs (team id 1 and team id 2) and output (score difference), you can wrap them up in a model so you can use it later for fitting to data and evaluating on new data.

Your model will look like the following diagram:
"""

# Instructions

"""

    Define a model with the two teams as inputs and use the score difference as the output.
    Compile the model with the 'adam' optimizer and 'mean_absolute_error' loss.

"""

# solution

# Imports
from tensorflow.keras.layers import Subtract
from tensorflow.keras.models import Model

# Rebuild the strength difference (same computation as the previous exercise)
score_diff = Subtract()([team_1_strength, team_2_strength])

# Two team-ID inputs in, predicted score difference out
model = Model(inputs=[team_in_1, team_in_2], outputs=score_diff)

# Optimize with Adam against the mean absolute error
model.compile(loss='mean_absolute_error', optimizer='adam')

#----------------------------------#

# Conclusion

"""
Now your model is finalized and ready to fit to data.
"""

'\nNow your model is finalized and ready to fit to data.\n'

In [13]:
# exercise 07

"""
Fit the model to the regular season training data

Now that you've defined a complete team strength model, you can fit it to the basketball data! Since your model has two inputs now, you need to pass the input data as a list.
"""

# Instructions

"""

    Assign the 'team_1' and 'team_2' columns from games_season to input_1 and input_2, respectively.
    Use 'score_diff' column from games_season as the target.
    Fit the model using 1 epoch, a batch size of 2048, and a 10% validation split.

"""

# solution

# Team IDs from the regular-season data, one column per model input
input_1 = games_season['team_1']
input_2 = games_season['team_2']

# Train on the two ID columns with the score difference as the target:
# a single epoch, batches of 2048 games, 10% of the rows held out for validation
model.fit(x=[input_1, input_2],
          y=games_season['score_diff'],
          batch_size=2048,
          epochs=1,
          validation_split=0.10,
          verbose=True)

#----------------------------------#

# Conclusion

"""
Now our model has learned a strength rating for every team.
"""



'\nNow our model has learned a strength rating for every team.\n'

In [14]:
# Read the tournament games table from the data directory.
games_tourney = pd.read_csv(filepath_or_buffer=path_data + 'games_tourney.csv')

In [15]:
# exercise 08

"""
Evaluate the model on the tournament test data

The model you fit to the regular season data (model) in the previous exercise and the tournament dataset (games_tourney) are available in your workspace.

In this exercise, you will evaluate the model on this new dataset. This evaluation will tell you how well you can predict the tournament games, based on a model trained with the regular season data. This is interesting because many teams play each other in the tournament that did not play in the regular season, so this is a very good check that your model is not overfitting.
"""

# Instructions

"""

    Assign the 'team_1' and 'team_2' columns from games_tourney to input_1 and input_2, respectively.
    Evaluate the model.

"""

# solution

# Team IDs from the tournament data, one column per model input
input_1 = games_tourney['team_1']
input_2 = games_tourney['team_2']

# Score the fitted model on the tournament games and print the resulting loss
print(model.evaluate(x=[input_1, input_2],
                     y=games_tourney['score_diff'],
                     verbose=False))

#----------------------------------#

# Conclusion

"""
Great job! It's time to move on to models with more than two inputs.
"""

11.682635307312012


"\nGreat job! It's time to move on to models with more than two inputs.\n"