### Import basic data about games

In [198]:
import tensorflow as tf
import numpy as np
import pandas as pd

CSV_COLUMN_NAMES = ['team_abbreviation_home', 'team_abbreviation_away', 'wl_home']
CSV_RESULTS = ['L', 'W']
data_path = 'game.csv'

data = pd.read_csv(data_path, usecols=CSV_COLUMN_NAMES, header=0)
data.dropna(inplace=True)
data.loc[data['wl_home'] == 'L', 'wl_home'] = 0
data.loc[data['wl_home'] == 'W', 'wl_home'] = 1
data = data.astype({'team_abbreviation_home' : 'string'})
data = data.astype({'team_abbreviation_away' : 'string'})
data = data.astype({'wl_home' : 'int32'})

In [199]:
data.head()

Unnamed: 0,team_abbreviation_home,wl_home,team_abbreviation_away
0,HUS,0,NYK
1,BOM,1,PIT
2,PRO,1,BOS
3,CHS,1,NYK
4,DEF,0,WAS


### Divide data into training and testing sets

In [200]:
train_data = pd.DataFrame(data.sample(frac=0.8, random_state=25))
test_data = pd.DataFrame(data.drop(train_data.index))

print(f'Training data size: {train_data.shape[0]}')
print(f'Testing data size: {test_data.shape[0]}')

Training data size: 49888
Testing data size: 12472


In [201]:
train_y = train_data.pop('wl_home')
test_y = test_data.pop('wl_home')

### Define input function

In [202]:
def input_fn(features, labels, training=True, batch_size=256):
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    
    if training:
        dataset = dataset.shuffle(1000).repeat()
        
    return dataset.batch(batch_size)

In [203]:
my_feature_columns = []
for key in train_data.keys():
    vocabulary = train_data[key].unique()
    my_feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(key, vocabulary))

my_feature_columns = [tf.feature_column.indicator_column(c) for c in my_feature_columns]
    
print(my_feature_columns)
print(train_data)
print(train_y)

[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='team_abbreviation_home', vocabulary_list=('WAS', 'PHL', 'MIL', 'NOH', 'GOS', 'SAC', 'CLE', 'HOU', 'PHX', 'ATL', 'DEN', 'INO', 'SEA', 'POR', 'MEM', 'CHH', 'FTW', 'BOS', 'PRO', 'CHI', 'LAC', 'NOK', 'DAL', 'NYN', 'LAL', 'NYK', 'SAN', 'DET', 'MIA', 'NOP', 'CHA', 'BUF', 'IND', 'MIN', 'PHW', 'NJN', 'BAL', 'ORL', 'UTH', 'SAS', 'SDC', 'CIN', 'PHI', 'UTA', 'MNL', 'SDR', 'TOR', 'JET', 'KCK', 'SYR', 'GSW', 'STL', 'MIH', 'VAN', 'ROC', 'BLT', 'CHS', 'BKN', 'SHE', 'OKC', 'CAP', 'SFW', 'AND', 'CHZ', 'HUS', 'NOJ', 'BOM', 'CHP', 'WAT', 'CLR', 'DEF', 'TCB', 'PIT', 'DN'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='team_abbreviation_away', vocabulary_list=('BOS', 'MIL', 'ATL', 'NJN', 'SDC', 'HOU', 'WAS', 'MEM', 'POR', 'CHI', 'PHW', 'MIA', 'IND', 'PHX', 'SYR', 'LAL', 'SDR', 'CHS', 'BUF', 'OKC', 'MIN', 'DEN', 'LAC', 'CHA', 'DET', 'ROC', 'STL', 'GSW', '

### Choose a model and train it

In [205]:
 classifier = tf.estimator.DNNClassifier(
     feature_columns = my_feature_columns,
     hidden_units = [30, 10],
     n_classes = 3
 )
    
classifier.train(
    input_fn=lambda: input_fn(train_data, train_y, training=True), 
    steps=5000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\jakub\AppData\Local\Temp\tmpildbf4ad\model.ckpt-5000
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 5000...
INFO:tensorflow:Saving checkpoints for 5000 into C:\Users\jakub\AppData\Local\Temp\tmpildbf4ad\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 5000...
INFO:tensorflow:loss = 0.783876, step = 5000
INFO:tensorflow:global_step/sec: 794.043
INFO:tensorflow:loss = 0.7803256, step = 5100 (0.127 sec)
INFO:tensorflow:global_step/sec: 1635.46
INFO:tensorflow:loss = 0.8016169, step = 5200 (0.060 sec)
INFO:tensorflow:global_step/sec: 1575.86
INFO:tensorflow:loss = 0.7897626, step = 53

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x16089ca65f0>

In [206]:
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test_data, test_y, training=False))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2023-03-22T23:21:11
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\jakub\AppData\Local\Temp\tmpildbf4ad\model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.25804s
INFO:tensorflow:Finished evaluation at 2023-03-22-23:21:11
INFO:tensorflow:Saving dict for global step 10000: accuracy = 0.6193072, average_loss = 0.7041097, global_step = 10000, loss = 0.70434517
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 10000: C:\Users\jakub\AppData\Local\Temp\tmpildbf4ad\model.ckpt-10000

Test set accuracy: 0.619



### Make a prediction

In [207]:
def input_fn(features, batch_size=256):
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

features = ['team_abbreviation_home', 'team_abbreviation_away']
valid_list = list(my_feature_columns[0].categorical_column.vocabulary_list)
predict = {}

print("Please type numeric values as prompted")
for feature in features:
    valid = False
    while not valid:
        val = input(feature + ": ")
        if val in valid_list: 
            valid = True
            
    predict[feature] = [val]

predictions = classifier.predict(input_fn=lambda: input_fn(predict))
for pred_dict in predictions:
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]
    
    print('Prediction is "{}" ({:.1f}%)'.format(
        CSV_RESULTS[class_id], 100 * probability))

Please type numeric values as prompted
team_abbreviation_home: WAS
team_abbreviation_away: DEN
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\jakub\AppData\Local\Temp\tmpildbf4ad\model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Prediction is "W" (58.8%)
