In [109]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [110]:
# Read the score-sheet CSV and discard its first column
# (per the original note, that column holds the player ID and is not a feature).
dataset_name = "output_dataset.csv"
df = pd.read_csv(dataset_name).iloc[:, 1:]
df

Unnamed: 0,Ones,Twos,Threes,Fours,Fives,Sixes,Total,Bonus,3 of a Kind,4 of a Kind,Full House,Small Straight,Large Straight,Chance,Yahtzee,Score
0,,,,,,,,,,,,30.0,,,,271
1,,,,,,18.0,,,,,,30.0,,,,271
2,,,,,,18.0,,,25.0,,,30.0,,,,271
3,,,,12.0,,18.0,,,25.0,,,30.0,,,,271
4,4.0,,,12.0,,18.0,,,25.0,,,30.0,,,,271
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,2.0,,9.0,16.0,20.0,18.0,,,27.0,29.0,,,40.0,22.0,,273
1095,2.0,,9.0,16.0,20.0,18.0,,,27.0,29.0,,30.0,40.0,22.0,,273
1096,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,,30.0,40.0,22.0,,273
1097,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,25.0,30.0,40.0,22.0,,273


In [111]:
# Add one "<column> Mask" indicator per original column: 1.0 where the value
# is missing (NaN), 0.0 where it is present. The indicators are appended after
# the original columns, in the same order.
# NOTE(review): this covers every column, so the target also gets a
# "Score Mask" column; it appears to be constant in the displayed rows.
missing_flags = df.isna().astype(float).add_suffix(' Mask')
masked_df = pd.concat([df, missing_flags], axis=1)
masked_df

Unnamed: 0,Ones,Twos,Threes,Fours,Fives,Sixes,Total,Bonus,3 of a Kind,4 of a Kind,...,Total Mask,Bonus Mask,3 of a Kind Mask,4 of a Kind Mask,Full House Mask,Small Straight Mask,Large Straight Mask,Chance Mask,Yahtzee Mask,Score Mask
0,,,,,,,,,,,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
1,,,,,,18.0,,,,,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
2,,,,,,18.0,,,25.0,,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
3,,,,12.0,,18.0,,,25.0,,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
4,4.0,,,12.0,,18.0,,,25.0,,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,2.0,,9.0,16.0,20.0,18.0,,,27.0,29.0,...,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
1095,2.0,,9.0,16.0,20.0,18.0,,,27.0,29.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1096,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1097,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [112]:
# Missing entries become a -1 sentinel; the mask columns already record
# which entries were originally absent.
missing_sentinel = -1
masked_df = masked_df.fillna(value=missing_sentinel)
masked_df

Unnamed: 0,Ones,Twos,Threes,Fours,Fives,Sixes,Total,Bonus,3 of a Kind,4 of a Kind,...,Total Mask,Bonus Mask,3 of a Kind Mask,4 of a Kind Mask,Full House Mask,Small Straight Mask,Large Straight Mask,Chance Mask,Yahtzee Mask,Score Mask
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,18.0,-1.0,-1.0,-1.0,-1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
2,-1.0,-1.0,-1.0,-1.0,-1.0,18.0,-1.0,-1.0,25.0,-1.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
3,-1.0,-1.0,-1.0,12.0,-1.0,18.0,-1.0,-1.0,25.0,-1.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
4,4.0,-1.0,-1.0,12.0,-1.0,18.0,-1.0,-1.0,25.0,-1.0,...,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,2.0,-1.0,9.0,16.0,20.0,18.0,-1.0,-1.0,27.0,29.0,...,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0
1095,2.0,-1.0,9.0,16.0,20.0,18.0,-1.0,-1.0,27.0,29.0,...,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1096,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1097,2.0,0.0,9.0,16.0,20.0,18.0,65.0,35.0,27.0,29.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [None]:
seed = 42

# Seed Python's `random`, NumPy and TensorFlow together so that weight
# initialisation and batch shuffling in the later cells are repeatable,
# not only the train/test split below.
tf.keras.utils.set_random_seed(seed)

# Features are every column except the regression target 'Score'
X = masked_df.drop(columns='Score')
y = masked_df['Score']

# Train-test split (80% train / 20% test)
# NOTE(review): consecutive rows in the displayed data share the same Score,
# which suggests they are successive states of one game; a row-wise random
# split may then place rows of the same game on both sides. Confirm whether
# a grouped split is needed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Normalize data: fit the scaler on the training rows only, then apply the
# same transform to the test rows so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y

InvalidIndexError: (slice(None, None, None), slice(None, 15, None))

In [119]:
# Deep Neural Network: two hidden ReLU layers and a single linear output
# unit for the regression target.
# The input width is taken from the prepared feature matrix rather than a
# hard-coded 31, so the model keeps matching the data if columns change.
n_features = X_train.shape[1]
model = models.Sequential([
    layers.InputLayer(shape=(n_features,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])

In [120]:
# Configure training: Adam optimizer, mean absolute error as the loss
model.compile(
    loss='mean_absolute_error',
    optimizer='adam',
)

In [122]:
# Fit for 5 epochs, with 10% of the training rows held out for validation;
# the returned History object is kept for later inspection.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    epochs=5,
)

Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 23.9859 - val_loss: 24.5872
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.5226 - val_loss: 24.4275
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 20.7269 - val_loss: 24.4093
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 22.4863 - val_loss: 24.3078
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 23.1018 - val_loss: 24.2821


In [123]:
# Loss on the held-out test split (the model was compiled with mean absolute error)
test_loss = model.evaluate(x=X_test, y=y_test)
print(f"Test Loss: {test_loss}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30.4439 
Test Loss: 28.928558349609375


In [126]:
# Predict on the test split and show the first (scaled) feature row
# next to the model's output for it.
predictions = model.predict(X_test)
first_row, first_prediction = X_test[0], predictions[0]
print(first_row, first_prediction)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[-1.16776105 -0.9745158  -0.87039852 -0.89445482 -0.98205915 -0.99477781
 -0.44313977 -0.36539558 -0.89339855 -0.62399453  0.7696349  -1.38268584
 -0.83898463 -1.09786772 -0.46729644  1.45499872  1.06709237  0.92012969
  0.93712599  1.02418311  1.02885762  0.44812908  0.44812908  0.93712599
  0.83481207 -0.7454879   1.3892444   0.92435403  1.12492841  0.84663857
  0.        ] [246.29166]
