<a href="https://colab.research.google.com/github/Qhwelah/NeuralBlock-NeuralNetPredictor/blob/main/NeuralNetPredictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Data Prepping for the Models**

In [64]:
# !pip3 uninstall gdown -y && pip3 install gdown
import gdown

# Google Drive link to the dataset; gdown saves it in the working directory
# under the file name given by `output`.
url = "https://drive.google.com/uc?id=1UPXnC2oy20r4Yesw7lPuDRHlsoR0l0DV"
output = "data.csv"
# Last expression of the cell, so the value returned by gdown.download is
# what the notebook displays (the recorded run shows 'data.csv').
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1UPXnC2oy20r4Yesw7lPuDRHlsoR0l0DV
To: /content/data.csv
100%|██████████| 8.96M/8.96M [00:00<00:00, 44.0MB/s]


'data.csv'

In [65]:
## import the data
import pandas as pd

# Raw dataset as downloaded. In the recorded run it has xRot / yRot followed by
# alternating block_<n> (block name) and block_<n>_dist (integer) columns.
df = pd.read_csv("data.csv")

# Plain-text (truncated) dump of the frame for a quick visual check.
print(df)

      xRot   yRot                block_1  block_1_dist                block_2  \
0    81.45  26.98   minecraft:oak_leaves             6   minecraft:oak_planks   
1    59.47   2.02   minecraft:oak_planks             4        minecraft:water   
2   -58.81   3.46         minecraft:dirt           159        minecraft:stone   
3    61.45  36.54   minecraft:oak_leaves            40        minecraft:stone   
4   -44.43  84.42  minecraft:grass_block            56        minecraft:stone   
..     ...    ...                    ...           ...                    ...   
115  23.38  11.33   minecraft:oak_planks            11        minecraft:water   
116   1.92  50.17         minecraft:dirt            72   minecraft:oak_planks   
117 -64.33  83.93   minecraft:oak_planks            21  minecraft:grass_block   
118  -1.77   5.55        minecraft:stone           146        minecraft:stone   
119  11.65  76.97          minecraft:air           113   minecraft:oak_planks   

     block_2_dist          

In [66]:
## Getting all values of non-numeric types only
# df.info() prints the dtype summary; in the recorded run the object columns
# are the ones holding block names.
df.info()
# Everything that is neither float nor int, i.e. the columns that still need
# to be converted to numbers before modelling.
non_numeric_values = df.select_dtypes(exclude=["float", "int"])
# print(non_numeric_values)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Columns: 7202 entries, xRot to block_3600_dist
dtypes: float64(2), int64(3600), object(3600)
memory usage: 6.6+ MB


In [67]:
## Mapping all non-numeric data values to some numeric values
# The position of a block name in `block_mapping_conversions` is its numeric code.

# parameters:
columns_to_check_for_unique_values = 20  # only block_1 .. block_<this> are scanned for names

# internal values:
block_mapping_conversions = []

print("Event log for block conversion pushes: ")
for column_number in range(1, columns_to_check_for_unique_values + 1):
  block_column = f"block_{column_number}"
  # factorize() lists a column's distinct non-missing values in order of first
  # appearance, which is the order in which a row-by-row scan would meet them.
  _, names_in_column = df[block_column].factorize()
  for new_conversion in names_in_column:
    if new_conversion in block_mapping_conversions:
      continue  # this name already has a code
    block_mapping_conversions.append(new_conversion)
    print(f"pushing {new_conversion} to values at index {len(block_mapping_conversions)-1}")

# Reserve the last code for the 'unknown' placeholder.
block_mapping_conversions.append('unknown')
print(f"pushing unknown to values at index {len(block_mapping_conversions)-1}")

print()
print("Final conversion values: ")
print(block_mapping_conversions)

Event log for block conversion pushes: 
pushing minecraft:oak_leaves to values at index 0
pushing minecraft:oak_planks to values at index 1
pushing minecraft:dirt to values at index 2
pushing minecraft:grass_block to values at index 3
pushing minecraft:air to values at index 4
pushing minecraft:stone to values at index 5
pushing minecraft:water to values at index 6
pushing minecraft:oak_log to values at index 7
pushing unknown to values at index 8

Final conversion values: 
['minecraft:oak_leaves', 'minecraft:oak_planks', 'minecraft:dirt', 'minecraft:grass_block', 'minecraft:air', 'minecraft:stone', 'minecraft:water', 'minecraft:oak_log', 'unknown']


In [68]:
## Converting actual data values in the dataframe to numbers using block mapping
def convertBlock(val):
  """Return the numeric code of a block name.

  The code is the position of `val` in the notebook-level list
  `block_mapping_conversions`. A value that is not in that list yields -1
  (note: this is not the index of the 'unknown' entry).
  """
  for code, known_block in enumerate(block_mapping_conversions):
    if val == known_block:
      return code
  return -1

# Block-name columns are the block_<n> columns that are not distance columns.
block_columns = [
  name for name in df.columns
  if name.startswith("block_") and not name.endswith("_dist")
]
# print(block_columns)

# Work on a copy so the original string-valued frame `df` stays available.
df_numerical = df.copy()
# Replace every block name with its numeric code, column by column.
df_numerical[block_columns] = df_numerical[block_columns].apply(
  lambda column: column.apply(convertBlock)
)

print()
print(df_numerical) # the numerized dataframe


      xRot   yRot  block_1  block_1_dist  block_2  block_2_dist  block_3  \
0    81.45  26.98        0             6        1            94        7   
1    59.47   2.02        1             4        6             9        4   
2   -58.81   3.46        2           159        5            75        4   
3    61.45  36.54        0            40        5            21        4   
4   -44.43  84.42        3            56        5            41        5   
..     ...    ...      ...           ...      ...           ...      ...   
115  23.38  11.33        1            11        6            50        4   
116   1.92  50.17        2            72        1            93        1   
117 -64.33  83.93        1            21        3           110        5   
118  -1.77   5.55        5           146        5           151        5   
119  11.65  76.97        4           113        1            18        3   

     block_3_dist  block_4  block_4_dist  ...  block_3596  block_3596_dist  \
0       

**KNN Model Predictions**

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [70]:
# Express every distance column as its absolute difference from the target
# block's distance, then remove the target's own distance column.
# NOTE: this cell rewrites df_numerical in place and drops the target column,
# so running it a second time raises a KeyError on the missing column.
target_distance_column = 'block_5_dist'

distance_columns = [name for name in df_numerical.columns if 'dist' in name]
other_distance_columns = [name for name in distance_columns if name != target_distance_column]

reference_distance = df_numerical[target_distance_column]
df_numerical[other_distance_columns] = (
    df_numerical[other_distance_columns].sub(reference_distance, axis=0).abs()
)

df_numerical = df_numerical.drop(columns=[target_distance_column])

In [71]:
# Column the KNN classifier has to predict.
target_block = 'block_5'

# Label vector first, then the feature matrix (every other column).
y = df_numerical[target_block]
X = df_numerical.drop(columns=[target_block])

# 80/20 split; the fixed random_state keeps the split the same across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [72]:
# KNN classifier over all remaining columns; weights='distance' makes closer
# neighbours count more than farther ones in the vote.
knn = KNeighborsClassifier(n_neighbors=26, weights='distance')
# Last expression of the cell, so the fitted estimator is displayed.
knn.fit(X_train, y_train)

In [73]:
# Predict the target block code for the held-out rows and score the result.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Fraction of test rows whose block code was predicted exactly.
print(f"KNN model accuracy: {accuracy:.2f}")

KNN model accuracy: 0.38


**Neural Network via TensorFlow Predictions**

In [74]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

Data Masking and Test/Train Splits

In [75]:
def mask_entries(X, mask_candidate_column_indices, missing_val=9, missing_rate=0.1, rng=None):
    """Randomly replace a share of the candidate cells in every row with a placeholder.

    For each row, `max(1, int(len(candidates) * missing_rate))` distinct columns
    are drawn from `mask_candidate_column_indices` and overwritten with
    `missing_val`. The input frame is not modified.

    Parameters
    ----------
    X : pandas.DataFrame
        Data to mask. Rows are addressed by position, so any index works.
    mask_candidate_column_indices : sequence of int
        Positional column indices that are allowed to be masked.
    missing_val : scalar, default 9
        Placeholder written into masked cells. The default is a fixed number
        and is not derived from any conversion list; pass the intended
        placeholder explicitly.
    missing_rate : float, default 0.1
        Fraction of the candidate columns to mask in each row (at least one
        column is always masked).
    rng : numpy.random.Generator or None, default None
        Source of randomness. When None the global `np.random` state is used,
        so `np.random.seed(...)` still controls the result.

    Returns
    -------
    (X_masked, mask) : tuple of pandas.DataFrame
        `X_masked` is a copy of `X` with the placeholder filled in; `mask` is a
        boolean frame of the same shape/index/columns that is True exactly
        where a cell was masked.

    Raises
    ------
    ValueError
        Raised by the underlying `choice` call when there are rows to mask but
        no candidates, or more columns are requested than candidates exist.
    """
    print("started")
    X_masked = X.copy()
    mask_values = np.zeros(X.shape, dtype=bool)
    print("copies made")

    candidate_positions = np.asarray(mask_candidate_column_indices, dtype=int)
    # The number of cells to mask is the same for every row, so compute it once.
    num_mask = max(1, int(len(candidate_positions) * missing_rate))
    chooser = np.random if rng is None else rng

    # Randomly selects (missing_rate)% of the candidate columns to "mask"
    #  (swap out for the missing_val) for each individual data row.
    for row_pos in range(X.shape[0]):
        chosen = chooser.choice(candidate_positions, num_mask, replace=False)
        # Positional access: label-based .loc would only hit the right row when
        # the index happens to be 0..n-1 and would append new rows otherwise.
        X_masked.iloc[row_pos, chosen] = missing_val
        mask_values[row_pos, chosen] = True  # track what was masked

    mask = pd.DataFrame(mask_values, index=X.index, columns=X.columns)
    print("Exited masking loop")
    return X_masked, mask

In [76]:
## Parameters --- ##
# Percentage of data to mask and ask the model to fill in (0-1):
missing_rate = 0.1
# The location in the conversion list where the 'unknown' conversion was appended
missing_val = len(block_mapping_conversions)-1
# Seed for the random masking, so a fresh Restart & Run All produces the same masks
mask_seed = 42
## -------------- ##

# Data prepping
X_NN = df_numerical.copy()
print(X_NN.shape)

# Train/Test splits
from sklearn.model_selection import train_test_split
X_NN_train, X_NN_test = train_test_split(X_NN, test_size=0.2, random_state=42)

# Save current indices from overall list and reset the train/test indices each starting at index 0
train_indices = X_NN_train.index.copy()
test_indices = X_NN_test.index.copy()
X_NN_train = X_NN_train.reset_index(drop=True)
X_NN_test = X_NN_test.reset_index(drop=True)

# Masking data in each row for prediction later.
# Rotation and distance columns are excluded: only *block ids* get replaced by the placeholder.
masking_candidates = [col for col in df_numerical.columns if not col.endswith("_dist") and not col.endswith("Rot")]
# mask_entries works with positional column indices, not names.
masking_candidate_indices = [df_numerical.columns.get_loc(col) for col in masking_candidates]

# mask_entries draws from NumPy's global random state; seed it right before
# masking so the train and test masks are reproducible.
np.random.seed(mask_seed)
X_NN_train_masked, train_mask = mask_entries(X_NN_train, masking_candidate_indices, missing_val, missing_rate)
X_NN_test_masked, test_mask = mask_entries(X_NN_test, masking_candidate_indices, missing_val, missing_rate)

print("Training data:")
print(X_NN_train_masked.shape)

print()
print("Testing data:")
print(X_NN_test_masked.shape)

(120, 7201)
started
copies made
Exited masking loop
started
copies made
Exited masking loop
Training data:
(96, 7201)

Testing data:
(24, 7201)


In [77]:
print("Training data:")
print(X_NN_train_masked)

print()
print("Testing data:")
print(X_NN_test_masked)

Training data:
     xRot   yRot  block_1  block_1_dist  block_2  block_2_dist  block_3  \
0  -63.50  25.09        7            58        5             8        1   
1   22.69  22.52        5            35        5            38        7   
2  -30.97  52.76        4            48        8            40        5   
3  -83.30  15.05        2            51        6            11        5   
4   60.84  11.27        4            93        6            89        7   
..    ...    ...      ...           ...      ...           ...      ...   
91  81.58  28.55        5            19        6             4        1   
92  21.75  81.58        7            55        8            11        1   
93 -41.43  10.16        4            86        5            38        1   
94   2.65   8.53        8            44        8             9        1   
95 -71.45  54.84        1            95        3             2        0   

    block_3_dist  block_4  block_4_dist  ...  block_3596  block_3596_dist  \
0      

TensorFlow Model

In [78]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [79]:
# Network inputs are the masked rows; the targets are the same rows before masking.
X_train_input = X_NN_train_masked.to_numpy()
y_train_target = X_NN_train.to_numpy()

X_test_input = X_NN_test_masked.to_numpy()
y_test_target = X_NN_test.to_numpy()

# Model building
input_dim = X_train_input.shape[1]
hidden_layer_sizes = (2048, 1024, 512)

# Fully connected network: ReLU hidden layers of shrinking width, followed by a
# linear layer that emits one value per input column.
model = keras.Sequential(
    [layers.Input(shape=(input_dim,))]
    + [layers.Dense(units, activation='relu') for units in hidden_layer_sizes]
    + [layers.Dense(input_dim)]  # output size = input size, no activation
)

model.summary()


In [80]:
# Define the loss function
def masked_mse_loss(y_true, y_pred):
    """Mean squared error restricted to the elements where `y_true` equals 9.0.

    Returns the sum of squared errors over the selected elements divided by
    their count, or 0.0 when no element of `y_true` equals 9.0.

    NOTE(review): the selection is made on `y_true`. The training cell passes
    the *unmasked* frame as the target, so this picks cells whose true value
    happens to be 9.0 rather than the cells that were masked in the input -
    confirm whether that is intended.
    NOTE(review): 9.0 is hard-coded here, while the masking cell uses
    len(block_mapping_conversions)-1 as the placeholder (index 8 in the
    recorded run) - the two values do not agree.
    """
    missing_val = 9.0  # hard-coded placeholder value; see the review notes above

    # Create a mask: 1 if y_true == missing_val, else 0
    mask = tf.cast(tf.equal(y_true, missing_val), dtype=tf.float32)

    # Loss only where mask == 1
    loss = tf.square(y_true - y_pred) * mask

    # Average the loss only over the masked elements
    loss_sum = tf.reduce_sum(loss)
    num_masked = tf.reduce_sum(mask)

    # avoid division by zero
    return tf.cond(num_masked > 0, lambda: loss_sum / num_masked, lambda: tf.constant(0.0))


In [81]:
# Compile the TensorFlow model: Adam optimizer with the custom masked_mse_loss
# defined in this notebook as the training objective.
model.compile(optimizer='adam', loss=masked_mse_loss)

In [82]:
# Model Training and Hyperparameters
# Inputs are the masked rows and targets the unmasked rows. The test split is
# passed as validation data, so the reported val_loss is computed on the test set.
history = model.fit(
    X_train_input, y_train_target,
    validation_data=(X_test_input, y_test_target),
    epochs=50,
    batch_size=32
)

Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 425ms/step - loss: 301.9956 - val_loss: 154.4536
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 653ms/step - loss: 120.5138 - val_loss: 84.8493
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 359ms/step - loss: 70.5280 - val_loss: 76.2467
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 354ms/step - loss: 54.4414 - val_loss: 64.2219
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 348ms/step - loss: 40.6081 - val_loss: 72.5957
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 350ms/step - loss: 32.0064 - val_loss: 78.1542
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 346ms/step - loss: 31.9285 - val_loss: 64.6208
Epoch 8/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 336ms/step - loss: 20.5397 - val_loss: 58.1338
Epoch 9/50
[1m3/3[0m [32m━━━━━━━━━

In [83]:
# Apply the trained model to the masked test rows. The final layer is linear,
# so each row of `predictions` holds one raw real-valued output per input column.
predictions = model.predict(X_test_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step


In [84]:
# Raw network outputs (NumPy abbreviates the printed array); these are
# continuous values, not yet rounded or mapped back to block codes.
print(predictions)

[[ 1.7210871 -3.1851954  3.245874  ... -4.8067675  4.656707  -2.2068124]
 [ 2.222076  -2.695091   2.5393176 ... -4.0886908  3.433571  -1.7120732]
 [ 1.6621848 -2.601854   2.5674028 ... -3.9741967  3.4968452 -1.8197329]
 ...
 [ 2.436329  -3.6374373  3.5539448 ... -4.889296   4.223875  -2.5482621]
 [ 1.8860097 -2.7537384  2.613662  ... -3.3857663  2.9287193 -1.5278008]
 [ 1.805586  -3.2791007  3.1268415 ... -3.9903526  4.23667   -1.8596596]]
