In [68]:
from keras.models import Sequential
from keras.layers import Dense, Input, Concatenate
from keras import Model
from tensorflow import keras

In [69]:
import os
import pandas as pd
import numpy as np
from statistics import mean
import matplotlib.pyplot as plt
%matplotlib inline

In [70]:
def make_model():
  """Build the (uncompiled) two-input, three-class classifier.

  A 60-value "Game Play Data" input and a 3-value "Selected Character" input
  are concatenated and passed through Dense layers of 500, 100 and 50 ReLU
  units, ending in a 3-unit softmax layer.

  Returns:
    A keras ``Model`` whose inputs are [game play data, selected character]
    and whose output is the 3-way softmax.
  """
  game_play = Input(shape=(60,), name="Game Play Data")
  character = Input(shape=(3,), name="Selected Character")
  features = Concatenate()([game_play, character])

  # No `input_dim` here: the layer is called on `features`, so its input
  # width (60 + 3) is taken from that tensor. The former `input_dim=8`
  # did not match it.
  hidden = Dense(500, activation='relu')(features)
  hidden = Dense(100, activation='relu')(hidden)
  hidden = Dense(50, activation='relu')(hidden)
  probabilities = Dense(3, activation='softmax')(hidden)
  return Model(inputs=[game_play, character], outputs=probabilities)

In [71]:
# Adam optimizer with default hyperparameters; train_active_learning() passes
# it to its first model.compile() call.
opt = keras.optimizers.Adam()

In [72]:
# Directory whose CSV files are read by the loading loop.
type_path = "sample_data/test_data"

# Accumulators filled by the loading loop; each is keyed by CSV filename.
graph_data, game_play_data, hash_data, environments = {}, {}, {}, {}

In [73]:
# Number of per-level slots kept for every adventurer / environment.
level_cap = 5
# Maps a configuration ID (first CSV column) to the remaining column values.
configuration_data = {}

for filename in os.listdir(type_path):
    file_path = os.path.join(type_path, filename)
    if not os.path.isfile(file_path):
        continue
    df = pd.read_csv(file_path)

    # Match on the file name (not the full path) in every branch, so a
    # directory name can never select the wrong parser.
    if "level_selection" in filename:
        seen_ids = hash_data.setdefault(filename, [])
        env_counts = environments.setdefault(filename, {})
        for battle_environment, battle_id, level in zip(
            df['BattleEnvironment'], df['ID'], df['Level']
        ):
            # Look the environment up in this file's dict. The previous check
            # against the filename-keyed `environments` was always true and
            # reset the counters on every row.
            if battle_environment not in env_counts:
                env_counts[battle_environment] = [0] * level_cap
            # Same for the ID: compare against this file's list of seen IDs
            # so a repeated battle is only counted once.
            if battle_id not in seen_ids:
                # NOTE(review): indexed with `level` here but `level - 1` in
                # the game-play branch below; with `level_cap` slots a Level
                # equal to level_cap raises IndexError. Confirm whether Level
                # is 0- or 1-based in this file.
                env_counts[battle_environment][level] += 1
                seen_ids.append(battle_id)
    elif "level_configuration" in filename:
        for i in range(len(df)):
            # `config` is intentionally left at notebook scope: later cells
            # print and reuse the last parsed row.
            config = df.iloc[i].values.tolist()
            print(config)
            configuration_data[config[0]] = config[1:]
            # NOTE(review): fixed key "1" is overwritten by every row and looks
            # like a placeholder entry; confirm it is still wanted.
            configuration_data["1"] = config[1:]
    else:
        if filename not in game_play_data:
            game_play_data[filename] = {
                adventurer: [[] for _ in range(level_cap)]
                for adventurer in ("Brawler", "Mage", "Swordsman")
            }
            graph_data[filename] = {}
        # The adventurer type is the part of the file name before the first "_".
        adventurer_type = filename.split('_')[0]
        for level, max_steps in zip(df['Level'], df['MaxCount']):
            game_play_data[filename][adventurer_type][level - 1].append(max_steps)
        # Per level: mean of the recorded MaxCount values, or [] when the
        # level has no rows.
        for key, per_level in game_play_data[filename].items():
            graph_data[filename][key] = [
                mean(steps) if len(steps) > 0 else []
                for steps in per_level
            ]

['02/08/2022 00:32:31', 0.2001211, 0.1932651, 0.2002864]


In [74]:
# Dump both loaded dictionaries, one per line.
print(configuration_data, game_play_data, sep="\n")

{'02/08/2022 00:32:31': [0.2001211, 0.1932651, 0.2002864], '1': [0.2001211, 0.1932651, 0.2002864]}
{'Brawler_1_02_08_2022_00_49.csv': {'Brawler': [[494, 1047, 1453, 9190, 11785, 12378, 14560, 20776, 20776, 378, 2840], [8829, 15548, 18060, 19795, 19863, 20417, 21139, 21762, 27576, 3054, 4660], [10534, 22181, 26814, 30327, 35099, 41009, 46389, 56164, 58369, 7926, 15190], [36723, 46389, 51863, 63547, 76626, 79677, 2328, 3964, 4257, 31775], [77339, 0, 18269, 0, 19783, 0]], 'Mage': [[], [], [], [], []], 'Swordsman': [[], [], [], [], []]}, 'Swordsman_1_02_08_2022_00_49.csv': {'Brawler': [[], [], [], [], []], 'Mage': [[], [], [], [], []], 'Swordsman': [[494, 1453, 2421, 6915, 9918, 11785, 13652, 2252, 9840, 4520], [1220, 1220, 4259, 12314, 13652, 14215, 15112, 21430, 21430, 28681, 5763, 480, 6785, 2015, 15286, 10027, 14770], [14560, 14972, 20417, 22201, 25807, 28365, 28831, 35099, 37490, 42255, 66534, 13000, 13281, 13161], [29660, 34003, 37490, 41312, 48011, 49147, 50170, 64643, 68249, 77007,

In [75]:
# `config` is the loop variable left over from the loading cell: the last row
# parsed from the level_configuration CSV.
print(config)

['02/08/2022 00:32:31', 0.2001211, 0.1932651, 0.2002864]


In [77]:
# Label vectors keyed by configuration ID; IDs are matched against the keys
# of `configuration_data`.
reviews = {"02/08/2022 00:32:31": [0, 0, 1], "Test2": [0, 0, 1]}
from sklearn.model_selection import train_test_split

y = list(reviews)

# Reviewed IDs that also have a loaded configuration, in review order.
labeled_keys = [review_id for review_id in y if review_id in configuration_data]
labeled_config = [configuration_data[review_id] for review_id in labeled_keys]
labels = [reviews[review_id] for review_id in labeled_keys]

# Every loaded configuration that has no review yet.
unlabeled_data = [
    values for config_id, values in configuration_data.items() if config_id not in y
]

In [78]:
# Inspect the labelled configurations and the ID (column 0) of the last
# parsed configuration row.
print(labeled_config)
print(config[0])
# Duplicate the last parsed row (its leading ID string included) so that
# `config` holds two samples.
# NOTE(review): not idempotent; re-running this cell nests `config` one level
# deeper each time.
config = [config, config]
print(labels)
# Duplicate the first label so `labels` gains one entry per run of this cell;
# raises IndexError when `labels` is empty.
labels.append(labels[0])
print(unlabeled_data)

[[0.2001211, 0.1932651, 0.2002864]]
02/08/2022 00:32:31
[[0, 0, 1]]
[[0.2001211, 0.1932651, 0.2002864]]


In [80]:
# Show the duplicated samples and their labels, one per line.
print(config, labels, sep="\n")

[['02/08/2022 00:32:31', 0.2001211, 0.1932651, 0.2002864], ['02/08/2022 00:32:31', 0.2001211, 0.1932651, 0.2002864]]
[[0, 0, 1], [0, 0, 1]]


In [81]:
# train_test_split returns the two splits of each input array next to each
# other: (config_train, config_val, labels_train, labels_val). The previous
# unpacking order stored the validation features in `train_labels` and the
# training labels in `val_data`.
# NOTE(review): rows of `config` still start with the ID string; confirm the
# model is meant to receive that column.
train_data, val_data, train_labels, val_labels = train_test_split(config, labels, test_size=0.1)

In [82]:
# Helper function for merging new history objects with older ones
def append_history(losses, val_losses, accuracy, val_accuracy, history):
    """Return the four running metric lists extended with one fit's results.

    `history.history` must provide the keys "loss", "val_loss",
    "categorical_accuracy" and "val_categorical_accuracy". New lists are
    returned; the lists passed in are left unmodified.
    """
    logs = history.history
    return (
        losses + logs["loss"],
        val_losses + logs["val_loss"],
        accuracy + logs["categorical_accuracy"],
        val_accuracy + logs["val_categorical_accuracy"],
    )


# Plotter function
def plot_history(losses, val_losses, accuracies, val_accuracies):
    """Draw two figures over epochs: train/val loss, then train/val accuracy."""
    panels = (
        ((losses, val_losses), ["train_loss", "val_loss"], "Loss"),
        ((accuracies, val_accuracies), ["train_accuracy", "val_accuracy"], "Accuracy"),
    )
    for curves, legend_entries, y_label in panels:
        for curve in curves:
            plt.plot(curve)
        plt.legend(legend_entries)
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.show()

In [83]:
import tensorflow as tf

In [None]:
def train_active_learning(
    num_iterations=3,
    sampling_size=5000,
    pool_negatives=None,
    pool_positives=None,
    train_dataset=None,
    test_dataset=None,
):
    """Train the classifier, then retrain it for several active-learning rounds.

    Adapted from
    https://keras.io/examples/nlp/active_learning_review_classification/

    Reads `train_data`, `train_labels`, `val_data`, `val_labels` and `opt`
    from notebook scope and writes the best checkpoint to "AL_Model.h5".

    Args:
      num_iterations: Number of evaluate / sample / retrain rounds that follow
        the initial fit.
      sampling_size: Number of pool samples requested per round. Only used
        when the pools are supplied.
      pool_negatives, pool_positives, train_dataset: Optional dataset objects
        used through take / skip / concatenate / prefetch (e.g.
        tf.data.Dataset). They must be passed together. When given, training
        and evaluation run on `train_dataset`, which grows each round by the
        samples drawn from both pools. When omitted, every round retrains on
        `train_data` / `train_labels`. These names used to be read inside the
        function before ever being assigned.
      test_dataset: Optional data for the final evaluation. Falls back to a
        notebook-level `test_dataset` if one exists; otherwise the final
        evaluation is skipped.

    Returns:
      The model reloaded from "AL_Model.h5" after the last round, or the
      fitted model itself when `num_iterations` is 0.

    Raises:
      ValueError: If only some of `pool_negatives`, `pool_positives` and
        `train_dataset` are supplied.
    """
    pool_args = (pool_negatives, pool_positives, train_dataset)
    use_pools = all(arg is not None for arg in pool_args)
    if not use_pools and any(arg is not None for arg in pool_args):
        raise ValueError(
            "pool_negatives, pool_positives and train_dataset must be passed together"
        )

    def build_metrics():
        # Fresh metric objects for every compile(). The order matters: together
        # with the loss it fixes the layout of model.evaluate()'s result.
        return [
            keras.metrics.CategoricalAccuracy(),
            keras.metrics.FalseNegatives(),
            keras.metrics.FalsePositives(),
        ]

    def training_inputs():
        # (x, y) for fit / evaluate; a dataset already carries its targets.
        if use_pools:
            return train_dataset, None
        return train_data, train_labels

    # NOTE(review): make_model() declares two inputs, so `x` has to supply
    # both of them; confirm the structure of the training data.
    model = make_model()
    model.compile(
        loss="categorical_crossentropy", optimizer=opt, metrics=build_metrics()
    )
    losses, val_losses, accuracies, val_accuracies = [], [], [], []

    # Defining checkpoints.
    # The checkpoint callback is reused throughout the training since it only
    # saves the best overall model.
    checkpoint = keras.callbacks.ModelCheckpoint(
        "AL_Model.h5", save_best_only=True, verbose=1
    )
    # Here, patience is set to 4. This can be set higher if desired.
    early_stopping = keras.callbacks.EarlyStopping(patience=4, verbose=1)

    features, targets = training_inputs()
    print(f"Starting to train with {len(features)} samples")
    # Initial fit on the labelled training set
    history = model.fit(
        x=features,
        y=targets,
        epochs=20,
        validation_data=(val_data, val_labels),
        callbacks=[checkpoint, early_stopping],
    )

    # Appending history
    losses, val_losses, accuracies, val_accuracies = append_history(
        losses, val_losses, accuracies, val_accuracies, history
    )

    for _ in range(num_iterations):
        features, targets = training_inputs()

        # Evaluating the number of zeros and ones incorrectly classified.
        # evaluate() returns [loss, categorical_accuracy, false_negatives,
        # false_positives], following the metric order passed to compile().
        _, _, false_negatives, false_positives = model.evaluate(
            features, targets, verbose=0
        )

        print("-" * 100)
        print(
            f"Number of zeros incorrectly classified: {false_negatives}, Number of ones incorrectly classified: {false_positives}"
        )

        # This technique of Active Learning demonstrates ratio based sampling where
        # Number of ones/zeros to sample = Number of ones/zeros incorrectly classified / Total incorrectly classified
        if false_negatives != 0 and false_positives != 0:
            total = false_negatives + false_positives
            sample_ratio_ones, sample_ratio_zeros = (
                false_positives / total,
                false_negatives / total,
            )
        # Otherwise (at least one of the two counts is zero) sample both
        # classes equally
        else:
            sample_ratio_ones, sample_ratio_zeros = 0.5, 0.5

        print(
            f"Sample ratio for positives: {sample_ratio_ones}, Sample ratio for negatives:{sample_ratio_zeros}"
        )

        if use_pools:
            num_zeros = int(sample_ratio_zeros * sampling_size)
            num_ones = int(sample_ratio_ones * sampling_size)

            # Sample the required number of ones and zeros
            sampled_dataset = pool_negatives.take(num_zeros).concatenate(
                pool_positives.take(num_ones)
            )

            # Skip the sampled data points to avoid repetition of sample
            pool_negatives = pool_negatives.skip(num_zeros)
            pool_positives = pool_positives.skip(num_ones)

            # Concatenating the train_dataset with the sampled_dataset
            train_dataset = train_dataset.concatenate(sampled_dataset).prefetch(
                tf.data.AUTOTUNE
            )

        # Re-read the inputs so the fit below sees the enlarged train_dataset.
        features, targets = training_inputs()
        print(f"Starting training with {len(features)} samples")
        print("-" * 100)

        # We recompile the model to reset the optimizer states and retrain the model
        # NOTE(review): the initial fit uses `opt` while retraining uses
        # "rmsprop"; confirm the optimizer switch is intended.
        model.compile(
            loss="categorical_crossentropy",
            optimizer="rmsprop",
            metrics=build_metrics(),
        )
        history = model.fit(
            x=features,
            y=targets,
            validation_data=(val_data, val_labels),
            epochs=20,
            callbacks=[
                checkpoint,
                keras.callbacks.EarlyStopping(patience=4, verbose=1),
            ],
        )

        # Appending the history
        losses, val_losses, accuracies, val_accuracies = append_history(
            losses, val_losses, accuracies, val_accuracies, history
        )

        # Loading the best model from this training loop
        model = keras.models.load_model("AL_Model.h5")

    # Plotting the overall history and evaluating the final model
    plot_history(losses, val_losses, accuracies, val_accuracies)

    # Keep honouring a notebook-level `test_dataset`, which is what the
    # function read before the parameter existed.
    if test_dataset is None:
        test_dataset = globals().get("test_dataset")

    print("-" * 100)
    if test_dataset is not None:
        print(
            "Test set evaluation: ",
            model.evaluate(test_dataset, verbose=0, return_dict=True),
        )
    else:
        print("Test set evaluation skipped: no test_dataset was provided")
    print("-" * 100)

    return model

In [84]:
# Keep the trained model: the return value was previously discarded, although
# the following cell reads `model`.
model = train_active_learning()

TypeError: ignored

In [None]:
# Import from the public `keras.utils` namespace; `keras.utils.vis_utils` is a
# private module path that newer Keras releases no longer provide.
from keras.utils import plot_model
# NOTE(review): `model` must already exist at notebook scope (e.g. the value
# returned by train_active_learning()); confirm before running this cell.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)