In [10]:
import argparse
import random
import tensorflow as tf
import os
import minerl
import tree_trajectory
import network
import numpy as np
from tensorflow.keras import datasets, layers, models

In [11]:
# Paths and run configuration.
# NOTE(review): both paths are absolute and machine-specific; point them at your
# own checkout / MineRL data directory before running on another machine.
workspace_path = 'C:/Users/Halim/Downloads/minecraftRL/minecraft_bot_dev-master'
data_path = 'C:/Users/Halim/Downloads/minecraftRL/MineRLenv'

# Base environment name; the dataset id below is derived from it so the two
# cannot drift apart.
env_name = 'MineRLTreechop'
gpu_use = True
pretrained_model = None

# Handle to the recorded demonstrations; used later via get_trajectory_names()
# and load_data().
tree_data = minerl.data.make(env_name + '-v0', data_dir=data_path)

In [12]:
# Upper bound (in MB) on the memory TensorFlow may allocate on the first GPU.
GPU_MEMORY_LIMIT_MB = 4000

# Only query the GPUs when GPU use was requested; an empty list means "run on CPU".
gpus = tf.config.experimental.list_physical_devices('GPU') if gpu_use else []

if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)])
    except RuntimeError as error:
        # Virtual devices can only be configured before the GPU has been
        # initialised; re-running this cell in a live kernel ends up here.
        print(error)
else:
    if gpu_use:
        # Previously this case crashed with IndexError on gpus[0].
        print("gpu_use is True but no GPU was found; falling back to CPU")
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Location for model summaries (TensorBoard event files).
writer = tf.summary.create_file_writer(workspace_path + "/tree_tensorboard")

In [13]:
# Layout of the discrete action space (115 classes in total):
#   index = camera-group base + button offset
# Each camera direction owns a contiguous run of 23 indices.
_CAMERA_BASE_LEFT = 0
_CAMERA_BASE_RIGHT = 23
_CAMERA_BASE_DOWN = 46
_CAMERA_BASE_UP = 69
_CAMERA_BASE_NONE = 92


def _modifier_offset(action):
    """Return 0 / 1 / 2 / 3 for attack / jump / sneak / none (first match wins)."""
    if action['attack'] == 1:
        return 0
    if action['jump'] == 1:
        return 1
    if action['sneak'] == 1:
        return 2
    return 3


def _button_offset(action):
    """Map the key presses of one action to an offset in [0, 22].

    Priority order (first match wins), each movement key being refined by
    the attack / jump / sneak modifier:

        forward -> 0..3, back -> 4..7, right -> 8..11, left -> 12..15,
        sneak only -> 16 (attack), 17 (jump), 18 (plain sneak),
        attack only -> 20, jump only -> 21, nothing pressed -> 22.

    Offset 19 is never produced (sneak is always set in the sneak-only
    branch); the slot is kept so existing class indices stay valid.
    """
    for slot, move_key in enumerate(('forward', 'back', 'right', 'left')):
        if action[move_key] == 1:
            return 4 * slot + _modifier_offset(action)
    if action['sneak'] == 1:
        # sneak == 1 here, so _modifier_offset yields 0, 1 or 2 -> 16, 17, 18.
        return 16 + _modifier_offset(action)
    if action['attack'] == 1:
        return 20
    if action['jump'] == 1:
        return 21
    return 22


def _camera_group_base(camera):
    """Return the first class index of the camera-direction group.

    camera[1] is treated as the left (negative) / right (positive) turn and
    camera[0] as the down (negative) / up (positive) turn, following the
    naming used by the original labelling code. The component with the
    larger magnitude decides the direction; on an exact tie the horizontal
    component wins, so every moving camera gets a label.
    """
    cam_vertical, cam_horizontal = camera[0], camera[1]
    if cam_vertical == 0 and cam_horizontal == 0:
        return _CAMERA_BASE_NONE
    if abs(cam_horizontal) >= abs(cam_vertical):
        return _CAMERA_BASE_LEFT if cam_horizontal < 0 else _CAMERA_BASE_RIGHT
    return _CAMERA_BASE_DOWN if cam_vertical < 0 else _CAMERA_BASE_UP


def _tree_action_to_index(action):
    """Convert one MineRL action dict into a class index in [0, 114].

    Fixes two labelling defects of the previous if/elif ladder:
    * `abs(cam > 0)` tested the sign instead of the magnitude, so purely
      negative camera deltas were labelled as "no camera movement";
    * a tie between the two camera components matched no branch, leaving
      the index stale from the previous frame (or unbound on the first).
    """
    return _camera_group_base(action['camera']) + _button_offset(action)


class TreeTrajectoryDataset(tf.data.Dataset):
    """tf.data source yielding one randomly chosen demonstration trajectory.

    Instantiating the class returns a `tf.data.Dataset` built with
    `from_generator`; each element is `(observations, action_indices)` for a
    whole trajectory.
    """

    @staticmethod
    def _generator(num_trajectorys):
        """Yield a single `(all_obs, all_actions)` pair for one trajectory.

        Args:
            num_trajectorys: accepted for interface compatibility; currently
                unused (exactly one trajectory is yielded per call).

        Yields:
            all_obs: list with one `[frame]` entry per step, where `frame` is
                the `pov` image divided by 255.0.
            all_actions: list with one `np.array([class_index])` per step.
        """
        # https://minerl.io/docs/api/data.html
        trajectory_names = tree_data.get_trajectory_names()
        trajectory_name = random.choice(trajectory_names)
        print("trajectory_name: ", trajectory_name)

        trajectory = tree_data.load_data(trajectory_name, skip_interval=0, include_metadata=False)

        all_obs = []
        all_actions = []
        for dataset_observation, dataset_action, _reward, _next_state, _done in trajectory:
            # Scale pixel values from [0, 255] down to [0, 1].
            observation = dataset_observation['pov'] / 255.0

            # dataset_action looks like:
            # OrderedDict([('attack', 1), ('back', 0), ('camera', array([0., 0.], dtype=float32)),
            #              ('forward', 0), ('jump', 0), ('left', 0), ('right', 0), ('sneak', 0), ('sprint', 0)])
            act_index = _tree_action_to_index(dataset_action)

            # The extra list level adds the length-1 sequence axis expected downstream.
            all_obs.append([observation])
            all_actions.append(np.array([act_index]))

        print("len(all_obs): ", len(all_obs))
        print("")
        yield (all_obs, all_actions)

    def __new__(cls, num_trajectorys=3):
        """Return a generator-backed dataset instead of an instance of this class."""
        return tf.data.Dataset.from_generator(
            cls._generator,
            output_types=(tf.dtypes.float32, tf.dtypes.int32),
            args=(num_trajectorys,)
        )


In [14]:
# Height and width (in pixels) each frame is expected to have.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Number of frames fed to the model as one sequence.
SEQUENCE_LENGTH = 1

# Size of the discrete action space produced by the labelling code (was 43).
num_actions = 115
num_hidden_units = 512

# Width of the softmax output layer; one unit per action class.
CLASSES_LIST = num_actions

def create_convlstm_model():
    '''
    Build the ConvLSTM action classifier and print its summary.

    The network stacks four ConvLSTM2D -> MaxPooling3D stages (4, 8, 14 and
    16 filters); every stage except the last is followed by a time-distributed
    dropout. The result is flattened and fed to a softmax layer with
    CLASSES_LIST units.

    Returns:
        model: the constructed (uncompiled) Sequential model.
    '''
    input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    stage_filters = (4, 8, 14, 16)
    last_stage = len(stage_filters) - 1

    model = models.Sequential()

    for stage, filters in enumerate(stage_filters):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}

        model.add(layers.ConvLSTM2D(filters=filters,
                                    kernel_size=(3, 3),
                                    activation='tanh',
                                    data_format="channels_last",
                                    recurrent_dropout=0.2,
                                    return_sequences=True,
                                    **first_layer_kwargs))

        # Pool over the spatial axes only; the time axis keeps its length.
        model.add(layers.MaxPooling3D(pool_size=(1, 2, 2), padding='same', data_format='channels_last'))

        # The final stage intentionally has no dropout.
        if stage != last_stage:
            model.add(layers.TimeDistributed(layers.Dropout(0.2)))

    model.add(layers.Flatten())
    model.add(layers.Dense(CLASSES_LIST, activation="softmax"))

    # Print the layer overview.
    model.summary()

    return model

In [15]:
# Build the ConvLSTM classifier; create_convlstm_model() also prints the layer summary.
convlstm_model = create_convlstm_model()

# Display the success message.
print("Model Created Successfully!")

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d_4 (ConvLSTM2D)  (None, 1, 62, 62, 4)      1024      
                                                                 
 max_pooling3d_4 (MaxPooling  (None, 1, 31, 31, 4)     0         
 3D)                                                             
                                                                 
 time_distributed_3 (TimeDis  (None, 1, 31, 31, 4)     0         
 tributed)                                                       
                                                                 
 conv_lstm2d_5 (ConvLSTM2D)  (None, 1, 29, 29, 8)      3488      
                                                                 
 max_pooling3d_5 (MaxPooling  (None, 1, 15, 15, 8)     0         
 3D)                                                             
                                                      

In [16]:
# Attach optimizer, loss and metrics. The labels are one-hot encoded before
# training, hence categorical (not sparse) cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
convlstm_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Number of trajectories to train on, and epochs spent on each of them.
NUM_TRAINING_EPISODES = 200
EPOCHS_PER_EPISODE = 5

# One entry per fitted trajectory; each entry is the per-epoch list from fit().
train_loss = []
train_acc = []

# Start training the model.
for training_episode in range(NUM_TRAINING_EPISODES):

    # A fresh dataset per episode so that a new random trajectory is drawn.
    dataset = (
        tf.data.Dataset.range(1)
        .interleave(TreeTrajectoryDataset, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(1)
        .prefetch(tf.data.AUTOTUNE)
    )

    trained_this_episode = False
    for batch in dataset:
        episode_size = batch[0].shape[1]  # only 1 batch per video
        print("episode_size: ", episode_size)  # number of images in the batch/video

        # Drop the outer batch axis added by .batch(1). The previous
        # tf.concat(single_tensor, 0) calls were identity operations.
        replay_obs_array = batch[0][0]  # all images of the trajectory
        replay_act_array = batch[1][0]  # one action index per image

        batch_size = replay_obs_array.shape[0]
        tf.print("batch_size: ", batch_size)

        if batch_size == 0:
            # Nothing to fit on for an empty trajectory.
            continue

        replay_act_array_onehot = tf.one_hot(replay_act_array, num_actions)
        replay_act_array_onehot = tf.reshape(replay_act_array_onehot, (batch_size, num_actions))

        # Fit inside the batch loop so training never runs on stale or
        # undefined variables when the dataset yields no usable batch.
        convlstm_model_training_history = convlstm_model.fit(
            x=replay_obs_array, y=replay_act_array_onehot, epochs=EPOCHS_PER_EPISODE)
        train_loss.append(convlstm_model_training_history.history['loss'])
        train_acc.append(convlstm_model_training_history.history['accuracy'])  # if applicable
        trained_this_episode = True

    if trained_this_episode:
        convlstm_model.save_weights(workspace_path + '/convlstm_model/tree_supervised_model_' + str(training_episode))

trajectory_name:  v3_poised_radicchio_titan-8_960-5780


100%|██████████| 4383/4383 [00:01<00:00, 3205.84it/s] 


len(all_obs):  4383

episode_size:  4383
batch_size:  4383
Epoch 1/5
 30/137 [=====>........................] - ETA: 14s - loss: 2.1445 - accuracy: 0.2375

In [18]:
# Per-epoch loss/accuracy recorded by the most recent fit() call only
# (the full run is accumulated in train_loss / train_acc).
convlstm_model_training_history.history

{'loss': [4.730299472808838,
  4.558948516845703,
  3.1164138317108154,
  2.2933568954467773,
  2.2455859184265137],
 'accuracy': [0.4233957529067993,
  0.22075529396533966,
  0.44181761145591736,
  0.47344180941581726,
  0.47344180941581726]}

In [None]:
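# Optional: resume from previously saved weights (`pretrained_model` is set in the paths cell).
# if pretrained_model is not None:
#     print("Load Pretrained Model")
#     convlstm_model.load_weights(workspace_path + '/convlstm_model/' + pretrained_model)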

In [None]:
# Run the model on a single frame of the last loaded trajectory; expand_dims
# adds the leading batch axis that predict() expects.
# NOTE(review): index 2657 is hard-coded while the trajectory is picked at
# random, so it may be out of range for shorter trajectories — confirm.
predict_example = convlstm_model.predict(tf.expand_dims(replay_obs_array[2657,:,:,:], 0))

In [None]:
# Shape of the model output for the single example frame.
predict_example.shape #PREDICTION

In [None]:
# Index of the highest-scoring action class for the example frame.
tf.argmax(predict_example[0]) #PREDICTION

In [None]:
# One-hot encode the predicted class; the depth follows num_actions instead of
# a duplicated literal so it stays in sync with the model's output width.
predict_example_onehot = tf.one_hot(tf.argmax(predict_example[0]), depth=num_actions) #PREDICTION

In [None]:
# Display the one-hot encoded prediction.
predict_example_onehot #PREDICTION

In [None]:
# Ground-truth one-hot label of the same frame (2657) that was passed to
# predict(); the previous index 2465 showed a different frame.
replay_act_array_onehot[2657] #ACTUAL

In [None]:
# Ground-truth class index of frame 2657, for comparison with the predicted index.
tf.argmax(replay_act_array_onehot[2657]) #ACTUAL

https://www.tensorflow.org/guide/keras/transfer_learning

![image-2.png](attachment:image-2.png)

![image.png](attachment:image.png)

https://github.com/keras-team/keras/issues/4446