In [1]:
import tensorflow as tf
import numpy as np
import os
import pickle
import glob
import re
import random
import cv2
from PIL import Image
from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, Lambda, ReLU, LeakyReLU, MaxPool2D, Flatten, Dense, Concatenate, Dropout, LayerNormalization
from tensorflow.keras.activations import relu
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD

In [2]:
from easydict import EasyDict as edict
config = edict()

config.TRAIN_DATA_PATH = '/Users/vijay/Downloads/Code_Data/ADNet/Data/'
config.GENERATED_DATA_PATH = '/Users/vijay/Downloads/Code_Data/ADNet/generated_data/'
config.LABEL_DIRS_TRAINED_TXT_PATH = '/Users/vijay/Downloads/Code_Data/ADNet/SL_PRETRAIN_LABELS_READ.txt'
config.FINAL_WEIGHTS_DIR = '/Users/vijay/Downloads/Code_Data/ADNet/final_weights/'
config.ADNET_CKPT_DIR = '/Users/vijay/Downloads/Code_Data/ADNet/checkpoints/'
config.ONLINE_TUNE_TEST_SEQ_PATH = '/Users/vijay/Downloads/Code_Data/ADNet/test_seq/'
config.ACTION_HISTORY_SHAPE = [1, 110]
config.GAMMA = 0.9
config.UNIFORM_TRANSLATION = 0.9
config.UNIFORM_SCALE = 10

config.SL_PRETRAIN_EPOCHS = 300
config.SL_PRETRAIN_BATCH_SIZE = 10
config.SL_POS_SAMPS_THRESHOLD = 0.7
config.SL_NEG_SAMPS_THRESHOLD = 0.3

config.RL_PRETRAIN_BATCH_SIZE = 128
config.NUM_FRAMES_IN_PRETRAIN_RL = 10
config.RL_POS_SAMPS_THRESHOLD = 0.7
config.RL_NEG_SAMPS_THRESHOLD = 0.3


config.REDECTION_NUM_SAMPLES = 256
config.ONLINE_FINETUNE_BATCH_SIZE = 256
config.ONLINE_FINETUNE_NUM_SAMPLES = 250
config.ONLINE_FINETUNE_FREQUENCY = 10
config.ONLINE_FINETUNE_NUM_EPOCHS = 30

In [3]:
class GENERATE_SAMPLES:
    def __init__(self):
        
        self.GENERATED_DATA_PATH = config.GENERATED_DATA_PATH
        self.TRAIN_DATA_PATH = config.TRAIN_DATA_PATH # '/Users/vijay/Downloads/Code_Data/ADNet/Data/'
#         self.TRAIN_GT_PATH  = '/Users/vijay/Downloads/Code_Data/ADNet/Data/'
        self.NUM_ACTIONS = 11 # LEFT, RIGHT, UP, DOWN, DOUBLE_LEFT, DOUBLE_RIGHT, DOUBLE_UP, DOUBLE_DOWN, SCLAE_UP, SCALE_DOWN, STOP
        self.NUM_CLASSES = 2 # OBJECT, BACKGROUND
        self.REQ_ACTION_HISTORY = 10 # Last 10 values of actions are required
        self.alpha = 0.03
        self.ACTIONS = np.array([
                                    [-1, 0, 0, 0], # left
                                    [-2, 0, 0, 0], # double left
                                    [+1, 0, 0, 0], # right
                                    [+2, 0, 0, 0], # double right
                                    [0, -1, 0, 0], # up
                                    [0, -2, 0, 0], # double up 
                                    [0, +1, 0, 0], # down
                                    [0, +2, 0, 0], # double down
                                    [0, 0, 0, 0], # stop
                                    [0, 0, -1, -1], # scale up
                                    [0, 0, 1, 1] # sclae down
                                 ], dtype=np.float16)   # SCALE_DOWN
        
    
    ################################
    
    def get_gt_values(self, gt_path):
        with open(gt_path, 'r') as f:

            lines = f.readlines()

            boxes = []
            for line in lines:

                if not line.strip():
                    continue
                x, y, w, h = [int(x) for x in line.split(',')]

                boxes.append([x, y, w, h])

            return boxes


    
    
    
    ################################
    
    # https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
    def calculate_IOU(self, box_1, box_2):
        b1_x1, b1_y1, b1_x2, b1_y2 = box_1[0], box_1[1], box_1[0] + box_1[2], box_1[1] + box_1[3] # x, y, x+w = x_max, y + w = y_max
        b2_x1, b2_y1, b2_x2, b2_y2 = box_2[0], box_2[1], box_2[0] + box_2[2], box_2[1] + box_2[3]
        
        
        i_x1 = max(b1_x1, b2_x1)
        i_y1 = max(b1_y1, b2_y1)
        i_x2 = min(b1_x2, b2_x2)
        i_y2 = min(b1_y2, b2_y2)
        
        
        intersection_area = max(0, i_x2 - i_x1 + 1) * max(0, i_y2 - i_y1 + 1)
        
        
        box1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
        box2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
        
        iou = intersection_area / float(box1_area + box2_area - intersection_area)
        
        return iou
        
        
    
    
    ################################
    
    def add_gaussian_noise(self, img_path):
        img = cv2.imread(img_path)
        img_shape = np.shape(img)
        mean = 0
        var = 10
        sigma = var ** 0.5
        gaussian = np.random.normal(mean, sigma, (img_shape[0], img_shape[1])) #  np.zeros((224, 224), np.float32)

        noisy_image = np.zeros(img_shape, np.float32)

        if len(img.shape) == 2:
            noisy_image = img + gaussian
        else:
            noisy_image[:, :, 0] = img[:, :, 0] + gaussian
            noisy_image[:, :, 1] = img[:, :, 1] + gaussian
            noisy_image[:, :, 2] = img[:, :, 2] + gaussian

        cv2.normalize(noisy_image, noisy_image, 0, 255, cv2.NORM_MINMAX, dtype=-1)
        noisy_image = noisy_image.astype(np.uint8)

        return noisy_image
    
    
    
    
    ###########################
    
    def get_noisy_boxes_wrt_gt_box_value(self, gt_values, noise_type, num_samples):
        
        x = gt_values[0]
        y = gt_values[1]
        w = gt_values[2]
        h = gt_values[3]
        
        noisy_samples = []
        if noise_type == 'gaussian':
            
    
            cov_matrix = np.diag([pow((0.3 * w), 2), pow((0.3 * h), 2), pow((0.1 * w), 2), pow((0.1 * h), 2)])

            for i in range(num_samples):
                noisy_value = np.add(gt_values, np.random.multivariate_normal([0, 0, 0, 0], cov_matrix, 1)).astype(np.int16)
#                 iou = self.calculate_IOU(noisy_value[0], gt_values)
            
            
                noisy_samples.append(noisy_value[0])
            return noisy_samples
        
        elif noise_type == 'uniform':
            mean_wh = (w + h) * 0.5
            centre_x, centre_y = x + (w * 0.5), y + (h * 0.5)
            
            for _ in range(num_samples):
                dx = config.UNIFORM_TRANSLATION * mean_wh * random.uniform(-1.0, 1.0)
                dy = config.UNIFORM_TRANSLATION * mean_wh * random.uniform(-1.0, 1.0)
                dwh = 1.05 ** (config.UNIFORM_SCALE * random.uniform(-1.0, 1.0))


                new_centre_x, new_centre_y = centre_x + dx, centre_y + dy
                new_width, new_height = int(round(w * dwh)), int(round(h * dwh))

                new_x = int(round(new_centre_x - (new_width * 0.5)))
                new_y = int(round(new_centre_y - (new_height * 0.5))) 
                noisy_box = [new_x, new_y, new_width, new_height]
                noisy_samples.append(noisy_box)
            return noisy_samples
      
    
    
    
    ################################
    
    def get_pos_neg_noisy_boxes(self, gt_values, img_path, num_of_samples, pos_threshold, neg_threshold):
        num_pos_samples = int(num_of_samples * 0.5)
        num_neg_samples = num_of_samples - num_pos_samples
        
        all_gaussian_samples = self.get_noisy_boxes_wrt_gt_box_value(gt_values, 'gaussian', num_pos_samples * 3)
        all_uniform_samples = self.get_noisy_boxes_wrt_gt_box_value(gt_values, 'uniform', num_neg_samples * 3)
        
        all_pos_samples = [sample for sample in all_gaussian_samples if self.calculate_IOU(gt_values, sample) >= pos_threshold]
        all_neg_samples = [sample for sample in all_uniform_samples if self.calculate_IOU(gt_values, sample) < neg_threshold]
        
        if len(all_pos_samples) >= num_pos_samples:
            pos_samples = random.sample(all_pos_samples, num_pos_samples)
            
        else:
            
            p_remaining = num_pos_samples - len(all_pos_samples)
#             print(p_remaining, num_pos_samples, len(all_pos_samples))
            for _ in range(p_remaining):
                all_pos_samples.append(random.choice(all_pos_samples))
            
            pos_samples = all_pos_samples
            
        
        
        if len(all_neg_samples) >= num_neg_samples:
            neg_samples = random.sample(all_neg_samples, num_neg_samples)
            
        else:

            n_remaining = num_neg_samples - len(all_neg_samples)
            for _ in range(n_remaining):
                all_neg_samples.append(random.choice(all_neg_samples))
            
            neg_samples = all_neg_samples
            
        return pos_samples, neg_samples
        
        
        
        
    ################################
    
    def get_new_bbox_values_wrt_action(self, action_index, bbox_values):
        
        '''
        If action is not stop or scaleup or scaledown,
        add delta_x and delta_y to the first two values of self.ACTIONS[action_index]  [(1, 0), (-1, 0), (0, 1), (0, -1)
        (2, 0), (-2, 0), (0, 2), (-2, 0)] and add the values to the old x and y
        
        if action is stop, then return the bbox values as it is.
        
        if action is scale_up or scale_down then multiply delta_x and delta_y to 3, 4 values of self.ACTIONS[action_index] and add the
        new values to the old width and height
        '''
#         print(bbox_values, action_index)
        
        delta_x = self.alpha * bbox_values[2] # 0.03 * width
        delta_y = self.alpha * bbox_values[3] # 0.03 * height
            
        if action_index < 8:
            delta_x = max(1, delta_x)
            delta_y = max(1, delta_y)
            
            new_delta_x = self.ACTIONS[action_index][0] * delta_x
            new_delta_y = self.ACTIONS[action_index][1] * delta_y
            
            new_x = int(round(bbox_values[0] + new_delta_x))
            new_y = int(round(bbox_values[1] + new_delta_y))
            
            return [new_x, new_y, bbox_values[2], bbox_values[3]]
        
        elif action_index == 8:
            return bbox_values
        
        else:
            delta_x = max(2, delta_x)
            delta_y = max(2, delta_y)
            
            new_delta_w = self.ACTIONS[action_index][2] * delta_x
            new_delta_h = self.ACTIONS[action_index][3] * delta_y
            
            new_w = int(round(new_delta_w + bbox_values[2]))
            new_h = int(round(new_delta_h + bbox_values[3]))
            
            return [bbox_values[0], bbox_values[1], new_w, new_h]
            
    
    
    ################################
    
    def get_action_and_class_label_for_noisy_samples(self, gt_values, noisy_samples, img_path, samples_types):
        bbox_action_class_label_list = []
        if samples_type == 'pos:'
            for noisy_box_value in noisy_samples:

                iou_of_new_values = []
                new_gt_values_after_action = []
                for index in range(self.NUM_ACTIONS):

                    new_values = self.get_new_bbox_values_wrt_action(index, noisy_box_value)
        #             print( ' ' + str(new_values))
                    iou = self.calculate_IOU(gt_values, new_values)
                    iou_of_new_values.append(iou)
                    new_gt_values_after_action.append(new_values)

                action_label = self.get_max_iou_index(iou_of_new_values) # index of the iou with maximum value which corresponds to the action performed.
                class_label = self.get_class_label(gt_values, new_gt_values_after_action[action_label])
                bbox_action_class_label_list.append(np.concatenate([noisy_box_value, [action_label], [class_label], [img_path]]))

            return bbox_action_class_label_list
        else:
            for box in noisy_samples:
                action_label = -1
                class_label = 1
                bbox_action_class_label_list.append(np.concatenate([box], [action_label], [class_label], [img_path]))
            return bbox_action_class_label_list
    
    
    
    ##################################
    
    def get_noisy_samples_action_and_class_label_for_single_gt_value(self, gt_values, img_path, num_of_samples, pos_threshold, neg_threshold):
        
    
        pos_samples, neg_samples = self.get_pos_neg_noisy_boxes(gt_values, img_path, num_of_samples, pos_threshold, neg_threshold)
        pos_samples_action_class_labels_list = self.get_action_and_class_label_for_noisy_samples(gt_values, pos_samples, img_path, 'pos')
        neg_samples_action_class_labels_list = self.get_action_and_class_label_for_noisy_samples(gt_values, neg_samples, img_path, 'neg')
        
        
        return pos_samples_action_class_labels_list, neg_samples_action_class_labels_list

    
    
    
    
    ################################
    
    def get_class_label(self, gt_values, max_iou_bbox_value):
        iou = self.calculate_IOU(gt_values, max_iou_bbox_value)
        if iou > 0.7:
            return 1
        else:
            return 0
        
    
    
    
    ################################
        
    def create_new_dir(self, new_path):
        if not os.path.exists(new_path):
            os.makedirs(new_path)
    
    
    
    ################################
    
    def save_new_values_into_a_txt_file(self, values_list, txt_file_path):
        '''
        This function reads in every entry and then joins all the values in a single entry using a comma and then write that row as one entity
        '''
        with open(txt_file_path, "w") as output:
            for row in values_list:
                row = ','.join(map(str, row)) 
                output.write(row + '\n')
    

    
    
    ################################

    def get_max_iou_index(self, iou_for_all_actions):
        
        max_index = np.argmax(iou_for_all_actions)
        
        return max_index
    
    
    
    
#     ################################
    
#     def get_action_and_class_label_for_single_gt_value(self, gt_values, img_path, class_name):
        
#         bbox_action_class_label_list = self.perform_actions_and_get_IOU(gt_values, img_path, class_name)
        
#         return bbox_action_class_label_list
    
    

    
    ################################
    
    def generate_train_data_wrt_each_gt_value(self, frames_path, gt_values_path, class_name, save_to_txt_file):
        '''
        action_class_bbox.txt file values
        x, y, w, h, action_label, class_label, img_path
        '''
        all_pos_samples = []  # action_label, class_label, bbox_values with max iou
        all_neg_samples = []
        all_imgs_names = os.listdir(frames_path)
        all_imgs_names.sort(key=lambda f: int(re.sub('\D', '', f))) # sort wrt to the image name '001.jpg', '002.jpg', ...
        all_images_full_path = []
#         new_path = self.GENERATED_DATA_PATH + class_name + '/img/'
#         self.create_dir_for_new_noisy_imgs_for_each_class(new_path)
        
        gt_values = self.get_gt_values(gt_values_path)
        
        if len(gt_values) == len(all_imgs_names):
            for index, image_name in enumerate(all_imgs_names):
                
#                 noisy_image = self.add_gaussian_noise(frames_path + image_name)
#                 Image.fromarray(noisy_image).save(new_path + image_name)
                if '.jpg' in image_name or '.png' in image_name or '.jpeg' in image_name:
                    img_path = frames_path + image_name
#                     bbox_action_class_label_list = self.get_action_and_class_label_for_single_gt_value(gt_values[index], img_path, class_name, num_of_samples) 
    #                 bbox_action_class_label_list.append([index]) 
                    pos_samples_list, neg_samples_list = self.get_noisy_samples_action_and_class_label_for_single_gt_value(gt_values[index], img_path, 250, 
                                                                                                                          config.SL_POS_SAMPS_THRESHOLD,
                                                                                                                          config.SL_NEG_SAMPS_THRESHOLD)
                                                                                                                 
#                     bbox_action_class_values = bbox_action_class_values + bbox_action_class_label_list.copy()
                    all_pos_samples = all_pos_samples + pos_samples_list.copy()
                    all_neg_samples = all_neg_samples + neg_samples_list.copy()

                    if index >= 5:
                        break
            
            
            if save_to_txt_file:
                dir_path_to_save_txt_files = self.GENERATED_DATA_PATH + class_name + '/'
                self.create_new_dir(dir_path_to_save_txt_files)
#                 self.save_new_values_into_a_txt_file(bbox_action_class_values, dir_path_to_save_txt_files + '_action_class_bbox.txt')
                self.save_new_values_into_a_txt_file(all_pos_samples, dir_path_to_save_txt_files + 'pos_action_class_bbox.txt')
                self.save_new_values_into_a_txt_file(all_neg_samples, dir_path_to_save_txt_files + 'neg_action_class_bbox.txt')
            
            else:
                return all_pos_samples, all_neg_samples
#             self.save_new_values_into_a_txt_file(all_images_full_path, dir_path_to_save_txt_files + 'images_names_index.txt')
     
    
    
    ################################
    
    def get_train_samples(self):
        '''
        For the current dataset, Data directory contains directories with class names which has images and correspoding one 
        ground truth values txt file
        '''
        all_dirs_in_data = os.listdir(self.TRAIN_DATA_PATH)
        for class_name in all_dirs_in_data:
#             if class_name == 'Biker':
            frames_data_path = self.TRAIN_DATA_PATH + class_name + '/img/'
            gt_values_path  = self.TRAIN_DATA_PATH + class_name + '/groundtruth_rect.txt'

            self.generate_train_data_wrt_each_gt_value(frames_data_path, gt_values_path, class_name, True)



In [56]:
generate_samples_obj = GENERATE_SAMPLES()
generate_samples_obj.get_train_samples()

In [27]:
adnet = ADNetwork()
adnet_model = adnet.create_network()
m = adnet_model
input_data = np.arange(1 * 112 * 112 * 3).reshape(1, 112, 112, 3)
print(np.shape(input_data[0]))
action_hist = np.zeros(shape = (1, 1, 1, 110))

action_logits, class_logits = m([input_data, action_hist], training = True)

(112, 112, 3)


In [30]:
tf.nn.softmax(class_logits)[0][0]

<tf.Tensor: shape=(), dtype=float32, numpy=0.54080665>

In [28]:
m.summary()

Model: "functional_43"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_43 (InputLayer)           [(None, 112, 112, 3) 0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 53, 53, 96)   14208       input_43[0][0]                   
__________________________________________________________________________________________________
tf_op_layer_LRN_63 (TensorFlowO [(None, 53, 53, 96)] 0           conv_1[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_63 (MaxPooling2D) (None, 52, 52, 96)   0           tf_op_layer_LRN_63[0][0]         
______________________________________________________________________________________

In [118]:
for variable in adnet_model.trainable_variables:
    print(variable.name)

conv_1/kernel:0
conv_1/bias:0
conv_2/kernel:0
conv_2/bias:0
conv_3/kernel:0
conv_3/bias:0
fc_4/kernel:0
fc_4/bias:0
fc_5/kernel:0
fc_5/bias:0
action/kernel:0
action/bias:0
class/kernel:0
class/bias:0


In [5]:
class ADNet_pretraining_data_loading:
    def __init__(self):
        self.GENERATED_DATA_PATH = config.GENERATED_DATA_PATH
        self.bbox_action_class_img_path_list = []
        self.pos_bbox_action_class_path_list = []
        self.neg_bbox_action_class_path_list = []
        self.NUM_ACTIONS = 11
        self.NUM_CLASSES = 2
#         self.read_generated_txt_files()
        self.get_all_created_dirs()
    
    def get_all_created_dirs(self):
        self.generated_dirs = os.listdir(self.GENERATED_DATA_PATH)
        
    
    
    def get_values_from_txt_files(self, bbox_action_label_txt_path):
        
#         bbox_action_label_txt_path = '/Users/vijay/Downloads/Code_Data/ADNet/generated_data/Biker/action_class_bbox.txt'
        bbox_action_class_img_path_list = []
#         self.pos_bbox_action_class_path_list = []
#         self.neg_bbox_action_class_path_list = []
        with open(bbox_action_label_txt_path, 'r') as f:

            lines = f.readlines()
            for line in lines:

                if not line.strip():
                    continue
                line_split = line.split(',')
#                 x, y, w, h, action, class_label, img_path = [int(x.strip()) if index < 6 else x.strip() for index, x in enumerate(line_split)]
                x, y, w, h, action, class_label, img_path = [int(x.strip()) if index < 6 else x.strip() for index, x in enumerate(line_split)]
                bbox_action_class_img_path_list.extend([[[x, y, w, h], action, class_label, img_path]])
        
        if 'pos' in bbox_action_label_txt_path:
            
            self.pos_bbox_action_class_path_list = bbox_action_class_img_path_list
        else:
            
            self.neg_bbox_action_class_path_list = bbox_action_class_img_path_list
        
        
    
    def read_generated_txt_files(self, path):
        class_dirs = glob.glob(path + '*')
        self.bbox_action_class_img_path_list = []
        for text_file_path in class_dirs:
#             print(text_file_path)
            self.get_values_from_txt_files(text_file_path)
        
    
    @tf.function
    def parse_data(self, row_data):
        batch_data = []
#         np_row_data = row_data.numpy()
        for entry in row_data:
            
#             np_entry = entry.numpy()
            sub_list = []
            for index, item in enumerate(entry):
                
                if index != 6:
                    item = int(tf.compat.as_str_any(item.numpy()))
                
                else:
                    item = tf.compat.as_str_any(item)
                sub_list.append(item)

            batch_data.append(sub_list)
        return batch_data
                    
    
    
    def get_input_and_labels_for_pretraining_adnet(self, batch_data):
        input_bbox_values = []
        action_labels = []
        class_labels = []
        img_paths = []
        for input_label_path in batch_data:
            bbox_values = []
            for index, value in enumerate(input_label_path):
                if index < 4:
                    bbox_values.append(values)
                elif index == 4:
                    action_labels.append(value)
                elif index == 5:
                    class_labels.append(value)
                else:
                    img_paths.append(value)
            input_bbox_values.append(bbox_values)
        return input_bbox_values
    
    
    
    def convert_labels_to_one_hot(self, label, label_type = 'action'):
#         one_hot_labels = []
        if label_type == 'action':
            zeros_length = self.NUM_ACTIONS
        elif label_type == 'class':
            zeros_length = self.NUM_CLASSES
            
        zero_label = np.zeros(shape = [1, zeros_length], dtype = np.float16)
        if label == -1:
            return zero_label
        else:
            
            zero_label[0,label] = 1
            return zero_label
        
    
    
    def get_image_using_bbox_values(self, bbox_values, img_path):
#         print(bbox_values, img_path)
        img = cv2.imread(img_path)
#         print(np.shape(img))
        x, y = bbox_values[0], bbox_values[1]
        xmax, ymax = x + bbox_values[2], y + bbox_values[3]
        
        cropped_img = img[y : ymax, x : xmax]
        resized_img = cv2.resize(cropped_img, (112, 112))
#         return np.expand_dims(resized_img, axis = 0)
        return resized_img
    
    
    
    
    
    def get_dataset(self):

        dataset = tf.data.Dataset.from_tensor_slices(self.bbox_action_class_img_path_list)

        dataset = dataset.shuffle(buffer_size = 100)
        dataset = dataset.batch(5)
        dataset = dataset.map(lambda row :self.parse_data(row)) 
        return dataset

In [101]:
# adnet_pretraining_data = ADNet_pretraining_data_loading()

# adnet_pretraining_data.read_generated_txt_files('/Users/vijay/Downloads/Code_Data/ADNet/generated_data/Biker/pos_action_class_bbox.txt')
# # dataset_getter = adnet_pretraining_data.get_dataset()

In [None]:
class ADNetwork:
    def __init__(self):
        self.NUM_ACTIONS = 11 # LEFT, RIGHT, UP, DOWN, DOUBLE_LEFT, DOUBLE_RIGHT, DOUBLE_UP, DOUBLE_DOWN, SCLAE_UP, SCALE_DOWN, STOP
        self.NUM_CLASSES = 2 # OBJECT, BACKGROUND
        self.REQ_ACTION_HISTORY = 10 # Last 10 values of actions are required
       
        
#         self.action_history = np.zeros(shape=(1, 110)).astype(np.float32)
        
    
        
    
    def create_network(self):
        input_data = Input(shape = (112, 112, 3))
        action_history = Input(shape = (1, 1, 110))
        
        
        ## CONV_1
#         print(np.shape(input_data), np.shape(action_history))
        data = Conv2D(filters = 96, kernel_size = (7, 7), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_1')(input_data)
#         data = LayerNormalization()(data)
#         print('conv_1 ' + str(np.shape(data)) + ' ' + str(np.shape(action_history)))
        data = tf.nn.local_response_normalization(data, depth_radius = 5, bias = 2, alpha = 1e-4*5, beta = 0.75)
        data = MaxPool2D(pool_size=(2, 2), strides = (1, 1), padding = 'VALID')(data)


        ## CONV_2
        data = Conv2D(filters = 256, kernel_size = (5, 5), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_2')(data)
#         data = LayerNormalization()(data)
        data = tf.nn.local_response_normalization(data, depth_radius = 5, bias = 2, alpha = 1e-4*5, beta = 0.75)
        data = MaxPool2D(pool_size=(3, 3), strides = (2, 2), padding = 'VALID')(data)


        ## CONV_3
        data = Conv2D(filters = 512, kernel_size = (3, 3), strides = (2, 2), padding = 'VALID', activation = 'relu', name = 'conv_3')(data)
#         data = LayerNormalization()(data)
        data = tf.nn.local_response_normalization(data, depth_radius = 5, bias = 2, alpha = 1e-4*5, beta = 0.75)
        data = MaxPool2D(pool_size=(3, 3), strides = (1, 1), padding = 'VALID')(data)

    
        data = Conv2D(filters = 512, kernel_size = (3, 3), strides = (1, 1), padding = 'VALID', activation = 'relu', name='fc_4')(data)
        data = Dropout(rate = 0.5, name = 'fc4_dropout')(data)
        data = Concatenate(axis = -1)([data, action_history])
#         data = Concatenate(axis = -1)([])
        data = Conv2D(filters = 512, kernel_size = [1, 1], strides = (1, 1), padding='VALID', activation = 'relu', name = 'fc_5')(data)
                               
        data = Dropout(rate = 0.5, name = 'fc5_dropout')(data)

        # auxilaries
        action_logits = Conv2D(filters = 11, kernel_size = [1, 1], strides = (1, 1), padding='VALID', name = 'action')(data)
        class_logits = Conv2D(filters = 2, kernel_size = [1, 1], strides = (1, 1), padding='VALID', name = 'class')(data)
        action_logits_f = Flatten()(action_logits)
        class_logits_f = Flatten()(class_logits)
        
        
        return Model([input_data, action_history], [action_logits_f, class_logits_f])
        
    

        
        

In [8]:
class Train_networks:
    def __init__(self):
        self.adnet_ckpt_dir = config.ADNET_CKPT_DIR
        self.FINAL_WEIGHTS_DIR = config.FINAL_WEIGHTS_DIR
        adnetwork = ADNetwork()
        
        adent_model = adnetwork.create_network()
        self.adnet_ckpt = tf.train.Checkpoint(curr_epoch = tf.Variable(0),
                                                        optimizer_1 = SGD(learning_rate = 0.0001, momentum = 0.9),
                                                        optimizer_2 = SGD(learning_rate = 0.001, momentum = 0.9),
                                                         model = adnet_model
                                                 )
        self.adnet_ckpt_manager = tf.train.CheckpointManager(self.adnet_ckpt,
                                                                directory = self.adnet_ckpt_dir,
                                                                max_to_keep = 3)
        self.adnet_pretrain_action_history = np.zeros(shape = (config.SL_PRETRAIN_BATCH_SIZE, 1, 1, 110), dtype = np.float16)
        self.adnet_model_ip = []
#         self.act_hist = np.zeros(config.SL_PRETRAIN_BATCH_SIZE * 1 * 1* 110).reshape(config.SL_PRETRAIN_BATCH_SIZE, 1, 1, 110)
        self.generate_samples_obj = GENERATE_SAMPLES()
        self.adnet_pretrain_data_loader = ADNet_pretraining_data_loading()
        self.adnet_action_labels = []
        self.adnet_class_labels = []
       
    
    '''###################################'''
    
    def create_dirs(self):
        
        if not os.path.exists(config.GENERATED_DATA_PATH):
            os.makedirs(config.GENERATED_DATA_PATH)

        if not os.path.exists(config.FINAL_WEIGHTS_DIR):
            os.makedirs(config.FINAL_WEIGHTS_DIR)
    
    
    
    '''###################################'''
    
    def restore_recent_checkpoint(self):
        
        if self.adnet_ckpt_manager.latest_checkpoint:
            self.adnet_ckpt.restore(self.adnet_ckpt_manager.latest_checkpoint)
    
    
    
    '''###################################'''
    
    def update_action_label_variables(self, input_batch, action_labels, action_history):
        with tf.GradientTape(persistent = True) as action_tape:
            action_logits, _ = self.adnet_ckpt.model([input_batch, action_history], training = True)
            action_vars = []
            
            for variable in self.adnet_ckpt.model.trainable_variables:
                if 'action' in variable.name or 'fc' in variable.name:
                    action_vars.append(variable)
            
            action_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = action_labels, logits = action_logits))
        
        action_gradients = action_tape.gradient(action_loss, action_vars)
        action_grad_ops = self.adnet_ckpt.optmizer_1.apply_gradients(zip(action_gradients, action_vars))
        
        
        
    
    '''###################################'''
    
    def update_class_label_variables(self, input_batch, class_labels, action_history):
        with tf.GradientTape(persistent = True) as class_tape:
            _, class_logits = self.adnet_ckpt.model([input_batch, action_history], training = True)
            class_vars = []
            
            for variable in self.adnet_ckpt.model.trainable_variables:
                if 'class' in variable.name or 'fc' in variable.name:
                    class_vars.append(variable)
            
            class_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = class_labels, logits = class_logits))
        
        class_gradients = class_tape.gradient(class_loss, class_vars)
        class_grad_ops = self.adnet_ckpt.optmizer_2.apply_gradients(zip(class_gradients, class_vars))
    
    
     
   
    '''###################################'''
    
    def get_batch_imgs_action_and_class_labels(self, batch_data):
        imgs = []
        action_labels = []
        class_labels = []
        for values in batch_data: # values is a list item
            imgs.append(self.adnet_pretrain_data_loader.get_image_using_bbox_values(values[0], values[-1]))
            
            action_label = self.adnet_pretrain_data_loader.convert_labels_to_one_hot(values[1], 'action')
            class_label = self.adnet_pretrain_data_loader.convert_labels_to_one_hot(values[2], 'class')

            action_labels.append(action_label)
            class_labels.append(class_label)
#         print(np.shape(imgs))
        return imgs, action_labels, class_labels
    
    
    
    
    
    
    

    
    
    
    

In [None]:
class Pretrain_SL(Train_networks):
    def __init__(self):
        super.__init__()
        
    
    
    '''###################################'''
    
    def in_each_pretrain_adnet_step(self, imgs, action_labels, class_labels, train_mode = 'both', action_history = None):
        
#         test = True
        if train_mode == 'action':
            self.update_action_label_variables(imgs, action_labels, action_history)
        
        elif train_mode == 'class':
            self.update_class_label_variables(imgs, class_labels, action_history)
        
        else:
            p_imgs = imgs[0]
            n_imgs = imgs[1]
            cls_imgs = p_imgs + n_imgs
            
            p_action_labels = action_labels[0]
            
            p_class_labels = class_labels[0]
            n_class_labels = class_labels[1]
            
            cls_class_labels = p_class_labels + n_class_labels
            
            '''Pretraining ADNetwork with supervised learning'''
            with tf.GradientTape(persistent = True) as conv_tape, tf.GradientTape(persistent = True) as fc_tape:
#                 print(np.shape(self.adnet_model_ip))

                action_logits, _ = self.adnet_ckpt.model([p_imgs, self.adnet_pretrain_action_history], training = True)
                _, class_logis   = self.adnet_ckpt.model([cls_imgs, self.adnet_pretrain_action_history], training = True)

                conv_vars = []
                fc_vars = []
                
                for variable in self.adnet_ckpt.model.trainable_variables:
                    if 'action' not in variable.name and 'class' not in variable.name:
                        conv_vars.append(variable)
                    else:
                        fc_vars.append(variable)

            
                action_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = p_action_labels, logits  = action_logits))
                class_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = cls_class_labels, logits = class_logits))
                total_loss = action_loss + class_loss
            
            print(total_loss.numpy())
            conv_gradients = conv_tape.gradient(total_loss, conv_vars)
            fc_gradients = fc_tape.gradient(total_loss, fc_vars)

            conv_grad_ops = self.adnet_ckpt.optimizer_1.apply_gradients(zip(conv_gradients, conv_vars))
            fc_grad_ops = self.adnet_ckpt.optimizer_2.apply_gradients(zip(fc_gradients, fc_vars))
        

            return total_loss
    

    
    
    
    '''###################################'''
    
    def pretrain_adnet_SL(self):
                                                                     
        loss_log = tf.keras.metrics.Mean('loss', dtype = tf.float32)
                                                  
        action_history = np.zeros(shape=(1, 1, 1, 110)) # during adnet pretraining, action history values vector is set to 0
#         adnetwork = ADNetwork()
#         self.adnet_pretrain_model = adnetwork.create_network()
        adnet_pretrain_data_loader = ADNet_pretraining_data_loading()
        self.restore_recent_checkpoint()
        
        '''
        generated_dirs contains folders with a txt file containing bbox values, action_label, class label and img_path of generated files
        We read each text file, and train the network for 300 epochs for each txt file
        '''
        for label_dir in self.adnet_pretrain_data_loader.generated_dirs:
            '''before reading a new set of gen text file (pos and neg), clear the respective pos and neg lists accumulated with
            bbox and label values of previously read label dir files'''
            self.adnet_pretrain_data_loader.pos_bbox_action_class_path_list.clear()
            self.adnet_pretrain_data_loader.neg_bbox_action_class_path_list.clear()
            txt_files_path = self.adnet_pretrain_data_loader.GENERATED_DATA_PATH + label_dir + '/'
            if '.DS_Store' not in txt_files_path:
                self.adnet_pretrain_data_loader.read_generated_txt_files(txt_files_path)

                num_of_pos_samples = int(config.SL_PRETRAIN_BATCH_SIZE * 0.5)
                num_of_neg_samples = config.SL_PRETRAIN_BATCH_SIZE - num_of_pos_samples

                for epoch in range(config.SL_PRETRAIN_EPOCHS):

#                     print(len(adnet_pretrain_data_loader.pos_bbox_action_class_path_list), len(adnet_pretrain_data_loader.neg_bbox_action_class_path_list))
                    pos_samples = random.sample(self.adnet_pretrain_data_loader.pos_bbox_action_class_path_list, num_of_pos_samples)
                    neg_samples = random.sample(self.adnet_pretrain_data_loader.neg_bbox_action_class_path_list, num_of_neg_samples)


                    p_imgs, p_action_labels, p_class_labels = self.get_batch_imgs_action_and_class_labels(pos_samples)
                    n_imgs, n_action_labels, n_class_labels = self.get_batch_imgs_action_and_class_labels(neg_samples)

                    '''
                    To train the model to predict class scores, we use both pos and neg samples. pos samples, since their IOU is greater than 0.7,
                    class label value would be 1 for all those positive samples. And for neg samples, the same class score label would be 0
                    '''
                    cls_imgs = p_imgs + n_imgs
                    cls_class_labels = [1] * len(p_imgs) + [0] * len(n_imgs)
                    
                    total_loss = self.in_each_pretrain_adnet_step([p_imgs, n_imgs], [p_action_labels, None], [p_class_labels, n_class_labels], 'both')
                    loss_log.update_state(total_loss)

                    if epoch % 100 == 0 and epoch > 0: 
                        self.adnet_pretrain_ckpt_manager.save()

                    if epoch == config.SL_PRETRAIN_EPOCHS -1:
                        self.adnet_ckpt.model.save_weights(config.FINAL_WEIGHTS_DIR + 'adnet_SL_pretrain_weights.h5')
                        print('ADNet model SL pre-training finished')


                with open(config.LABEL_DIRS_TRAINED_TXT_PATH, "a") as output:
                    for row in [label_dir, self.adnet_pretraining_data_loader.GENERATED_DATA_PATH + label_dir + '/']:
                        row = ','.join(map(str, row)) 
                        output.write(row + '\n')
    #             print('loss is ' + str(loss_log.result()))
                loss_log.reset_states()
    

In [69]:
l = [1, 2, 3]
k = [4, 5, 6]
j = list(zip(l, k))
random.shuffle(j)
l, k = zip(*j)
print(l, k)

(3, 1, 2) (6, 4, 5)


In [11]:
class Pretrain_RL(Train_networks):
    def __init__(self):
        super().__init__()
    
    
    
    
    '''###################################'''
    
    def compute_z_score(self, gt_bbox_values, gen_bbox_values):
        z_scores = []
        if len(gt_bbox_values) == len(gen_bbox_values):
            for gt_bbox, gen_bbox in zip(gt_bbox_values, gen_bbox_values):
                iou = self.generate_samples_obj.calculate_IOU(gt_bbox, gen_bbox)
                if iou > 0.7:
                    z_scores.append(1)
                else:
                    z_scores.append(0)
        
        return z_scores
    
    

    '''###################################'''
    
    def initial_finetune_adnet_pretrain_RL(self):
        '''
        finetune for the first two images
        When we are training only using action_label values, then it doesn't make any sense in using negative samples during training as
        the IOU of negative samples is less than 0.3
        '''
        pos_samples, _ = self.generate_samples_obj.get_noisy_samples_action_and_class_label_for_single_gt_value(curr_bbox_values, img_path, 3000, 
                                                                                                               config.RL_POS_SAMPS_THRESHOLD,
                                                                                                               config.RL_NEG_SAMPS_THRESHOLD)
        pos_samples_labels = self.prepare_samples_for_training(pos_samples)
        p_imgs, p_action_labels, _ = self.get_batch_imgs_action_and_class_labels(pos_samples_labels)
        imgs_labels_list = list(zip(p_imgs, p_action_labels))
        action_history = np.zeros(config.RL_PRETRAIN_BATCH_SIZE, 1, 1, 110)
        for epoch in range(300):
            
            random.sample(imgs_labels_list, config.RL_PRETRAIN_BATCH_SIZE)
            batch_imgs, batch_action_labels = zip(*imgs_labels_list)
            self.in_each_pretrain_adnet_step(batch_imgs, batch_action_labels, None, 'action', action_history)
        
            if epoch % 100 == 0:
                self.adnet_ckpt_manager.save()
        
        
        
    '''###################################'''
    
    def tracking_procedure(self, img_path, curr_bbox_values, action_history):
        
#         action_history = np.zeros(shape = (1, 10, 11), dtype = np.float32)
        cropped_img = self.get_img_from_bbox_values(curr_bbox_values, img_path)
        
    
        prev_bbox_values = []
#         prev_to_do_action_idx = -1
        to_do_action_idx = 0
        action_hist_insert_idx = 0
#         num_steps_until_termination = 0
        boxes_history = []
        perform_tracking = True
        probs_of_actions = []
        boxes_history.append(curr_bbox_values)
        prev_action_prob = 0
        
        '''Tracking simulation has to be stopped if the action selected is STOP or if the agent is oscillating'''
        while to_do_action_idx != 8 and perform_tracking:
            
            
            flatten_action_histories = action_history.copy().reshape(1, 1, 1, 110)
            
            '''get action probabilities'''
            action_logits, _ = self.adnet_ckpt.model([np.expand_dims(cropped_img, axis = 0), flatten_action_histories], training = False)
            action_probs = tf.nn.softmax(action_logits)
            
            
            to_do_action_idx = np.argmax(action_probs)
            
            
            
            
            '''
            if to_do_action_idx is to STOP then there is no need to perform action on the bbox values. Therefore, it is enough to
            return previous curr_bboxes, probability of action selected before termination, action_histories, num_of_steps taken to terminate
            '''
            if to_do_action_idx != 8: # index 8 = STOP
                
                prev_action_prob = max(action_probs)
                '''get new bbox values'''
                temp_bbox_values = self.generate_samples_obj.get_new_bbox_values_wrt_action(to_do_action_idx, curr_bbox_values.copy())

                '''
                if actions are Left, right, left then it means that agent returned to the earlier place. 
                If this happens, then we have to terminate the
                tracking process. In other words, if a bbox value is repeated more than once, 
                then we can be certain that oscillation occured.
                '''
                if temp_bbox_values not in boxes_history:

                    boxes.append(temp_bbox_values.copy())

                    prev_bbox_values = curr_bbox_values.copy()
                    curr_bbox_values = temp_bbox_values.copy()
                    



                    '''get new img patch using new bbox values'''
                    cropped_img = self.generate_samples_obj.get_img_from_bbox_values(curr_bbox_values, img_path)

                    '''update the action histories with newly obtained action probs'''
                    action_history[:, action_hist_insert_idx, :] = action_probs.copy()
                    action_hist_insert_idx = action_hist_insert_idx + 1
                    action_hist_insert_idx = 0 if action_hist_insert_idx > 9 else action_hist_insert_idx

#                     num_steps_until_termination = num_steps_until_termination + 1



                else:
                    perfrom_tracking = False

            
        
        return prev_act_prob, action_history, curr_bbox_values
        
        
        
    
    '''###################################'''
    
    def select_a_random_index_for_image_sampling(self, num_of_frames):
        random_index = random.choice(np.arange(num_of_frames - config.NUM_FRAMES_IN_PRETRAIN_RL))
        return [index+random_index for index in range(10)]
        
        
    
    '''###################################'''
    
    def prepare_samples_for_training(self, batch_data):
        samples = []
        for sample in batch_data:
            temp_samples = []
            bbox_values = []
#             print(sample)
            for index, item in enumerate(sample):
                if index < 3:
                    bbox_values.append(int(item))
                elif index == 3:
                    bbox_values.append(int(item))
                    temp_samples.append(bbox_values)
                elif index > 3 and index < 6:
                    temp_samples.append(int(item))
                else:
                    temp_samples.append(item)
                
            samples.append(temp_samples)
        
        
        return samples
                
            
    
    '''###################################'''
    
    def perfrom_tracking_and_finetune_weights(self, vid_imgs_path):
        '''
        vid_imgs_path is a path to a particular dir which contains video frames as images in 'img' dir and correspoinding gt values of the object
        that is being tracked in 'groundtruth_rect.txt' file
        
        Now, randomly select 10 consecutinve images and except for the first image, 
        perform the tracking and store results in the corresponding lists.
        '''
        all_imgs_in_dir = os.listdir(vid_imgs_path + 'img/') 
        gt_values = self.generate_samples_obj.get_gt_values(vid_imgs_path + 'groundtruth_rect.txt')
        
        all_imgs_in_dir = [img_name for img_name in all_imgs_in_dir if '.DS_Store' not in img_name]
        all_imgs_in_dir.sort(key=lambda f: int(re.sub('\D', '', f)))
        all_imgs_in_dir_with_index = [(index, img_name) for index, img_name in enumerate(all_imgs_in_dir)]
        
#         total_action_histories = []
#         last_action_idxs = []
#         num_steps_taken_in_each_episode = []
        bboxes_from_tracking = []
        action_probs = []
        
        if len(gt_values) == len(all_imgs_in_dir):
            
            '''in pretraining adnet with RL, for every video, we have to select 10 frames for tracking simulation'''
            randomly_selected_indices = self.select_a_random_index_for_image_sampling(len(all_imgs_in_dir))
            imgs_in_curr_selection = [all_imgs_in_dir[index] for index in randomly_selected_indices]
            gt_values_of_curr_selection = [gt_values[index] for index in randomly_selected_indices]
            
            
            action_history = np.zeros(shape = (1, 10, 11))
            initial_finetune = True
            curr_bbox_values = gt_values_of_curr_selection[0]
            
            for img_name in imgs_in_curr_selection:

                img_path = vid_imgs_path + 'img/' + img_name
                
                
                if initial_finetune:
                    
                    self.initial_finetune_adnet_pretrain_RL()
                    
                    initial_finetune = False
                else:
                    '''
                    the resultant bboxes from previous self.tracking_procedure, has to be the input to the next iteration of the self.tracking_procedure
                    '''
                    stop_before_action_prob, action_history, curr_bbox_values = self.tracking_procedure(img_path, curr_bbox_values, action_history)
                    bboxes_from_tracking.append(curr_bbox_values.copy())
#                     num_steps_taken_in_each_episode.append(num_steps_taken)
#                     total_action_histories.append(action_histories.copy())
                    action_probs.append(stop_before_action_prob)
            
            return gt_boxes_of_curr_selection, action_probs, bboxes_from_tracking
                
                
        else:
            raise Exception("Sorry, there is a problem in reading frames and their correspoinding gt values")
        
    
    
    
    
    '''###################################'''
    
    def discount_rewards(self, rewards):
        discounted_rewards = [((config.GAMMA ** idx) * reward) for idx, reward in enumerate(rewards)]
        return discounted_rewards
    
    
    
    
    '''###################################'''
    
    def get_adnet_RL_train_vars(self):
        rl_train_vars = []
        for variable in self.adnet_ckpt.model.trainable_variables:
            if 'class' not in variable.name:
                rl_train_vars.append(variable)
    
    
    
    
    '''###################################'''
    
    def in_each_adnet_RL_train_step(self, discounted_rewards, action_probs):
        rl_train_vars = self.get_adnet_RL_train_vars()
        for reward, prob in zip(discounted_rewards, action_probs):
            with tf.GradientTape(persistent = True) as tape:
                loss = -np.log(prob) * reward
                         
            
            gradients = tape.gradient(loss, rl_train_vars)
            self.adnet_ckpt.optimizer_2.apply_gradients(zip(gradients, rl_train_vars))
        
        
                         
        
    '''###################################'''
    
    def perform_adnet_RL_training(self):
        self.restore_recent_checkpoint()
        all_train_vid_dirs = os.listdir(config.TRAIN_DATA_PATH)
        random.shuffle(all_train_vid_dirs)
        for vid_dir_name in all_train_vid_dirs:
            vid_imgs_path = config.TRAIN_DATA_PATH + vid_dir_name + '/'
            gt_values, action_probs, boxes_from_tracking = self.perfrom_tracking_and_finetune_weights(vid_imgs_path)
            rewards = self.compute_z_score(gt_values.copy(), boxes_from_tracking.copy())
            discount_rewards = self.discount_rewards(rewards.copy())
            self.in_each_adnet_RL_train_step(discounted_rewards.copy(), action_probs.copy())

In [None]:
train_using_rl = Pretrain_RL()
train_using_rl.perform_adnet_RL_training()

In [None]:
class Online_finetuning_RL(Pretrain_RL):
    def __init__(self):
        super().__init__()
    
    
    def finetune_online(self, img_path = None, bbox_gt_values = None, num_epochs = -1, new_samples = True):
        action_history = np.zeros(shape = (config.ONLINE_FINETUNE_BATCH_SIZE, 1, 1, 110))
        if new_samples:
            pos_samples, neg_samples = self.generate_samples_obj.get_noisy_samples_action_and_class_label_for_single_gt_value(bbox_gt_values, img_path, 2000)
        else: # if samples from the last 20 frames
            pos_samples, neg_samples = self.get_samples()
            
        pos_samples = self.prepare_samples_for_training(pos_samples)
        neg_samples = self.prepare_samples_for_training(neg_samples)
        
        p_imgs, p_action_labels, p_class_labels = self.get_batch_imgs_action_and_class_labels(pos_samples)
        n_imgs, n_action_labels, n_class_labels = self.get_batch_imgs_action_and_class_labels(neg_samples)
#         data_samples = self.prepare_samples_for_training(pos_samples + neg_samples)
#         random.shuffle(data_samples)
#         imgs, action_labels, _ = self.get_batch_imgs_action_and_class_labels(data_samples)
        p_imgs_labels = list(zip(p_imgs, p_action_labels, p_class_labels))
        n_imgs_labels = list(zip(n_imgs, n_action_labels, n_class_labels))   
        for epoch in range(num_epochs):
            random.sample(p_imgs_labels, config.ONLINE_FINETUNE_BATCH_SIZE)
            random.sample(n_imgs_labels, config.ONLINE_FINETUNE_BATCH_SIZE)
            p_batch_imgs, p_batch_a_labels, p_batch_c_labels = zip(*p_imgs_labels)
            n_batch_imgs, n_batch_a_labels, n_batch_c_labels = zip(*n_imgs_labels)
            
            self.in_each_pretrain_adnet_step(p_batch_imgs, p_batch_a_labels, None, 'action', action_history)
            self.in_each_pretrain_adnet_step(p_batch_imgs + n_batch_imgs, None, 
                                             [1] * config.ONLINE_FINETUNE_BATCH_SIZE + [0] * config.ONLINE_FINETUNE_BATCH_SIZE,
                                             'class', action_history)
            if epoch % 100 = 0:
                self.adnet_ckpt_manager.save()
            
    
    
    
    
    def redetect_the_target_position(self, bbox_values, img_path):
        imgs = []
        pos_samples, neg_samples = self.generate_samples_obj.get_noisy_samples_action_and_class_label_for_single_gt_value(curr_bbox, img_path, 
                                                                                             config.REDECTION_NUM_SAMPLES)
        data_samples = self.preprate_samples_for_training(pos_samples + neg_samples)
        bbox_values = [sample[0] for sample in data_samples]
        for bbox_value in bbox_values:
            imgs.append(self.adnet_pretrain_data_loader.get_image_using_bbox_values(bbox_values, img_path))
        
        _, class_logits = self.adnet_ckpt.model([imgs, np.zeros(config.REDECTION_NUM_SAMPLES, 1, 1, 110)], training = False)
        class_scores = tf.nn.softmax(class_logits)
        target_scores = [score[0] for score in class_scores]
        max_score_index = np.argmax(target_scores)
        max_bbox_value = bbox_values[max_score_index]
        
        return max_bbox_value
        
        
        
    
    def get_samples(self):
#         pos_samples = [sample for samples_from_each_frame in self.pos_samples_for_online_finetune for sample in samples_from_each_frame]
#         neg_samples = [sample for samples_from_each_frame in self.neg_samples_for_online_finetune for sample in samples_from_each_frame]
        pos_samples = []
        neg_samples = []
        assert len(self.pos_samples_for_online_finetune) == len(self.neg_samples_for_online_finetune)
        num_pos_samples = len(self.pos_samples_for_online_finetune)
        
        
        
            
        for index in range(20):
            if index > num_pos_samples - 1:
                break
            else:
                p_samples = self.pos_samples_for_online_finetune[(-index + 1)]
                n_samples = self.pos_samples_for_online_finetune[(-index + 1)]

                temp_p_samples=[sample for sample in p_samples]
                temp_n_samples = [sample for sample in n_samples]

                pos_samples.extend(temp_p_samples)
                neg_samples.extend(temp_n_samples)
        
        return pos_samples, neg_samples

    
    
    def update_weights(self):
        self.finetune_online(None, None, 30, False)

    
    
    def finetune_weights_online_using_rl(self):
        self.restore_recent_checkpoint()
        self.is_tracked = True
        self.pos_samples_for_online_finetune = []
        self.neg_samples_for_online_finetune = []
        assert os.path.exists(config.ONLINE_TUNE_TEST_SEQ_PATH)
        adnetwork
        gt_values_txt_path = config.ONLINE_TUNE_TEST_SEQ_PATH + 'groundtruth_rect.txt'
        gt_values = self.generate_samples_obj.get_gt_values(gt_values_txt_path)
        
        imgs_names = os.listdir(config.ONLINE_TUNE_TEST_SEQ_PATH)
        imgs_names = [name for name in imgs_names if '.DS_Store' not in name]
        action_histories = np.zeros(shape = (1, 10, 11))
        
        
        for index, name in enumerate(imgs_names):
            img_path = config.ONLINE_TUNE_TEST_SEQ_PATH + 'img/' + name
            gt_values = gt_values[index]
            if index == 0 or index == 1:
                
                self.initial_finetune_online(img_path, gt_values, 300, True)
                curr_bbox = gt_values.copy()
            
            else:
                action_probs, action_history, bbox_values = self.tracking_procedure(img_path, curr_bbox, action_histories)
                cropped_img = self.generate_samples_obj.get_img_from_bbox_values(bbox_values, img_path)
                action_logits, class_logits = self.adnet_ckpt.model([cropped_img, action_history], training = False)
                confidence_score = tf.nn.softmax(class_logits)
                if confidence_score[0][0] > 0.5: # targets score
                    '''
                    generate samples using the returned bbox_values and the current image
                    all the samples contain bboxvalues, action label and class label
                    '''
                    
                    pos_samples, neg_samples = self.generate_samples_obj.get_noisy_samples_action_and_class_label_for_single_gt_value(curr_bbox, img_path, 
                                                                                                                                config.ONLINE_FINETUNE_NUM_SAMPLES, 
                                                                                                                                config.RL_POS_SAMPS_THRESHOLD,
                                                                                                                                config.RL_NEG_SAMPS_THRESHOLD)
                    
                    
                    '''
                    While tuning weights using online mode, we use generated samples from the last 20 frames, therefore if the len of samples saved is greater 
                    than 20, then we have to remove the first added item and the recent item get appended at the end.
                    '''
                    if len(self.pos_samples_for_online_finetune) > 19:
                        del self.pos_samples_for_online_finetune[0]
                    
                    if len(self.neg_samples_for_online_finetuen) > 19:
                        del self.neg_samples_for_online_finetuene[0]
                        
                    self.pos_samples_for_online_finetune.extend([pos_samples])
                    self.neg_samples_for_online_finetune.extend([neg_samples])
                    curr_bbox = bbox_values.copy()
                    action_histories = action_history.copy()
                else:
                    '''perform redetection'''
                    self.is_tracked = False
                    max_bbox_value = self.redetect_the_target_position()
                    curr_bbox = max_bbox_value.copy()
                    action_histories = action_history.copy()
                    
            if index % config.ONLINE_FINETUNE_FREQUENCY == 0 and index > 19:
                self.update_weights()
            