In [1]:
!pip install pandas
!pip install tensorflow
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.1.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.1.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.1.0 scikit-optimize-0.10.2


In [2]:
import gdown
import zipfile
import os
# UCF 4 class link
# shared_link = "https://drive.google.com/file/d/1HyXsNuW-P2yYEdtnTwU70CYCAFa-3Jse/view?usp=sharing"
# UCF 10 class link
shared_link = "https://drive.google.com/file/d/14z32aNr6m86q3TaV2chAoJG8cYfsF8nO/view?usp=sharing"
# Extract the file ID from the shared link
file_id = shared_link.split('/d/')[1].split('/')[0]
download_url = f"https://drive.google.com/uc?id={file_id}"
# Define the output path for the downloaded ZIP file
output_zip = "file.zip"
# Download the file
gdown.download(download_url, output_zip, quiet=False)
# Extract the ZIP file
output_folder = "extracted_files"
os.makedirs(output_folder, exist_ok=True)
with zipfile.ZipFile(output_zip, 'r') as zip_ref:
    zip_ref.extractall(output_folder)
print(f"Files extracted to: {output_folder}")
# List the extracted files
for root, dirs, files in os.walk(output_folder):
    for file in files:
        print(os.path.join(root, file))

Downloading...
From (original): https://drive.google.com/uc?id=14z32aNr6m86q3TaV2chAoJG8cYfsF8nO
From (redirected): https://drive.google.com/uc?id=14z32aNr6m86q3TaV2chAoJG8cYfsF8nO&confirm=t&uuid=49e6036f-3c18-45e7-90e6-fe4585ff9fc6
To: /content/file.zip
100%|██████████| 732M/732M [00:16<00:00, 44.0MB/s]


Files extracted to: extracted_files
extracted_files/Skiing/v_Skiing_g03_c07.avi
extracted_files/Skiing/v_Skiing_g19_c05.avi
extracted_files/Skiing/v_Skiing_g06_c05.avi
extracted_files/Skiing/v_Skiing_g07_c01.avi
extracted_files/Skiing/v_Skiing_g02_c03.avi
extracted_files/Skiing/v_Skiing_g10_c03.avi
extracted_files/Skiing/v_Skiing_g03_c02.avi
extracted_files/Skiing/v_Skiing_g13_c06.avi
extracted_files/Skiing/v_Skiing_g22_c03.avi
extracted_files/Skiing/v_Skiing_g15_c05.avi
extracted_files/Skiing/v_Skiing_g09_c05.avi
extracted_files/Skiing/v_Skiing_g21_c06.avi
extracted_files/Skiing/v_Skiing_g19_c01.avi
extracted_files/Skiing/v_Skiing_g18_c01.avi
extracted_files/Skiing/v_Skiing_g03_c06.avi
extracted_files/Skiing/v_Skiing_g18_c03.avi
extracted_files/Skiing/v_Skiing_g11_c04.avi
extracted_files/Skiing/v_Skiing_g14_c03.avi
extracted_files/Skiing/v_Skiing_g11_c02.avi
extracted_files/Skiing/v_Skiing_g11_c03.avi
extracted_files/Skiing/v_Skiing_g24_c05.avi
extracted_files/Skiing/v_Skiing_g01_c03.

In [3]:
import os
import math
import time
import json
import numpy as np
from sklearn.model_selection import train_test_split
import cv2

# Producer part
def error_callback(exc):
  raise Exception('Error while sendig data to kafka: {0}'.format(str(exc)))


def frames_extraction(video_path):
  # Store the video frames
  frames_list = []

  # Read the video
  video_reader = cv2.VideoCapture(video_path)

  # Get total number of frames (of this video)
  video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

  # Calculate the interval after which frames will be stored (the step)
  skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)

  # Iterate over video-frames
  for frame_counter in  range(SEQUENCE_LENGTH):
    # Adjust the pointer of current frame
    video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

    # Read the corresponding frame
    success, frame = video_reader.read()

    if not success:
      break

    # Resize-normalize the frame and save it to the corresponding list
    resized_frame = cv2.resize(frame, (64, 64))
    frames_list.append(resized_frame)
    # normalized_frame = resized_frame / 255
    # frames_list.append(normalized_frame)

  video_reader.release()

  return frames_list


def create_dataset():
  if len(CLASSES_LIST) == 4:
    data_dir = "/content/extracted_files/UCF4"
  else:
    data_dir = "/content/extracted_files"
  # Lists that contain the extracted features, the labels and the paths of the videos
  features = []
  labels = []
  video_files_paths = []

  # Iterate through all (selected) classes
  for class_index, class_name in enumerate(CLASSES_LIST):
    print(f'Extracting Data of Class: {class_name}')

    # Get the videos that are contained in each (selected) class
    files_list = os.listdir(os.path.join(data_dir, class_name))
    for file_name in files_list:
      video_file_path = os.path.join(data_dir, class_name, file_name)
      frames = frames_extraction(video_file_path)
      if len(frames) == SEQUENCE_LENGTH:
        features.append(frames)
        labels.append(class_index)
        video_files_paths.append(video_file_path)

  # Convert lists to numpy arrays
  features = np.asarray(features)
  labels = np.asarray(labels)

  return features, labels, video_files_paths


try:
  with open('config_video.json') as json_file:
    config = json.load(json_file)
except:
  print("config_video.json not found")
  exit()
  args = sys.argv[1:]
SEQUENCE_LENGTH = config['sequence_length']
tmp_filter_train = config['stream_batch_train'] * SEQUENCE_LENGTH
tmp_filter_test = config['stream_batch_test'] * SEQUENCE_LENGTH
CLASSES_LIST = config['classes_list']
features, labels, video_files_paths = create_dataset()
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, test_size=0.25, shuffle=True)
received_images_reshaped = features_train
received_labels_decoded = labels_train
received_images_reshaped_test = features_test
received_labels_decoded_test = labels_test

Extracting Data of Class: WalkingWithDog
Extracting Data of Class: HorseRace
Extracting Data of Class: Diving
Extracting Data of Class: PushUps
Extracting Data of Class: Fencing
Extracting Data of Class: Punch
Extracting Data of Class: Skiing
Extracting Data of Class: MilitaryParade
Extracting Data of Class: Rowing
Extracting Data of Class: Billiards


In [5]:
from re import I
import gc
import json
import multiprocessing
import pickle
import socket
import sys
import random
import math
from datetime import datetime
import time
import json
from multiprocessing import Queue
import pandas as pd
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import fashion_mnist, mnist, cifar10, cifar100
from tensorflow.keras import layers, models
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam
from skopt import gbrt_minimize, gp_minimize
from skopt.utils import use_named_args
from skopt.space import Real, Categorical, Integer
from tensorflow.python.keras import backend as K
import matplotlib.pyplot as plt
import struct

try:
    with open('config_video.json') as json_file:
        config = json.load(json_file)
except:
    print("config_video.json not found")
    exit()

call_counter = 0
SEQUENCE_LENGTH = config['sequence_length']
tmp_filter_train = config['bo_data_train'] * SEQUENCE_LENGTH
tmp_filter_test = config['bo_data_test'] * SEQUENCE_LENGTH
size_of_batch = config['size_of_batch']
lr = config['lr']
# size_of_batch_low = config['size_of_batch_low']
# size_of_batch_high = config['size_of_batch_high']
# lr_low = config['lr_low']
# lr_high = config['lr_high']
num_of_conv_layers_low = config['num_of_conv_layers_low']
num_of_conv_layers_high = config['num_of_conv_layers_high']
num_of_pool_layers_low = config['num_of_pool_layers_low']
num_of_pool_layers_high = config['num_of_pool_layers_high']
num_of_dense_layers_low = config['num_of_dense_layers_low']
num_of_dense_layers_high = config['num_of_dense_layers_high']
num_of_lstm_layers_low = config['num_of_lstm_layers_low']
num_of_lstm_layers_high = config['num_of_lstm_layers_high']
num_of_gru_layers_low = config['num_of_gru_layers_low']
num_of_gru_layers_high = config['num_of_gru_layers_high']
num_of_rnn_layers_low = config['num_of_rnn_layers_low']
num_of_rnn_layers_high = config['num_of_rnn_layers_high']
sample_size_low = config['sample_size_low']
sample_size_high = config['sample_size_high']
num_of_epochs_low = config['num_of_epochs_low']
num_of_epochs_high = config['num_of_epochs_high']
sampling_method_id = 1
acquisition_f = config['acquisition_f']
theta_parameter = config['theta_parameter']
lamda_acc = config['lamda_acc']
bo_call_number = config['bo_call_number']
DATASET_SHAPE = [64, 64, 3]
UNIQUE_CLASS_LABELS = range(len(config['classes_list']))
# Percentage of the dataset that will be used for testing
perc_test = 1

# tunnel of 65433
streamlit_tunnel_addr = "0.tcp.eu.ngrok.io"
streamlit_tunnel_port = 12092

# tunnel of 65435
streamlit_live_tunnel_addr = "6.tcp.eu.ngrok.io"
streamlit_live_tunnel_port = 12403

# tunnel of 65436
sbto_run_tunnel_addr = "7.tcp.eu.ngrok.io"
sbto_run_tunnel_port = 18880

extra_results = []

dimensions = []
dimension_names = []
default_parameters = []
column_names = []
dim_sample_size = Real(low=sample_size_low, high=sample_size_high, name='sample_size')
dimensions.append(dim_sample_size)
dimension_names.append('sample_size')
column_names.append('Sample Size')
default_parameters.append(0.25)
dim_epochs_number = Integer(low=num_of_epochs_low, high=num_of_epochs_high, name='epochs_number')
dimensions.append(dim_epochs_number)
dimension_names.append('epochs_number')
column_names.append('Epochs')
default_parameters.append(5)
dim_conv_number = Integer(low=num_of_conv_layers_low, high=num_of_conv_layers_high, name='conv_number')
dimensions.append(dim_conv_number)
dimension_names.append('conv_number')
column_names.append('Conv')
default_parameters.append(2)
dim_pool_number = Integer(low=num_of_pool_layers_low, high=num_of_pool_layers_high, name='pool_number')
dimensions.append(dim_pool_number)
dimension_names.append('pool_number')
column_names.append('Pool')
default_parameters.append(2)
if num_of_lstm_layers_low != num_of_lstm_layers_high:
  dim_lstm_number = Integer(low=num_of_lstm_layers_low, high=num_of_lstm_layers_high, name='lstm_number')
  dimensions.append(dim_lstm_number)
  dimension_names.append('lstm_number')
  column_names.append('LSTM')
  default_parameters.append(1)
if num_of_gru_layers_low != num_of_gru_layers_high:
  dim_gru_number = Integer(low=num_of_gru_layers_low, high=num_of_gru_layers_high, name='gru_number')
  dimensions.append(dim_gru_number)
  dimension_names.append('gru_number')
  column_names.append('GRU')
  default_parameters.append(0)
if num_of_rnn_layers_low != num_of_rnn_layers_high:
  dim_rnn_number = Integer(low=num_of_rnn_layers_low, high=num_of_rnn_layers_high, name='rnn_number')
  dimensions.append(dim_rnn_number)
  dimension_names.append('rnn_number')
  column_names.append('RNN')
  default_parameters.append(0)
dim_dense_number = Integer(low=num_of_dense_layers_low, high=num_of_dense_layers_high, name='dense_number')
dimensions.append(dim_dense_number)
dimension_names.append('dense_number')
column_names.append('Dense')
default_parameters.append(1)
# dim_lr = Real(low=lr_low, high=lr_high, name='learning_rate')
# dim_batch_size = Integer(low=size_of_batch_low, high=size_of_batch_high, name='batch_size')
# dimensions = [dim_sample_size,
#               dim_epochs_number,
#               dim_conv_number,
#               dim_pool_number,
#               dim_lstm_number,
#               dim_gru_number,
#               dim_rnn_number,
#               dim_dense_number
#              ]
# default_parameters = [0.99, 4, 2, 2, 1, 0, 0, 1]
# default_parameters = [(sample_size_low + sample_size_high) / 2,
#                      (num_of_epochs_low + num_of_epochs_high) // 2,
#                      (num_of_conv_layers_low + num_of_conv_layers_high) // 2,
#                      (num_of_pool_layers_low + num_of_pool_layers_high) // 2,
#                      (num_of_lstm_layers_low + num_of_lstm_layers_high) // 2,
#                      (num_of_gru_layers_low + num_of_gru_layers_high) // 2,
#                      (num_of_rnn_layers_low + num_of_rnn_layers_high) // 2,
#                      (num_of_dense_layers_low + num_of_dense_layers_high) // 2]


CONV_PADDING = 'same'
MAX_POOL_PADDING = 'same'
CONV_NEURONS_CONST = 16
CONV_NEURONS_BOUND = 64
DENSE_NEURONS_CONST = 128
DENSE_NEURONS_BOUND = 32
UNITS_CONST = 32
UNITS_BOUND = 32
streamlit_live_socket = None

def start_controller():
    """
    Initialize the socket and listen for keyboard input
    """
    # Open config file and get the desired port for socket communication
    try:
        with open('config_video.json') as json_file:
            config = json.load(json_file)
    except:
        print("config_video.json not found")
        exit()
    # Initialize an IPv4 socket with TCP (default) and try to connect to the nn
    streamlit_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    streamlit_socket.connect((streamlit_tunnel_addr, streamlit_tunnel_port))
    streamlit_live_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    streamlit_live_socket.connect((streamlit_live_tunnel_addr, streamlit_live_tunnel_port))
    return streamlit_socket, streamlit_live_socket, config

def start_bo(config, streamlit_socket, unique_class_labels, received_images_reshaped, received_labels_decoded, received_images_reshaped_test, received_labels_decoded_test, dataset_shape):
    global call_counter
    call_counter = 0
    global extra_results
    extra_results = []
    # Get unique labels in our (received) training dataset
    unique_class_labels = np.unique(received_labels_decoded)
    for i in range(1):
        # Get the new number of epochs from the keyboard
        data, my_stats = bo_res(unique_class_labels, received_images_reshaped, received_labels_decoded, received_images_reshaped_test, received_labels_decoded_test, dataset_shape)
        serialized_df = pickle.dumps(my_stats)
        streamlit_socket.sendall(serialized_df)
    # # # Close socket on exit
    # streamlit_socket.close()
    # streamlit_live_socket.close()
    # print("Sockets Closed")

# @use_named_args(dimensions = dimensions)
def fitness(*args):
    args = args[0]
    sample_size, epochs_number, conv_number, pool_number, dense_number, lstm_number, gru_number, rnn_number = [0, 0, 0, 0, 0, 0, 0, 0]
    for i, dim_name in enumerate(dimension_names):
      if dim_name=='sample_size':
        sample_size = args[i]
      elif dim_name=='epochs_number':
        epochs_number = args[i]
      elif dim_name=='conv_number':
        conv_number = args[i]
      elif dim_name=='pool_number':
        pool_number = args[i]
      elif dim_name=='dense_number':
        dense_number = args[i]
      elif dim_name=='lstm_number':
        lstm_number = args[i]
      elif dim_name=='gru_number':
        gru_number = args[i]
      elif dim_name=='rnn_number':
        rnn_number = args[i]
    print()
    print(f"EPOCHS to: {epochs_number} \n SAMPLE RATE to: {sample_size} \n NUM_OF_CONV_LAYERS to: {conv_number} \n NUM_OF_POOL_LAYERS to: {pool_number} \n NUM_OF_DENSE_LAYERS to: {dense_number} \n NUM_OF_LSTM_LAYERS to: {lstm_number} \n NUM_OF_GRU_LAYERS to: {gru_number} \n NUM_OF_RNN_LAYERS to: {rnn_number}")
    print()
    global call_counter
    call_counter += 1
    input_str = f"CALL: {call_counter}/{bo_call_number} \n EPOCHS to: {epochs_number} \n SAMPLE RATE to: {sample_size} \n NUM_OF_CONV_LAYERS to: {conv_number} \n NUM_OF_POOL_LAYERS to: {pool_number} \n NUM_OF_DENSE_LAYERS to: {dense_number}\n NUM_OF_LSTM_LAYERS to: {lstm_number} \n NUM_OF_GRU_LAYERS to: {gru_number} \n NUM_OF_RNN_LAYERS to: {rnn_number}\n"
    # Call the sampling method
    x_train, y_train = sampling_method(sampling_method_id, received_images_reshaped, received_labels_decoded, sample_size)

    layers_lst = []
    if conv_number > 0:
      if conv_number > pool_number:
        for i in range(0, conv_number - pool_number):
          layers_lst.append('conv')
        for i in range(0, pool_number):
          layers_lst.append('conv')
          layers_lst.append('pool')
      elif conv_number == pool_number:
        for i in range(0, conv_number):
          layers_lst.append('conv')
          layers_lst.append('pool')
      else:
        for i in range(0, conv_number):
          layers_lst.append('conv')
          layers_lst.append('pool')
        for i in range(conv_number, pool_number):
          layers_lst.append('pool')
    if lstm_number > 0:
      for i in range(0, lstm_number):
        layers_lst.append('lstm')
    if gru_number > 0:
      for i in range(0, gru_number):
        layers_lst.append('gru')
    if rnn_number > 0:
      for i in range(0, rnn_number):
        layers_lst.append('rnn')
    if dense_number > 0:
      for i in range(0, dense_number):
        layers_lst.append('dense')
    print(f"---------------------------->{layers_lst}")
    q = Queue()
    process_eval = multiprocessing.Process(target=my_evaluate, args=(q, x_train, y_train, received_images_reshaped_test, received_labels_decoded_test, layers_lst, epochs_number, lr, size_of_batch, CONV_NEURONS_CONST, UNITS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, UNITS_BOUND, DENSE_NEURONS_BOUND, input_str))
    process_eval.start()
    test_acc, tr_time, train_loss, train_acc, tradeOff_metric = q.get()
    process_eval.join()


    # Print the results.
    print()
    print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
    print(f"Training time: ", tr_time)
    print(tradeOff_metric)
    print()
    # Store the accuracy and the training speed of the corresponding model in order to be printed in the final cell
    tmp = [test_acc, tr_time, train_loss, train_acc]
    extra_results.append(tmp)
    # Delete the Keras model with these hyper-parameters from memory.
    K.clear_session()
    gc.collect()
    del x_train
    del y_train
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    tf.compat.v1.reset_default_graph()
    return -tradeOff_metric


def bo_res(unique_class_labels, received_images_reshaped, received_labels_decoded, received_images_reshaped_test, received_labels_decoded_test, dataset_shape):

    gp_result = gp_minimize(func=fitness,
                            dimensions = dimensions,
                            n_calls = bo_call_number,
                            acq_func = acquisition_f,
                            noise = "gaussian",
                            n_jobs = -1,
                            x0 = default_parameters)
    all_cols = ['Sample Size', 'Epochs', 'Conv', 'Pool', 'Dense', 'LSTM', 'GRU', 'RNN']
    df_extra = pd.DataFrame(extra_results, columns=["Accuracy", "Training Speed (sec)", "Loss Epoch", "Acc Epoch"])
    df_zero = pd.DataFrame(np.zeros((bo_call_number, len(all_cols)), dtype=int), columns=all_cols)
    df_res = pd.DataFrame(gp_result.x_iters, columns=column_names)
    for col in df_zero.columns:
      if col in df_res.columns:
        df_zero[col] = df_res[col]
    print(df_res)
    print(df_zero)
    pd_tmp = pd.concat([df_zero, (pd.Series(gp_result.func_vals * -1, name="Score"))], axis=1)
    final_result = pd.concat([pd_tmp, df_extra], axis=1)
    final_result['Sample Size'] = final_result['Sample Size'].round(3)

    # print(gp_result.x)
    all_dim_names = ['sample_size', 'epochs_number', 'conv_number', 'pool_number', 'dense_number', 'lstm_number', 'gru_number', 'rnn_number']
    result = ['0', '0', '0', '0', '0', '0', '0', '0']
    for i, dim_name in enumerate(dimension_names):
      result[all_dim_names.index(dim_name)] = gp_result.x[i].__str__()

    print(f" NEW EPOCHS to: {result[1]} \n NEW SAMPLE RATE to: {result[0]} \n NUM_OF_CONV_LAYERS to: {result[2]} \n NUM_OF_POOL_LAYERS to: {result[3]} \n NUM_OF_DENSE_LAYERS to: {result[4]} \n NUM_OF_LSTM_LAYERS to: {result[5]} \n NUM_OF_GRU_LAYERS to: {result[6]} \n NUM_OF_RNN_LAYERS to: {result[7]}")

    tmp = result[1] +','+result[0]+','+result[2]+','+result[3]+','+result[4]+','+result[5]+','+result[6]+','+result[7]

    return tmp, final_result


# A function that prints the occurence of each class in a list
def print_times_per_label(lst, labels_all):
  # Get unique labels in our training dataset
  unique_labels = np.unique(labels_all)
  for i in range(0, len(unique_labels)):
    print("Class", unique_labels[i], "has", lst.count(i), "samples in our dataset...")

# Select k items from a stream of items-data

# A function to randomly select k items from stream[0..n-1].
def reservoir_sampling(stream, n, k):
  i = 0     # index for elements in stream[]

  # reservoir[] is the output array.
  # Initialize it with first k elements from stream[]
  reservoir = [0] * k

  for i in range(k):
    reservoir[i] = stream[i]

  # Iterate from the (k+1)th element to Nth element
  while(i < n):
    # Pick a random index from 0 to i.
    j = random.randrange(i+1)

    # If the randomly picked
    # index is smaller than k,
    # then replace the element
    # present at the index
    # with new element from stream
    if(j < k):
      reservoir[j] = stream[i]
    i+=1

  return reservoir

# A function that finds the size of each reservoir for every class depending on its occurence in the initial dataset
# and returns the unique labels that exist in our dataset along with the corresponding percentage
def reservoir_size_per_class(init_labels):

  # Get unique labels and their counts (how many times they appear) in our training dataset
  unique_labels, counts = np.unique(init_labels, return_counts = True)

  # Transform to list
  unique_labels_lst = unique_labels.tolist()
  counts_lst = counts.tolist()

  perc_per_class = []
  for i in range(len(unique_labels_lst)):
    perc_per_class.append(counts_lst[i]/len(init_labels))

  # print(perc_per_class)

  return perc_per_class, unique_labels_lst

def sampling_method(sampling_method_id, received_images_reshaped, received_labels_decoded, sample_size):
  # print("Percentage of filtering in our training dataset was set:")
  # print(sample_size)

  if sampling_method_id == 0:
    # Simple reservoir sampling over the whole training dataset
    # Total size of the stream (or training dataset)
    n_train = len(received_images_reshaped)

    # Number of samples that will be drawn
    k_train = int(n_train * sample_size)

    # Use the indexes of dataset in order to decide which samples will be drawn
    idx_tmp_train_list = list(range(0, n_train))

    # Find the indexes in order to construct the dataset that will be used during the training process
    idx_train = reservoir_sampling(idx_tmp_train_list, n_train, k_train)
  else:
    # Reservoir sampling in each class based on the number of samples (per class) that exist in the initial dataset
    # Find the size of each reservoir for every class depending on its occurence in the initial training dataset
    class_perc, unique_ids = reservoir_size_per_class(received_labels_decoded)

    # Stores the indexes (from all classes) in order to construct the dataset that will be used during the training process
    idx_train = []

    # Run for every single class the reservoir sampling seperately
    for i in range(0, len(unique_ids)):
      # Find the locations of each sample belonging to our class of interest
      tmp = np.where(np.asarray(received_labels_decoded) == unique_ids[i])
      idx_of_class = tmp[0].tolist()

      # Run the reservoir sampling for the class of interest
      sampled_idx_of_class = reservoir_sampling(idx_of_class, len(idx_of_class), int(len(received_images_reshaped) * sample_size * class_perc[i]))

      # Store the (sampled) samples from this class
      for j in range(0, len(sampled_idx_of_class)):
        idx_train.append(sampled_idx_of_class[j])

  # Store the corresponding images and labels from training dataset based on the sampled indexes
  train_images_lst = []
  for i in idx_train:
    train_images_lst.append(received_images_reshaped[i])

  train_labels_lst = []
  for i in idx_train:
    train_labels_lst.append(received_labels_decoded[i])

  # Check the occurence of each class in the final training dataset
  # print_times_per_label(train_labels_lst, received_labels_decoded)

  # Tranfsorm the lists that we stored our samples into arrays
  train_images = np.asarray(train_images_lst)
  train_labels = np.asarray(train_labels_lst)

  # Verify that the desired filtering was performed in both datasets
  # print("Training dataset before sampling:")
  # print(len(received_images_reshaped))
  # print(len(received_labels_decoded))
  # print("Training dataset after sampling:")
  # print(train_images.shape)
  # print(train_labels.shape)

  return train_images, train_labels


def my_evaluate(q, x_train, y_train, features_test, labels_test, layers_lst, epochs_number, learning_rate, batch_size, CONV_NEURONS_CONST, UNITS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, UNITS_BOUND, DENSE_NEURONS_BOUND, input_str):
  error_flag = -1

  try:
    # Function that creates the model
    model, *_ = create_model(layers_lst, 'dense', CONV_NEURONS_CONST, UNITS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, UNITS_BOUND, DENSE_NEURONS_BOUND)

    if model == -1:
      return -1000000

    # If the just-added-layer was conv or pool then add manually a flatten layer
    if 'lstm' not in layers_lst and 'gru' not in layers_lst and 'rnn' not in layers_lst and 'dense' not in layers_lst:
      model.add(Flatten())

    # Softmax is an activation function that is used mainly for classification tasks
    # It normalizes the input vector into a probability distribution  that is proportional to the exponential of the input numbers.
    model.add(tf.keras.layers.Dense(len(UNIQUE_CLASS_LABELS), activation = "softmax"))
  except ValueError:
    print("No valid input...:(")
    error_flag = 1

  if error_flag == -1:
    model.compile(optimizer=Adam(learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    short_model_summary = "\n".join(stringlist)

    # Here we reshape the input of the network based on the type of the first layer of the network
    # If the first layer is conv
    if layers_lst[0] == 'conv':
      reshaped_x_train = x_train
      reshaped_x_test = features_test
    # If the first layer is lstm-gru-rnn
    elif layers_lst[0] == 'lstm' or layers_lst[0] == 'gru' or layers_lst[0] == 'rnn':
      num_samples, num_frames, height, width, channels = x_train.shape
      reshaped_x_train = x_train.reshape(num_samples, num_frames, height * width * channels)
      num_samples, num_frames, height, width, channels = features_test.shape
      reshaped_x_test = features_test.reshape(num_samples, num_frames, height * width * channels)
    # If the first layer is dense
    else:
      num_samples, num_frames, height, width, channels = x_train.shape
      reshaped_x_train = x_train.reshape(num_samples, num_frames * height * width * channels)
      num_samples, num_frames, height, width, channels = features_test.shape
      reshaped_x_test = features_test.reshape(num_samples, num_frames * height * width * channels)
    start = time.time()
    np.int = int
    blackbox = model.fit(x=reshaped_x_train,
                        y=y_train,
                        epochs=epochs_number,
                        batch_size=batch_size
                        )
    stop = time.time()
    tr_loss_lst = blackbox.history['loss']
    tr_accuracy_lst = blackbox.history['accuracy']
    # Compute the training speed of this CNN architecture
    tr_time = stop - start

    # Compute the accuracy of our training model in the testing dataset
    test_loss, test_acc = model.evaluate(reshaped_x_test,  labels_test, verbose=2)

    # # Return the validation accuracy for the last epoch.
    # accuracy = blackbox.history['val_accuracy'][-1]

    # Compute the metric that captures the accuracy--speed tradeoff
    tradeOff_metric = lamda_acc * test_acc - (1 - lamda_acc) * math.tanh(tr_time/theta_parameter - 1)

    # Print the results.
    print()
    print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
    print(f"Training time: ", tr_time)
    print(tradeOff_metric)
    print()

    tmp = "\nAccuracy (on the testing dataset): {0:.2%}".format(
        test_acc) + '\nTraining time:' + tr_time.__str__() + '\nTradeOff Metric:' + tradeOff_metric.__str__() + '\n\n'
    msg = pickle.dumps(input_str + short_model_summary + tmp)
    data_length = struct.pack('!I', len(msg))  # !I is for network order and unsigned int (4 bytes)

    streamlit_live_socket.sendall(data_length)
    streamlit_live_socket.sendall(msg)

    # Delete the Keras model with these hyper-parameters from memory.
    del model
    q.put([test_acc, tr_time, tr_loss_lst, tr_accuracy_lst, tradeOff_metric])
  else:
    q.put([0, 1000000000, 1000000000, 0, 0])


def create_model(layers_lst, layer2add, CONV_NEURONS_CONST, UNITS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND,
                 UNITS_BOUND, DENSE_NEURONS_BOUND):

    if layers_lst[0] == 'pool' or len(layers_lst) == 0:
        return -1

    # Initialize a sequential model
    model = tf.keras.models.Sequential()

    # Define the number of neurons for conv and dense layers and the number of units for lstm-gru-rnn
    conv_tmp2 = CONV_NEURONS_CONST
    units_tmp2 = UNITS_CONST
    dense_tmp2 = DENSE_NEURONS_CONST

    # Find the type of the next and the previous layer because you need different configurations
    for count, layer in enumerate(layers_lst):
        if count == 0 and len(layers_lst) > 1:
            previous_layer_tmp = 'no'
            next_layer_tmp = layers_lst[count + 1]
        elif count == 0:
            previous_layer_tmp = 'no'
            next_layer_tmp = 'no'
        elif count == len(layers_lst) - 1:
            next_layer_tmp = 'no'
            previous_layer_tmp = layers_lst[count - 1]
        else:
            previous_layer_tmp = layers_lst[count - 1]
            next_layer_tmp = layers_lst[count + 1]

        # Recreate the so-far-model
        # First layer conv
        if layer == 'conv' and count == 0:
            model.add(TimeDistributed(Conv2D(int(conv_tmp2), (3, 3), padding='same', activation='relu'),
                                      input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0], DATASET_SHAPE[1], 3)))
            conv_tmp2 = conv_tmp2 * 2
        # First layer lstm-gru-rnn (change the shape of the input) and next or 2-be-added layer lstm-gru-rnn (should add the 'return conf')
        elif ((layer == 'lstm' or layer == 'gru' or layer == 'rnn') and (((count == 0) and len(
            layers_lst) == 1 and (layer2add == 'lstm' or layer2add == 'gru' or layer2add == 'rnn')) or (
                                                                             (count == 0) and (
                                                                             next_layer_tmp == 'lstm' or next_layer_tmp == 'gru' or next_layer_tmp == 'rnn')))):
            if layer == 'lstm':
                model.add(tf.keras.layers.LSTM(int(units_tmp2), return_sequences=True,
                                               input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            elif layer == 'gru':
                model.add(tf.keras.layers.GRU(int(units_tmp2), return_sequences=True,
                                              input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            else:
                model.add(tf.keras.layers.SimpleRNN(int(units_tmp2), return_sequences=True,
                                                    input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            units_tmp2 = units_tmp2 / 2
        # First layer lstm-gru-rnn (change the shape of the input)
        elif ((layer == 'lstm' or layer == 'gru' or layer == 'rnn') and count == 0):
            if layer == 'lstm':
                model.add(tf.keras.layers.LSTM(int(units_tmp2),
                                               input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            elif layer == 'gru':
                model.add(tf.keras.layers.GRU(int(units_tmp2),
                                              input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            else:
                model.add(tf.keras.layers.SimpleRNN(int(units_tmp2),
                                                    input_shape=(SEQUENCE_LENGTH, DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3)))
            units_tmp2 = units_tmp2 / 2
        # First layer densse (change the shape of the input)
        elif layer == 'dense' and count == 0:
            model.add(tf.keras.layers.Dense(int(dense_tmp2), activation='relu',
                                            input_shape=(SEQUENCE_LENGTH * DATASET_SHAPE[0] * DATASET_SHAPE[1] * 3,)))
            dense_tmp2 = dense_tmp2 / 2
        # For the remaining layers
        else:
            if layer == 'conv':
                # Add a conv layer by doubling its neurons if they do not violate our user-defined bound
                if conv_tmp2 <= CONV_NEURONS_BOUND:
                    model.add(TimeDistributed(Conv2D(int(conv_tmp2), (3, 3), padding='same', activation='relu')))
                    conv_tmp2 = conv_tmp2 * 2
                else:
                    model.add(
                        TimeDistributed(Conv2D(int(CONV_NEURONS_BOUND), (3, 3), padding='same', activation='relu')))
                    conv_tmp2 = CONV_NEURONS_BOUND
            elif layer == 'pool':
                # Add a pool layer
                model.add(TimeDistributed(MaxPooling2D((4, 4))))
            elif layer == 'lstm':
                # If the previous layer is conv or pool add a flatten layer first
                if previous_layer_tmp == 'conv' or previous_layer_tmp == 'pool':
                    model.add(TimeDistributed(BatchNormalization()))
                    model.add(TimeDistributed(Flatten()))
                # Add a lstm layer by reducing (* 0.5) its units if they do not violate our user-defined bound
                if units_tmp2 >= UNITS_BOUND:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.LSTM(int(units_tmp2)))
                    else:
                        model.add(tf.keras.layers.LSTM(int(units_tmp2), return_sequences=True))
                    units_tmp2 = units_tmp2 / 2
                else:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.LSTM(int(UNITS_BOUND)))
                    else:
                        model.add(tf.keras.layers.LSTM(int(UNITS_BOUND), return_sequences=True))
                    units_tmp2 = UNITS_BOUND
            elif layer == 'gru':
                # If the previous layer is conv or pool add a flatten layer first
                if previous_layer_tmp == 'conv' or previous_layer_tmp == 'pool':
                    model.add(TimeDistributed(BatchNormalization()))
                    model.add(TimeDistributed(Flatten()))
                # Add a gru layer by reducing (* 0.5) its units if they do not violate our user-defined bound
                if units_tmp2 >= UNITS_BOUND:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.GRU(int(units_tmp2)))
                    else:
                        model.add(tf.keras.layers.GRU(int(units_tmp2), return_sequences=True))
                    units_tmp2 = units_tmp2 / 2
                else:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.GRU(int(UNITS_BOUND)))
                    else:
                        model.add(tf.keras.layers.GRU(int(UNITS_BOUND), return_sequences=True))
                    units_tmp2 = UNITS_BOUND
            elif layer == 'rnn':
                # If the previous layer is conv or pool add a flatten layer first
                if previous_layer_tmp == 'conv' or previous_layer_tmp == 'pool':
                    model.add(TimeDistributed(BatchNormalization()))
                    model.add(TimeDistributed(Flatten()))
                # Add a rnn layer by reducing (* 0.5) its units if they do not violate our user-defined bound
                if units_tmp2 >= UNITS_BOUND:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.SimpleRNN(int(units_tmp2)))
                    else:
                        model.add(tf.keras.layers.SimpleRNN(int(units_tmp2), return_sequences=True))
                    units_tmp2 = units_tmp2 / 2
                else:
                    # If the next layer is dense then do not return sequences
                    if next_layer_tmp == 'dense' or (layer2add == 'dense' and count == len(layers_lst) - 1):
                        model.add(tf.keras.layers.SimpleRNN(int(UNITS_BOUND)))
                    else:
                        model.add(tf.keras.layers.SimpleRNN(int(UNITS_BOUND), return_sequences=True))
                    units_tmp2 = UNITS_BOUND
            else:
                if previous_layer_tmp == 'conv' or previous_layer_tmp == 'pool':
                    model.add(Flatten())
                # Add a dense layer by reducing (* 0.5) its neurons if they do not violate our user-defined bound
                if dense_tmp2 >= DENSE_NEURONS_BOUND:
                    model.add(tf.keras.layers.Dense(int(dense_tmp2), activation='relu'))
                    dense_tmp2 = dense_tmp2 / 2
                else:
                    model.add(tf.keras.layers.Dense(int(DENSE_NEURONS_BOUND), activation='relu'))
                    dense_tmp2 = DENSE_NEURONS_BOUND

    return model, conv_tmp2, units_tmp2, dense_tmp2

def run_subito_opt():
    streamlit_socket, streamlit_live_socket_tmp, config = start_controller()
    global streamlit_live_socket
    streamlit_live_socket = streamlit_live_socket_tmp
    start_bo(config, streamlit_socket, UNIQUE_CLASS_LABELS, received_images_reshaped, received_labels_decoded, received_images_reshaped_test, received_labels_decoded_test, DATASET_SHAPE)
    streamlit_socket.close()
    streamlit_live_socket.close()
    print("Sockets Closed")
    print("Returned")

def socket_listener(conn):
    print("Waiting for run signal...")
    while True:
        # Receive data from the socket
        try:
            data = conn.recv(10)

            print('raw data is', data)
            data = data.decode()
            print('decoded data is', data)
            if data == 'start':
                print("Received run signal")
                run_subito_opt()
                print("--------------Go subito Go--------------")
        except Exception as error:
            print("An exception occurred:", error)
            print("Disconnecting Socket")
            #run_socket.connect((sbto_run_tunnel_addr, sbto_run_tunnel_port))

if __name__ == "__main__":
    import warnings
    warnings.filterwarnings("ignore")
    try:
        with open('config_video.json') as json_file:
            config = json.load(json_file)
    except:
        print("config_video.json not found")
        exit()
    print("Received Training Data:")
    print("------> # of received videos:", len(received_images_reshaped))
    print("------> # of received labels:", len(received_labels_decoded))
    print("Received Testing Data:")
    print("------> # of received video:", len(received_images_reshaped_test))
    print("------> # of received labels:", len(received_labels_decoded_test))
    # Initialize an IPv4 socket with TCP (default) and try to connect to the nn
    run_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    run_socket.connect((sbto_run_tunnel_addr, sbto_run_tunnel_port))
    socket_listener(run_socket)

Received Training Data:
------> # of received videos: 1003
------> # of received labels: 1003
Received Testing Data:
------> # of received video: 335
------> # of received labels: 335
Waiting for run signal...
raw data is b'p'
decoded data is p
raw data is b'start'
decoded data is start
Received run signal

EPOCHS to: 5 
 SAMPLE RATE to: 0.25 
 NUM_OF_CONV_LAYERS to: 2 
 NUM_OF_POOL_LAYERS to: 2 
 NUM_OF_DENSE_LAYERS to: 1 
 NUM_OF_LSTM_LAYERS to: 1 
 NUM_OF_GRU_LAYERS to: 0 
 NUM_OF_RNN_LAYERS to: 0

---------------------------->['conv', 'pool', 'conv', 'pool', 'lstm', 'dense']
Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 28ms/step - accuracy: 0.1807 - loss: 2.2299
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.4938 - loss: 1.8274
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.5169 - loss: 1.4758
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

KeyboardInterrupt: 