In [None]:
import os
import subprocess

def download(url, path):

    # comando para fazer o download
    cmd = ['wget', '-q', url, '-O', '%s' %(path)]

    # executa o comando sem exibir a saída na tela
    processo = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # aguarda o término do download
    stdout, stderr = processo.communicate()

    # Verifica se o processo foi concluído com sucesso (código de retorno zero)
    if processo.returncode == 0:
        print(f'Processo finalizado.')
    else:
        print(f'Houve um erro: {stderr.decode("utf-8")}')

# especifica o local onde ficarao os arquivos
pathFiles = 'dados/'

# cria uma pasta onde ficarao os arquivos
if not os.path.isdir(pathFiles):
    os.mkdir(pathFiles)

url = 'https://www.nuscenes.org/data/v1.0-mini.tgz'
pathDataset1 = pathFiles + '/nuscenes_v1.0-mini.tgz'
download(url, pathDataset1)

Processo finalizado.


In [None]:
import tarfile

def descompacta(path, pathFolder):

    try:
        # Descompacta ao arquivo
        with tarfile.open(pathDataset1, 'r:gz') as tar:
          tar.extractall(path=pathFolder)

        print("Arquivo descompactado com sucesso!")
    except:
        print("Houve um erro ao tentar descompactar o arquivo")

pathFiles = pathFiles + "/nuscenes"

# Cria o diretório de extração se não existir
os.makedirs(pathFiles, exist_ok=True)

descompacta(pathDataset1, pathFiles)

Arquivo descompactado com sucesso!


In [None]:
!pip install nuscenes-devkit &> /dev/null  # Install nuScenes.

# Converte uma bounding box 3D para 2D

A função abaixo recebe as informações da bounding box 3D de um determinado objeto e as informações de calibração da câmera. Com base nisso, ela faz a conversão para 2D. Esse código foi baseado no notebook disponível nesse link:
- <https://github.com/asvath/mobile_robotics/blob/master/nuscenes%20extract%20and%20write%20out%202d%20annotation%20boxes-revised%20to%20truncate%20bb.ipynb>

In [None]:
import numpy as np
import pandas as pd
import cv2
from nuscenes.nuscenes import NuScenes

from pyquaternion import Quaternion

from nuscenes.utils.data_classes import Box
from nuscenes.utils.geometry_utils import view_points, BoxVisibility, box_in_image

import matplotlib.pyplot as plt

def threeD_2_twoD(boxsy,intrinsic): #input is a single annotation box
    '''
    given annotation boxes and intrinsic camera matrix
    outputs the 2d bounding box coordinates as a list (all annotations for a particular sample image)
    '''
    corners = boxsy.corners()
    x = corners[0,:]
    y = corners[1,:]
    z = corners[2,:]
    x_y_z = np.array((x,y,z))
    orthographic = np.dot(intrinsic,x_y_z)
    perspective_x = orthographic[0]/orthographic[2]
    perspective_y = orthographic[1]/orthographic[2]
    perspective_z = orthographic[2]/orthographic[2]

    min_x = int(np.min(perspective_x))
    max_x = int(np.max(perspective_x))
    min_y = int(np.min(perspective_y))
    max_y = int(np.max(perspective_y))

    return min_x,max_x,min_y,max_y

def getBoundBox(nusc, sample, camera, annotation_metadata):
    """
    Obtém a bounding box 3D e, depois, usando as informações da câmera,
    chama a função que converte para 2D
    """

    camera_token = sample['data'][camera]

    # Carregue os dados da câmera
    cam_data = nusc.get('sample_data', camera_token)

    cam_intrinsics = np.array(nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token'])['camera_intrinsic'])
    cam_height = cam_data['height']
    cam_width = cam_data['width']

    # obtenha a bounding box 3D do pedestre
    # a variavel ann foi filtrada a partir do token do pedestre. Portanto, o parametro translation contem
    # as coordenadas do pedestre em questao
    translation = annotation_metadata['translation']
    size = annotation_metadata['size']
    rotation = Quaternion(annotation_metadata['rotation'])
    bbox = Box(translation, size, rotation)

    # Obtenha a pose da câmera
    ego_pose = nusc.get('ego_pose', cam_data['ego_pose_token'])
    cam_pose = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token'])

    # Transforme a bounding box do pedestre para o sistema de coordenadas da câmera
    bbox.translate(-np.array(ego_pose['translation']))
    bbox.rotate(Quaternion(ego_pose['rotation']).inverse)
    bbox.translate(-np.array(cam_pose['translation']))
    bbox.rotate(Quaternion(cam_pose['rotation']).inverse)

    # Verificar se a bounding box está dentro do campo de visão da câmera
    if not box_in_image(bbox, cam_intrinsics, imsize=[cam_width, cam_height], vis_level=BoxVisibility.ANY):
        return None, None, None, None

    # converts box into image plane
    min_x, max_x, min_y, max_y = threeD_2_twoD(bbox,cam_intrinsics)

    return min_x, max_x, min_y, max_y

A função abaixo aplica a conversão das bounding box para todas as anotações da base de dados. Além disso, guarda todas as informações em um dataframe.

In [None]:
def get_scenes(nusc):

    columns = ['scene_token', 'category_name', 'instance_token', 'translation', 'size', 'rotation']
    annotations_df = pd.DataFrame(columns=columns)

    dataList = []
    for scene in nusc.scene:
        scene_token = scene['token']
        print(f"Processing scene: {scene_token}")  # Adicione esta linha para depuração
        sample_token = scene['first_sample_token']

        while sample_token:
            sample = nusc.get('sample', sample_token)

            scene_token = sample['scene_token']
            timestamp = sample['timestamp']

            # coletar todos os caminhos das imagens da câmera
            for sensor_channel in ['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT']:
                sample_data_token_camera = sample['data'][sensor_channel]
                sample_data_camera = nusc.get('sample_data', sample_data_token_camera)
                image_path = sample_data_camera['filename']


                for annotation_token in sample['anns']:
                    annotation_metadata = nusc.get('sample_annotation', annotation_token)
                    instance_metadata = nusc.get('instance', annotation_metadata['instance_token'])
                    category_metadata = nusc.get('category', instance_metadata['category_token'])

                    # visibility_token = annotation_metadata['visibility_token']

                    # obter descrição da visibilidade
                    # visibility = nusc.get('visibility', visibility_token)['description']

                    min_x, max_x, min_y, max_y = getBoundBox(nusc, sample, sensor_channel, annotation_metadata)

                    data = {
                        'sample_token_camera': sample_data_token_camera,
                        'sample_token': sample_token,
                        'scene_token': scene_token,  # Certifique-se de que `scene_token` está sendo atribuído aqui
                        'instance_token': annotation_metadata['instance_token'],
                        'category_name': category_metadata['name'],
                        'image_path': image_path,
                        'camera': sensor_channel,
                        'translation': annotation_metadata['translation'],
                        'size': annotation_metadata['size'],
                        'rotation': annotation_metadata['rotation'],
                        'timestamp': timestamp,
                        # 'box_visibility': visibility_token,
                        'bbox_min_x': min_x,
                        'bbox_max_x': max_x,
                        'bbox_min_y': min_y,
                        'bbox_max_y': max_y,
                    }

                    dataList.append(data)

            sample_token = sample['next']

    # converta para um dataframe
    annotations_df = pd.DataFrame(dataList)

    # Ordenar pelo campo 'timestamp'
    annotations_df = annotations_df.sort_values(by='timestamp')

    annotations_df.to_csv('annotations_with_scenes.csv', index=False)

    return annotations_df

Converte toda a base de dados e salva como .csv

In [None]:
if __name__ == "__main__":

    pathDataset = "/content/dados/nuscenes/"

    path = "dados/annotations_bbox2D.csv"

    nusc = NuScenes(version='v1.0-mini', dataroot=pathDataset, verbose=True)

    annotations_df = get_scenes(nusc)

    annotations_df.to_csv(path, index=False)

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.598 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.
Processing scene: cc8c0bf57f984915a77078b10eb33198
Processing scene: fcbccedd61424f1b85dcbf8f897f9754
Processing scene: 6f83169d067343658251f72e1dd17dbc
Processing scene: bebf5f5b2a674631ab5c88fd1aa9e87a
Processing scene: 2fc3753772e241f2ab2cd16a784cc680
Processing scene: c5224b9b454b4ded9b5d2d2634bbda8a
Processing scene: 325cef682f064c55a255f2625c533b75
Processing scene: d25718445d89453381c659b9c8734939
Processing scene: de7d80a1f5fb4c3e82ce8a4f213b450a
Processing scene: e233467e827140efa4b42d2b4c435855


# Filtra a base de dados

A função abaixo filtra apenas algumas categorias de interesse.


In [None]:
def filtraCategorias(annotations_df):
    # Definir as categorias de interesse - apenas pedestres
    categories_of_interest = [
        'human.pedestrian.adult',
        'human.pedestrian.child',
        'human.pedestrian.construction_worker',
        'human.pedestrian.personal_mobility',
        'human.pedestrian.police_officer'
    ]

    # Filtrar as anotações que correspondem a essas categorias
    filtered_annotations_df = annotations_df[annotations_df['category_name'].isin(categories_of_interest)]

    return filtered_annotations_df


A função abaixo remove as bounding box nulas. Depois, aplica as duas funções.

In [None]:
def remove_bbox_nula(df):
    """
    Remove as linhas onde a bounding box é nula
    """

    df = df.dropna(subset=['bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y'])

    return df


Aplica a função que seleciona algumas categorias e a função que remove as bounding box nulas.

In [None]:
if __name__ == "__main__":

  path_filtered = "dados/annotations_bbox2D_filtered.csv"

  # aplica a função que filtra as categorias desejadas
  annotations_df_filtered = filtraCategorias(annotations_df)

  # aplica a função que remove as bouding box nulas
  annotations_df_filtered = remove_bbox_nula(annotations_df_filtered)

  # salva o dataframe
  annotations_df_filtered.to_csv(path_filtered, index=False)

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, MaxPooling2D, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import random
import os

# Função para definir a seed global para reprodução dos resultados
def set_seed(seed_value=42):
    random.seed(seed_value)
    np.random.seed(seed_value)
    tf.random.set_seed(seed_value)

    # Configurações adicionais para reprodução consistente no TensorFlow
    os.environ['PYTHONHASHSEED'] = str(seed_value)  # Hash seed
    os.environ['TF_DETERMINISTIC_OPS'] = '1'  # Força operações determinísticas no TensorFlow

    # Configurações adicionais para TensorFlow
    # Definir configuração para limitar o uso de threads paralelos
    tf.config.threading.set_intra_op_parallelism_threads(1)
    tf.config.threading.set_inter_op_parallelism_threads(1)

# Chamar a função para definir a seed
set_seed()

In [None]:
# Função para calcular o IoU
def calcula_iou(bbox_real, bbox_predito):
    xA = max(bbox_real[0], bbox_predito[0])
    yA = max(bbox_real[2], bbox_predito[2])
    xB = min(bbox_real[1], bbox_predito[1])
    yB = min(bbox_real[3], bbox_predito[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxA_area = (bbox_real[1] - bbox_real[0] + 1) * (bbox_real[3] - bbox_real[2] + 1)
    boxB_area = (bbox_predito[1] - bbox_predito[0] + 1) * (bbox_predito[3] - bbox_predito[2] + 1)

    iou = interArea / float(boxA_area + boxB_area - interArea)
    return iou

def calcula_iou_medio(y_true, y_pred):
    iou_sum = 0
    count = len(y_true)

    for i in range(count):
        iou_sum += calcula_iou(y_true[i], y_pred[i])

    return iou_sum / count

In [None]:

# Carregar o DataFrame gerado com as anotações das bounding boxes
annotations_df = pd.read_csv('/content/dados/annotations_bbox2D_filtered.csv')

# Ordenar pelo 'instance_token' e 'timestamp' para garantir que os dados estejam na ordem correta
annotations_df = annotations_df.sort_values(by=['instance_token', 'timestamp'])

# Shift para obter o bbox do timestamp anterior e criar colunas de diferença
annotations_df['prev_bbox_min_x'] = annotations_df.groupby('instance_token')['bbox_min_x'].shift(1)
annotations_df['prev_bbox_max_x'] = annotations_df.groupby('instance_token')['bbox_max_x'].shift(1)
annotations_df['prev_bbox_min_y'] = annotations_df.groupby('instance_token')['bbox_min_y'].shift(1)
annotations_df['prev_bbox_max_y'] = annotations_df.groupby('instance_token')['bbox_max_y'].shift(1)

# Remove as linhas onde não temos dados prévios
annotations_df = annotations_df.dropna(subset=['prev_bbox_min_x', 'prev_bbox_max_x', 'prev_bbox_min_y', 'prev_bbox_max_y'])

# Definir as features (inputs) e a variável alvo (outputs)
X = annotations_df[['prev_bbox_min_x', 'prev_bbox_max_x', 'prev_bbox_min_y', 'prev_bbox_max_y',
                    'bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y']]
y = annotations_df[['bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y']]

# Converter para numpy arrays e ajustar a dimensão para o modelo RNN
X = X.values.reshape((X.shape[0], 1, X.shape[1]))
y = y.values

In [None]:
# Dividir em treino e teste mantendo a ordem temporal
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Função que cria o modelo RNN
def create_rnn_model(input_shape, hidden_units, activation, learning_rate):
    # Definir o input da rede
    input_layer = Input(shape=input_shape)

    # Adicionar camadas LSTM
    x = input_layer
    for units in hidden_units:
        x = LSTM(units=units, activation=activation, return_sequences=True)(x)

    # Adicionar a última camada LSTM sem "return_sequences"
    x = LSTM(units=hidden_units[-1], activation=activation)(x)

    # Camada densa final
    output_layer = Dense(4, activation='linear')(x)

    # Criar o modelo
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Função para testar os modelos com diferentes hiperparâmetros
def test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list):
    best_iou = 0
    best_params = None

    for hidden_units in hidden_units_list:
        for activation in activations:
            for learning_rate in learning_rates:
                for epochs in epochs_list:
                    print(f"Testing model with hidden_units={hidden_units}, activation={activation}, learning_rate={learning_rate}, epochs={epochs}")

                    # Criar o modelo
                    model = create_rnn_model(X_train.shape[1:], hidden_units, activation, learning_rate)

                    # Treinar o modelo
                    model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)

                    # Fazer previsões
                    y_pred = model.predict(X_test)

                    # Calcular o IoU médio
                    iou_medio = calcula_iou_medio(y_test, y_pred)
                    print(f"IoU médio: {iou_medio}")

                    # Atualizar os melhores parâmetros se necessário
                    if iou_medio > best_iou:
                        best_iou = iou_medio
                        best_params = (hidden_units, activation, learning_rate, epochs)

    print(f"Melhor IoU: {best_iou}")
    print(f"Melhores parâmetros: hidden_units={best_params[0]}, activation={best_params[1]}, learning_rate={best_params[2]}, epochs={best_params[3]}")

# Execução do código no ambiente Colab
set_seed()

# Definir os hiperparâmetros a serem testados
hidden_units_list = [
    (128, 64, 32, 16, 8), (64, 32, 16, 8), (32, 16, 8), (16, 8), (64, 64), (64, 32), (32, 32)]  # Diferentes tamanhos de camadas ocultas
activations = ['linear', 'relu', 'leaky_relu', 'selu', 'swish', 'mish']  # Funções de ativação
learning_rates = [0.0001, 0.001, 0.01]  # Diferentes taxas de aprendizado
epochs_list = [10, 20, 30, 40, 50]  # Diferentes números de épocas a serem testados

# Testar os modelos
test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list)

Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=10
IoU médio: 0.03241936945436609
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=20
IoU médio: 0.055836415512628336
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=30
IoU médio: 0.08151512334369597
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=40
IoU médio: 0.14631403312470484
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=50
IoU médio: 0.08042616252170141
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.001, epochs=10
IoU médio: 0.1486628654485841
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.001, epochs=20
IoU médio: 0.34003606770486233
Testing model with hidden_units=(128, 64, 32, 16, 

In [None]:
from tensorflow.keras.layers import GRU

In [None]:
# Dividir em treino e teste mantendo a ordem temporal
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Função que cria o modelo RNN (agora utilizando GRU)
def create_rnn_model(input_shape, hidden_units, activation, learning_rate):
    # Definir o input da rede
    input_layer = Input(shape=input_shape)

    # Adicionar camadas GRU
    x = input_layer
    for units in hidden_units:
        x = GRU(units=units, activation=activation, return_sequences=True)(x)

    # Adicionar a última camada GRU sem "return_sequences"
    x = GRU(units=hidden_units[-1], activation=activation)(x)

    # Camada densa final
    output_layer = Dense(4, activation='linear')(x)

    # Criar o modelo
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Função para testar os modelos com diferentes hiperparâmetros
def test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list):
    best_iou = 0
    best_params = None

    for hidden_units in hidden_units_list:
        for activation in activations:
            for learning_rate in learning_rates:
                for epochs in epochs_list:
                    print(f"Testing model with hidden_units={hidden_units}, activation={activation}, learning_rate={learning_rate}, epochs={epochs}")

                    # Criar o modelo
                    model = create_rnn_model(X_train.shape[1:], hidden_units, activation, learning_rate)

                    # Treinar o modelo
                    model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)

                    # Fazer previsões
                    y_pred = model.predict(X_test)

                    # Calcular o IoU médio
                    iou_medio = calcula_iou_medio(y_test, y_pred)
                    print(f"IoU médio: {iou_medio}")

                    # Atualizar os melhores parâmetros se necessário
                    if iou_medio > best_iou:
                        best_iou = iou_medio
                        best_params = (hidden_units, activation, learning_rate, epochs)

    print(f"Melhor IoU: {best_iou}")
    print(f"Melhores parâmetros: hidden_units={best_params[0]}, activation={best_params[1]}, learning_rate={best_params[2]}, epochs={best_params[3]}")

# Execução do código no ambiente Colab
set_seed()

# Definir os hiperparâmetros a serem testados
hidden_units_list = [
    (128, 64, 32, 16, 8), (64, 32, 16, 8), (32, 16, 8), (16, 8), (64, 64), (64, 32), (32, 32)]  # Diferentes tamanhos de camadas ocultas
activations = ['linear', 'relu', 'leaky_relu', 'selu', 'swish', 'mish']  # Funções de ativação
learning_rates = [0.0001, 0.001, 0.01]  # Diferentes taxas de aprendizado
epochs_list = [10, 20, 30, 40, 50]  # Diferentes números de épocas a serem testados

# Testar os modelos
test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list)

Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=10
IoU médio: 0.0027979637841374703
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=20
IoU médio: 0.005795452782118578
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=30
IoU médio: 0.008729198715110516
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=40
IoU médio: 0.017487609553004442
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.0001, epochs=50
IoU médio: 0.016465474590666225
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.001, epochs=10
IoU médio: 0.007857579452866744
Testing model with hidden_units=(128, 64, 32, 16, 8), activation=linear, learning_rate=0.001, epochs=20
IoU médio: 0.011298004973275228
Testing model with hidden_units=(128, 64, 

In [None]:
# Função para calcular o IoU
def calcula_iou(bbox_real, bbox_predito):
    xA = max(bbox_real[0], bbox_predito[0])
    yA = max(bbox_real[2], bbox_predito[2])
    xB = min(bbox_real[1], bbox_predito[1])
    yB = min(bbox_real[3], bbox_predito[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxA_area = (bbox_real[1] - bbox_real[0] + 1) * (bbox_real[3] - bbox_real[2] + 1)
    boxB_area = (bbox_predito[1] - bbox_predito[0] + 1) * (bbox_predito[3] - bbox_predito[2] + 1)

    iou = interArea / float(boxA_area + boxB_area - interArea)
    return iou

def calcula_iou_medio(y_true, y_pred):
    iou_sum = 0
    count = len(y_true)

    for i in range(count):
        iou_sum += calcula_iou(y_true[i], y_pred[i])

    return iou_sum / count

# Carregar o DataFrame gerado com as anotações das bounding boxes
annotations_df = pd.read_csv('/content/dados/annotations_bbox2D_filtered.csv')

# Ordenar pelo 'instance_token' e 'timestamp' para garantir que os dados estejam na ordem correta
annotations_df = annotations_df.sort_values(by=['instance_token', 'timestamp'])

# Shift para obter múltiplos bboxes anteriores e criar colunas de diferença
time_steps = 5  # Número de timestamps anteriores a serem usados como input
for i in range(1, time_steps + 1):
    annotations_df[f'prev_bbox_min_x_{i}'] = annotations_df.groupby('instance_token')['bbox_min_x'].shift(i)
    annotations_df[f'prev_bbox_max_x_{i}'] = annotations_df.groupby('instance_token')['bbox_max_x'].shift(i)
    annotations_df[f'prev_bbox_min_y_{i}'] = annotations_df.groupby('instance_token')['bbox_min_y'].shift(i)
    annotations_df[f'prev_bbox_max_y_{i}'] = annotations_df.groupby('instance_token')['bbox_max_y'].shift(i)

# Remove as linhas onde não temos dados suficientes dos timestamps anteriores
annotations_df = annotations_df.dropna(subset=[f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)])

# Definir as features (inputs) e a variável alvo (outputs)
input_columns = [
    f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_max_x_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_min_y_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_max_y_{i}' for i in range(1, time_steps + 1)
]

X = annotations_df[input_columns]
y = annotations_df[['bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y']]

# Converter para numpy arrays e ajustar a dimensão para o modelo RNN
X = X.values.reshape((X.shape[0], time_steps, 4))
y = y.values

# Dividir em treino e teste mantendo a ordem temporal
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Função que cria o modelo RNN (agora utilizando GRU)
def create_rnn_model(input_shape, hidden_units, activation, learning_rate):
    # Definir o input da rede
    input_layer = Input(shape=input_shape)

    # Adicionar camadas GRU
    x = input_layer
    for units in hidden_units:
        x = GRU(units=units, activation=activation, return_sequences=True)(x)

    # Adicionar a última camada GRU sem "return_sequences"
    x = GRU(units=hidden_units[-1], activation=activation)(x)

    # Camada densa final
    output_layer = Dense(4, activation='linear')(x)

    # Criar o modelo
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Função para testar os modelos com diferentes hiperparâmetros
def test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list):
    best_iou = 0
    best_params = None

    for hidden_units in hidden_units_list:
        for activation in activations:
            for learning_rate in learning_rates:
                for epochs in epochs_list:
                    print(f"Testing model with hidden_units={hidden_units}, activation={activation}, learning_rate={learning_rate}, epochs={epochs}")

                    # Criar o modelo
                    model = create_rnn_model(X_train.shape[1:], hidden_units, activation, learning_rate)

                    # Treinar o modelo
                    model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)

                    # Fazer previsões
                    y_pred = model.predict(X_test)

                    # Calcular o IoU médio
                    iou_medio = calcula_iou_medio(y_test, y_pred)
                    print(f"IoU médio: {iou_medio}")

                    # Atualizar os melhores parâmetros se necessário
                    if iou_medio > best_iou:
                        best_iou = iou_medio
                        best_params = (hidden_units, activation, learning_rate, epochs)

    print(f"Melhor IoU: {best_iou}")
    print(f"Melhores parâmetros: hidden_units={best_params[0]}, activation={best_params[1]}, learning_rate={best_params[2]}, epochs={best_params[3]}")

# Execução do código no ambiente Colab
set_seed()

# Definir os hiperparâmetros a serem testados
hidden_units_list = [
    (128, 64, 32, 16, 8), (64, 32, 16, 8), (32, 16, 8), (16, 8), (64, 64), (64, 32), (32, 32)]  # Diferentes tamanhos de camadas ocultas
activations = ['linear', 'relu', 'leaky_relu', 'selu', 'swish', 'mish']  # Funções de ativação
learning_rates = [0.0001, 0.001, 0.01]  # Diferentes taxas de aprendizado
epochs_list = [10, 20, 30, 40, 50]  # Diferentes números de épocas a serem testados

# Testar os modelos
test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list)

In [None]:
# Função para calcular o IoU
def calcula_iou(bbox_real, bbox_predito):
    xA = max(bbox_real[0], bbox_predito[0])
    yA = max(bbox_real[2], bbox_predito[2])
    xB = min(bbox_real[1], bbox_predito[1])
    yB = min(bbox_real[3], bbox_predito[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxA_area = (bbox_real[1] - bbox_real[0] + 1) * (bbox_real[3] - bbox_real[2] + 1)
    boxB_area = (bbox_predito[1] - bbox_predito[0] + 1) * (bbox_predito[3] - bbox_predito[2] + 1)

    iou = interArea / float(boxA_area + boxB_area - interArea)
    return iou

def calcula_iou_medio(y_true, y_pred):
    iou_sum = 0
    count = len(y_true)

    for i in range(count):
        iou_sum += calcula_iou(y_true[i], y_pred[i])

    return iou_sum / count

# Carregar o DataFrame gerado com as anotações das bounding boxes
annotations_df = pd.read_csv('/content/dados/annotations_bbox2D_filtered.csv')

# Ordenar pelo 'instance_token' e 'timestamp' para garantir que os dados estejam na ordem correta
annotations_df = annotations_df.sort_values(by=['instance_token', 'timestamp'])

# Shift para obter múltiplos bboxes anteriores e criar colunas de diferença
time_steps = 5  # Número de timestamps anteriores a serem usados como input
for i in range(1, time_steps + 1):
    annotations_df[f'prev_bbox_min_x_{i}'] = annotations_df.groupby('instance_token')['bbox_min_x'].shift(i)
    annotations_df[f'prev_bbox_max_x_{i}'] = annotations_df.groupby('instance_token')['bbox_max_x'].shift(i)
    annotations_df[f'prev_bbox_min_y_{i}'] = annotations_df.groupby('instance_token')['bbox_min_y'].shift(i)
    annotations_df[f'prev_bbox_max_y_{i}'] = annotations_df.groupby('instance_token')['bbox_max_y'].shift(i)

# Remove as linhas onde não temos dados suficientes dos timestamps anteriores
annotations_df = annotations_df.dropna(subset=[f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)])

# Definir as features (inputs) e a variável alvo (outputs)
input_columns = [
    f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_max_x_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_min_y_{i}' for i in range(1, time_steps + 1)
] + [
    f'prev_bbox_max_y_{i}' for i in range(1, time_steps + 1)
]

X = annotations_df[input_columns]
y = annotations_df[['bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y']]

# Converter para numpy arrays e ajustar a dimensão para o modelo RNN
X = X.values.reshape((X.shape[0], time_steps, 4))
y = y.values

# Dividir em treino e teste mantendo a ordem temporal
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Função que cria o modelo RNN
def create_rnn_model(input_shape, hidden_units, activation, learning_rate):
    # Definir o input da rede
    input_layer = Input(shape=input_shape)

    # Adicionar camadas LSTM
    x = input_layer
    for units in hidden_units:
        x = LSTM(units=units, activation=activation, return_sequences=True)(x)

    # Adicionar a última camada LSTM sem "return_sequences"
    x = LSTM(units=hidden_units[-1], activation=activation)(x)

    # Camada densa final
    output_layer = Dense(4, activation='linear')(x)

    # Criar o modelo
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Função para testar os modelos com diferentes hiperparâmetros
def test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list):
    best_iou = 0
    best_params = None

    for hidden_units in hidden_units_list:
        for activation in activations:
            for learning_rate in learning_rates:
                for epochs in epochs_list:
                    print(f"Testing model with hidden_units={hidden_units}, activation={activation}, learning_rate={learning_rate}, epochs={epochs}")

                    # Criar o modelo
                    model = create_rnn_model(X_train.shape[1:], hidden_units, activation, learning_rate)

                    # Treinar o modelo
                    model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)

                    # Fazer previsões
                    y_pred = model.predict(X_test)

                    # Calcular o IoU médio
                    iou_medio = calcula_iou_medio(y_test, y_pred)
                    print(f"IoU médio: {iou_medio}")

                    # Atualizar os melhores parâmetros se necessário
                    if iou_medio > best_iou:
                        best_iou = iou_medio
                        best_params = (hidden_units, activation, learning_rate, epochs)

    print(f"Melhor IoU: {best_iou}")
    print(f"Melhores parâmetros: hidden_units={best_params[0]}, activation={best_params[1]}, learning_rate={best_params[2]}, epochs={best_params[3]}")

# Execução do código no ambiente Colab
set_seed()

# Definir os hiperparâmetros a serem testados
hidden_units_list = [
    (128, 64, 32, 16, 8), (64, 32, 16, 8), (32, 16, 8), (16, 8), (64, 64), (64, 32), (32, 32)]  # Diferentes tamanhos de camadas ocultas
activations = ['linear', 'relu', 'leaky_relu', 'selu', 'swish', 'mish']  # Funções de ativação
learning_rates = [0.0001, 0.001, 0.01]  # Diferentes taxas de aprendizado
epochs_list = [10, 20, 30, 40, 50]  # Diferentes números de épocas a serem testados

# Testar os modelos
test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list)

In [None]:
# Função para calcular o IoU
def calcula_iou(bbox_real, bbox_predito):
    xA = max(bbox_real[0], bbox_predito[0])
    yA = max(bbox_real[2], bbox_predito[2])
    xB = min(bbox_real[1], bbox_predito[1])
    yB = min(bbox_real[3], bbox_predito[3])

    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxA_area = (bbox_real[1] - bbox_real[0] + 1) * (bbox_real[3] - bbox_real[2] + 1)
    boxB_area = (bbox_predito[1] - bbox_predito[0] + 1) * (bbox_predito[3] - bbox_predito[2] + 1)

    iou = interArea / float(boxA_area + boxB_area - interArea)
    return iou

def calcula_iou_medio(y_true, y_pred):
    iou_sum = 0
    count = len(y_true)

    for i in range(count):
        iou_sum += calcula_iou(y_true[i], y_pred[i])

    return iou_sum / count

# Carregar o DataFrame gerado com as anotações das bounding boxes
annotations_df = pd.read_csv('/content/dados/annotations_bbox2D_filtered.csv')

# Ordenar pelo 'instance_token' e 'timestamp' para garantir que os dados estejam na ordem correta
annotations_df = annotations_df.sort_values(by=['instance_token', 'timestamp'])

# Função para criar e testar os cenários
def run_scenario(time_steps, output_steps):
    # Shift para obter múltiplos bboxes anteriores e criar colunas de diferença
    for i in range(1, time_steps + 1):
        annotations_df[f'prev_bbox_min_x_{i}'] = annotations_df.groupby('instance_token')['bbox_min_x'].shift(i)
        annotations_df[f'prev_bbox_max_x_{i}'] = annotations_df.groupby('instance_token')['bbox_max_x'].shift(i)
        annotations_df[f'prev_bbox_min_y_{i}'] = annotations_df.groupby('instance_token')['bbox_min_y'].shift(i)
        annotations_df[f'prev_bbox_max_y_{i}'] = annotations_df.groupby('instance_token')['bbox_max_y'].shift(i)

    # Remove as linhas onde não temos dados suficientes dos timestamps anteriores
    annotations_df_filtered = annotations_df.dropna(subset=[f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)])

    # Definir as features (inputs) e a variável alvo (outputs)
    input_columns = [
        f'prev_bbox_min_x_{i}' for i in range(1, time_steps + 1)
    ] + [
        f'prev_bbox_max_x_{i}' for i in range(1, time_steps + 1)
    ] + [
        f'prev_bbox_min_y_{i}' for i in range(1, time_steps + 1)
    ] + [
        f'prev_bbox_max_y_{i}' for i in range(1, time_steps + 1)
    ]

    output_columns = ['bbox_min_x', 'bbox_max_x', 'bbox_min_y', 'bbox_max_y']
    for i in range(1, output_steps):
        annotations_df_filtered[f'future_bbox_min_x_{i}'] = annotations_df_filtered.groupby('instance_token')['bbox_min_x'].shift(-i)
        annotations_df_filtered[f'future_bbox_max_x_{i}'] = annotations_df_filtered.groupby('instance_token')['bbox_max_x'].shift(-i)
        annotations_df_filtered[f'future_bbox_min_y_{i}'] = annotations_df_filtered.groupby('instance_token')['bbox_min_y'].shift(-i)
        annotations_df_filtered[f'future_bbox_max_y_{i}'] = annotations_df_filtered.groupby('instance_token')['bbox_max_y'].shift(-i)
        output_columns += [f'future_bbox_min_x_{i}', f'future_bbox_max_x_{i}', f'future_bbox_min_y_{i}', f'future_bbox_max_y_{i}']

    # Remove as linhas onde não temos dados suficientes dos timestamps futuros
    annotations_df_filtered = annotations_df_filtered.dropna(subset=[f'future_bbox_min_x_{i}' for i in range(1, output_steps)])

    X = annotations_df_filtered[input_columns]
    y = annotations_df_filtered[output_columns]

    # Converter para numpy arrays e ajustar a dimensão para o modelo RNN
    X = X.values.reshape((X.shape[0], time_steps, 4))
    y = y.values.reshape((y.shape[0], output_steps, 4))

    # Dividir em treino e teste mantendo a ordem temporal
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Testar os modelos
    print(f"\nRodando cenário com {time_steps} inputs e {output_steps} outputs")
    test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list)

# Função que cria o modelo RNN
def create_rnn_model(input_shape, hidden_units, activation, learning_rate):
    # Definir o input da rede
    input_layer = Input(shape=input_shape)

    # Adicionar camadas LSTM
    x = input_layer
    for units in hidden_units:
        x = LSTM(units=units, activation=activation, return_sequences=True)(x)

    # Adicionar a última camada LSTM sem "return_sequences"
    x = LSTM(units=hidden_units[-1], activation=activation)(x)

    # Camada densa final
    output_layer = Dense(input_shape[0] * 4, activation='linear')(x)
    output_layer = tf.reshape(output_layer, (-1, input_shape[0], 4))

    # Criar o modelo
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Função para testar os modelos com diferentes hiperparâmetros
def test_rnn_models(X_train, X_test, y_train, y_test, hidden_units_list, activations, learning_rates, epochs_list):
    best_iou = 0
    best_params = None

    for hidden_units in hidden_units_list:
        for activation in activations:
            for learning_rate in learning_rates:
                for epochs in epochs_list:
                    print(f"Testing model with hidden_units={hidden_units}, activation={activation}, learning_rate={learning_rate}, epochs={epochs}")

                    # Criar o modelo
                    model = create_rnn_model(X_train.shape[1:], hidden_units, activation, learning_rate)

                    # Treinar o modelo
                    model.fit(X_train, y_train, epochs=epochs, batch_size=32, verbose=0)

                    # Fazer previsões
                    y_pred = model.predict(X_test)

                    # Calcular o IoU médio
                    iou_medio = calcula_iou_medio(y_test.reshape(-1, 4), y_pred.reshape(-1, 4))
                    print(f"IoU médio: {iou_medio}")

                    # Atualizar os melhores parâmetros se necessário
                    if iou_medio > best_iou:
                        best_iou = iou_medio
                        best_params = (hidden_units, activation, learning_rate, epochs)

    print(f"Melhor IoU: {best_iou}")
    print(f"Melhores parâmetros: hidden_units={best_params[0]}, activation={best_params[1]}, learning_rate={best_params[2]}, epochs={best_params[3]}")

# Execução do código no ambiente Colab
set_seed()

# Definir os hiperparâmetros a serem testados
hidden_units_list = [
    (128, 64, 32, 16, 8), (64, 32, 16, 8), (32, 16, 8), (16, 8), (64, 64), (64, 32), (32, 32)]  # Diferentes tamanhos de camadas ocultas
activations = ['linear', 'relu', 'leaky_relu', 'selu', 'swish', 'mish']  # Funções de ativação
learning_rates = [0.0001, 0.001, 0.01]  # Diferentes taxas de aprendizado
epochs_list = [10, 20, 30, 40, 50]  # Diferentes números de épocas a serem testados

# Executar os quatro cenários
run_scenario(5, 5)
run_scenario(10, 5)
run_scenario(5, 10)
run_scenario(10, 10)
run_scenario(5, 1)
run_scenario(10, 1)
run_scenario(15, 1)