In [38]:
import os
import shutil

individual_db_list = ["lionfish-dataset", "turtles-dataset", "plastic_dataset", "white-sharks", "hammerhead-shark"]

# Définir le chemin pour les données agrégées
aggregated_data_path = r"./Data/aggregated_data/"

# Créer les répertoires nécessaires
os.makedirs(os.path.join(aggregated_data_path, 'train', 'images'), exist_ok=True)
os.makedirs(os.path.join(aggregated_data_path, 'train', 'labels'), exist_ok=True)
os.makedirs(os.path.join(aggregated_data_path, 'valid', 'images'), exist_ok=True)
os.makedirs(os.path.join(aggregated_data_path, 'valid', 'labels'), exist_ok=True)
os.makedirs(os.path.join(aggregated_data_path, 'test', 'images'), exist_ok=True)
os.makedirs(os.path.join(aggregated_data_path, 'test', 'labels'), exist_ok=True)

# Dictionnaire pour stocker les informations spécifiques à chaque dataset
dataset_info = {
    "lionfish-dataset": {"class_id": 0, "class_name": "lionfish"},
    "turtles-dataset": {"class_id": 1, "class_name": "turtle"},
    "plastic_dataset": {"class_id": None, "class_name": "plastic"},  # Special case for plastic dataset
    "white-sharks": {"class_id": 6, "class_name": "white_sharks"},
    "hammerhead-shark": {"class_id": 7, "class_name": "hammerhead_shark"}
}

# Fonction pour renommer les fichiers et copier les données
def copy_and_modify_data(image_dir, label_dir, output_image_dir, output_label_dir, class_id, class_name):
    image_files = os.listdir(image_dir)
    counter = 1  # Compteur pour les nouveaux noms de fichiers
    
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)

        # Assurer que l'extension .jpg est remplacée correctement (uniquement à la fin)
        if image_file.endswith(".jpg"):
            label_path = os.path.join(label_dir, image_file.replace(".jpg", ".txt"))
            
            try:
                # Nouveau nom pour l'image basé sur le nom de la classe (lionfish_1, turtle_1, etc.)
                new_image_name = f"{class_name}_{counter}.jpg"
                new_image_path = os.path.join(output_image_dir, new_image_name)

                # Copier l'image vers le nouveau répertoire avec un nom simplifié
                shutil.copy(image_path, new_image_path)
                
                # Modifier et copier le label vers le nouveau répertoire avec le nouveau nom
                if os.path.exists(label_path):
                    new_label_name = new_image_name.replace(".jpg", ".txt")
                    with open(label_path, 'r') as label_file:
                        lines = label_file.readlines()

                    # Si class_id est None (cas spécial comme pour le plastic dataset), on garde le label d'origine
                    with open(os.path.join(output_label_dir, new_label_name), 'w') as new_label_file:
                        for line in lines:
                            if class_id is not None:
                                parts = line.strip().split()
                                parts[0] = str(class_id)  # Changer l'ID de classe si spécifié
                                new_label_file.write(" ".join(parts) + "\n")
                            else:
                                new_label_file.write(line)  # Ne pas modifier le label pour les datasets spécifiques

                else:
                    print(f"Label file not found: {label_path}")
                
                counter += 1  # Incrémenter le compteur pour le prochain fichier
            except Exception as e:
                print(f"Error processing file {image_file}: {e}")

# Fonction pour traiter tous les datasets
def process_datasets():
    for dataset in individual_db_list:
        # Obtenir les informations du dataset
        class_id = dataset_info[dataset]["class_id"]
        class_name = dataset_info[dataset]["class_name"]
        
        # Définir les chemins en fonction du dataset
        train_images = os.path.join("./Data", dataset, "train", "images")
        train_labels = os.path.join("./Data", dataset, "train", "labels")
        
        valid_images = os.path.join("./Data", dataset, "valid", "images")
        valid_labels = os.path.join("./Data", dataset, "valid", "labels")
        
        test_images = os.path.join("./Data", dataset, "test", "images")
        test_labels = os.path.join("./Data", dataset, "test", "labels")
        
        # Copier les données du dataset vers les données agrégées
        copy_and_modify_data(train_images, train_labels, 
                             os.path.join(aggregated_data_path, 'train', 'images'), 
                             os.path.join(aggregated_data_path, 'train', 'labels'), 
                             class_id, class_name)
        
        copy_and_modify_data(valid_images, valid_labels, 
                             os.path.join(aggregated_data_path, 'valid', 'images'), 
                             os.path.join(aggregated_data_path, 'valid', 'labels'), 
                             class_id, class_name)
        
        copy_and_modify_data(test_images, test_labels, 
                             os.path.join(aggregated_data_path, 'test', 'images'), 
                             os.path.join(aggregated_data_path, 'test', 'labels'), 
                             class_id, class_name)

# Lancer le traitement des datasets
process_datasets()

print("Les données agrégées ont été créées avec succès !")


Error processing file Invasive-Lionfish-Destroying-Our-Reefs---Florida-Sport-Fishing-TV---Conservation-Corner---YouTube---Google-Chrome-2021-11-05-21-33-25_mp4-0_jpg.rf.40600afb9a02ca3495a25e2caf581905.jpg: [Errno 2] No such file or directory: './Data\\lionfish-dataset\\train\\images\\Invasive-Lionfish-Destroying-Our-Reefs---Florida-Sport-Fishing-TV---Conservation-Corner---YouTube---Google-Chrome-2021-11-05-21-33-25_mp4-0_jpg.rf.40600afb9a02ca3495a25e2caf581905.jpg'
Error processing file Invasive-Lionfish-Destroying-Our-Reefs---Florida-Sport-Fishing-TV---Conservation-Corner---YouTube---Google-Chrome-2021-11-05-21-33-25_mp4-0_jpg.rf.7ca5265f5f0e950ead9bccfef22f9f57.jpg: [Errno 2] No such file or directory: './Data\\lionfish-dataset\\train\\images\\Invasive-Lionfish-Destroying-Our-Reefs---Florida-Sport-Fishing-TV---Conservation-Corner---YouTube---Google-Chrome-2021-11-05-21-33-25_mp4-0_jpg.rf.7ca5265f5f0e950ead9bccfef22f9f57.jpg'
Error processing file Invasive-Lionfish-Destroying-Our-Ree

In [33]:

#compter le nombre d'éléments dans un dossier
import os
import sys

def count_files(path):
    if not os.path.isdir(path):
        print("Le chemin spécifié n'est pas un dossier")
        sys.exit(1)
    files = os.listdir(path)
    print(len(files))
    
count_files(r"Data/lionfish-dataset/valid/images")
count_files(r"Data/lionfish-dataset/valid/labels")

count_files(r"Data/lionfish-dataset/train/images")
count_files(r"Data/lionfish-dataset/train/labels")

count_files(r"Data/turtles-dataset/valid/images")
count_files(r"Data/turtles-dataset/valid/labels")

count_files(r"Data/turtles-dataset/train/images")
count_files(r"Data/turtles-dataset/train/labels")


print()

count_files(r"Data/aggregated_data/train/images")
count_files(r"Data/aggregated_data/train/labels")
count_files(r"Data/aggregated_data/valid/images")
count_files(r"Data/aggregated_data/valid/labels")

112
112
786
786
12
12
84
84

653
653
95
95
