In [1]:
# Colab-only setup: mount Google Drive at /content/drive so that the paths
# used by the cells below (all under /content/drive/MyDrive) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import shutil
from tqdm import tqdm

In [3]:
def copy_manual_data(source_dir, destination_dir):
    """Mirror a ``date/species/fresh_type/file`` tree into ``destination_dir``.

    Walks exactly three directory levels below ``source_dir`` and copies the
    regular files found at the fourth level, keeping the same relative layout
    under ``destination_dir``. Entries that are not directories at the first
    three levels, and entries that are not regular files at the fourth level,
    are ignored. A file is copied only when no file exists yet at its
    destination path, so existing copies are never overwritten.

    Args:
        source_dir: Root folder holding one sub-folder per date.
        destination_dir: Root folder to copy into; created if missing.
    """
    def copy_missing_files(from_dir, to_dir):
        # Copy the regular files of from_dir that are not yet present in to_dir.
        for name in os.listdir(from_dir):
            origin = os.path.join(from_dir, name)
            target = os.path.join(to_dir, name)
            if os.path.isfile(origin) and not os.path.isfile(target):
                shutil.copy(origin, target)

    os.makedirs(destination_dir, exist_ok=True)

    for day in os.listdir(source_dir):
        day_src = os.path.join(source_dir, day)
        if not os.path.isdir(day_src):
            continue
        day_dst = os.path.join(destination_dir, day)

        # One progress bar per date, advancing once per species entry.
        species_entries = tqdm(os.listdir(day_src), desc=f'Loading {day} data')
        for fish in species_entries:
            fish_src = os.path.join(day_src, fish)
            if not os.path.isdir(fish_src):
                continue
            fish_dst = os.path.join(day_dst, fish)

            for grade in os.listdir(fish_src):
                grade_src = os.path.join(fish_src, grade)
                if os.path.isdir(grade_src):
                    grade_dst = os.path.join(fish_dst, grade)
                    # Created even when the source folder holds no files.
                    os.makedirs(grade_dst, exist_ok=True)
                    copy_missing_files(grade_src, grade_dst)


# source_directory = "/content/drive/MyDrive/Shreyas/qZense Projects-20230629T064754Z-001/qZense Projects/Fish_Project/Fish Data - Nandu's/Manual Data"
# destination_directory = "/content/drive/MyDrive/Sowmya /qZense Dataset/Manual Data"

# copy_manual_data(source_directory, destination_directory)

In [5]:
def replace_misspelled_folder_names(species_name):
    """Return the canonical species folder name for a known spelling variant.

    Each key of the table below is a canonical name and each value lists the
    spellings that are collapsed into it. The lookup is an exact,
    case-sensitive string comparison, and every variant is written with only
    its first letter upper-case and no surrounding whitespace, so the name
    passed in must already be in that form to match.

    Args:
        species_name: Folder name to look up.

    Returns:
        The canonical name when ``species_name`` is one of the listed
        variants, otherwise ``species_name`` unchanged.
    """
    misspelled_folders = {
        'Basa': ['Basa', 'Basaa'],
        'Are': ['Ar', 'Are'],
        'Barracuda': ['Barcoda', 'Barkoda', 'Barracoda', 'Barracuda'],
        'Bolo': ['Bolo', 'Bulo'],
        'Sea bass': ['C boss', 'C boos'],
        'Chara pona': ['Chara pana'],
        'Emperor': ['Comprel', 'Emperor', 'Emporwel', 'Empowel',
                    'M perl', 'M preal'],
        'Demo': ['Demo', 'Demo2', 'Test', 'Trial'],
        'Hilsa': ['Hilsa', 'Hilis'],
        'Catla': ['Katala', 'Katalaa', 'Katla'],
        'Croaker': ['Kokor', 'Croaker'],
        'Lady': ['Lady', 'Ledi'],
        'Malabar trevally': ['Mabar tavili', 'Malbhot',
                             'Trvili', 'Trevally'],
        'Needle': ['Needale', 'Nidal', 'Nidil'],
        'Parsi': ['Parci'],
        'Pearl spot': ['(bloch,', 'Bloch,', 'Bloch',
                       'Pearl spot', 'Pearls spot',
                       'Green chromide'],
        'Shol': ['Sholo'],
        'Snapper': ['Sinper', 'Sniper'],
        # The canonical key is the correct spelling, consistent with
        # 'Snapper' above; the 'White snapar' variant is folded into it.
        'White snapper': ['White snapar', 'White snapper'],
    }

    for canonical, variants in misspelled_folders.items():
        if species_name in variants:
            return canonical
    # Unknown names pass through untouched.
    return species_name


def manual_data_to_final_data(source_dir, destination_dir):
    """Merge the per-date manual data into one tree grouped by species.

    Reads ``source_dir/<date>/<species>/<fresh_type>/<file>`` and copies every
    regular file to ``destination_dir/<species>/<fresh_type>/<file>``. The
    date level is dropped, and the species / fresh-type folder names used in
    the destination are cleaned up first: surrounding whitespace is removed,
    the name is capitalized with ``str.capitalize`` and, for species, known
    spelling variants are mapped through ``replace_misspelled_folder_names``.

    The cleaned-up names are used only for the destination. The source is
    always read through the folder name exactly as it exists on disk, so
    folders whose raw name differs from the cleaned-up one (lower-case,
    stray spaces, misspelled) are still copied.

    Because the date level is dropped, files from different dates that share
    species, fresh type and file name map to the same destination path; only
    the first one encountered is copied, and existing destination files are
    never overwritten.

    Args:
        source_dir: Root folder holding one sub-folder per date.
        destination_dir: Root folder of the merged tree; created if missing.
    """
    os.makedirs(destination_dir, exist_ok=True)
    for date in os.listdir(source_dir):
        date_path = os.path.join(source_dir, date)
        if not os.path.isdir(date_path):
            continue

        for raw_species in tqdm(os.listdir(date_path),
                                desc=f'Loading {date} data'):
            # Read through the on-disk name; a cleaned-up name would point
            # at a folder that does not exist in the source tree.
            src_species_path = os.path.join(date_path, raw_species)
            if not os.path.isdir(src_species_path):
                continue

            species = replace_misspelled_folder_names(
                raw_species.strip().capitalize())
            des_species_path = os.path.join(destination_dir, species)

            for raw_fresh_type in os.listdir(src_species_path):
                src_fresh_type_path = os.path.join(src_species_path,
                                                   raw_fresh_type)
                if not os.path.isdir(src_fresh_type_path):
                    continue

                fresh_type = raw_fresh_type.strip().capitalize()
                des_fresh_type_path = os.path.join(des_species_path,
                                                   fresh_type)
                os.makedirs(des_fresh_type_path, exist_ok=True)

                for file in os.listdir(src_fresh_type_path):
                    src_filepath = os.path.join(src_fresh_type_path, file)
                    des_filepath = os.path.join(des_fresh_type_path, file)
                    if not os.path.isfile(src_filepath):
                        continue
                    # Skip files already present so re-runs are cheap and
                    # nothing in the destination is overwritten.
                    if not os.path.isfile(des_filepath):
                        shutil.copy(src_filepath, des_filepath)


# Input and output roots under the Drive mount point (/content/drive).
# NOTE(review): the folder name "Sowmya " contains a trailing space inside the
# literal — confirm it matches the actual Drive folder before changing it.
source_directory = "/content/drive/MyDrive/Sowmya /qZense Dataset/Manual Data"
destination_directory = "/content/drive/MyDrive/Sowmya /qZense Dataset/Final Data"

# Runs the merge when the cell executes; files already present in the
# destination are skipped, so re-running the cell does not overwrite them.
manual_data_to_final_data(source_directory, destination_directory)

Loading 2023-06-08 data: 100%|██████████| 2/2 [00:00<00:00, 14.33it/s]
Loading 2023-06-09 data: 100%|██████████| 2/2 [00:00<00:00, 13.08it/s]
Loading 2023-06-20 data: 100%|██████████| 1/1 [00:00<00:00, 15.44it/s]
Loading 2023-06-22 data: 100%|██████████| 2/2 [00:00<00:00,  7.94it/s]
Loading 2023-07-25 data: 100%|██████████| 1/1 [00:00<00:00, 10.78it/s]
Loading 2023-05-19 data: 100%|██████████| 6/6 [00:00<00:00, 35.74it/s]
Loading 2023-05-20 data: 100%|██████████| 7/7 [00:00<00:00, 28.03it/s]
Loading 2023-05-18 data: 100%|██████████| 5/5 [00:00<00:00, 84.70it/s]
Loading 2023-05-23 data: 100%|██████████| 9/9 [00:00<00:00, 31.04it/s]
