# Batch Dataset Renaming

### Init

In [1]:
import os
import shutil
import pandas as pd


In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!rm -rf /content/sample_data

In [13]:
def save_on_cloud(source: str, destination: str):
    """
    Saves a folder to a cloud storage location (e.g., Google Drive in Colab).

    Args:
        source (str): The path to the source folder.
        destination (str): The path to the destination folder (in cloud storage).
    """
    # 0. Input Validation (Assertions)
    assert isinstance(source, str), "Source must be a string."
    assert isinstance(destination, str), "Destination must be a string."

    try:
        # 1. Verify Source Folder
        if not os.path.exists(destination):
            os.makedirs(destination)

        # 2. Copy the Folder
        shutil.copytree(source, destination, dirs_exist_ok=True)
        print("✅ Folder copied successfully:\n  ",source,"\n  -->",destination)

    except Exception as e:
        print(f"❌ An error occurred: {e}")

## Group files for processing

In [5]:
# Define source and destination paths
source_folder = '/content/drive/MyDrive/Dataset'
destination_folder = '/content/Files'

# Create the destination folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

def move_files(source, destination):
    """Recursively moves all files from source to destination."""
    for root, _, files in os.walk(source):
        for file in files:
            source_path = os.path.join(root, file)
            destination_path = os.path.join(destination, file)
            try:
                shutil.copy2(source_path, destination_path) #copy2 preserves metadata
                #shutil.move(source_path, destination_path) #use move instead of copy if you want to delete originals.
                print(f"Moved: {file}")
            except Exception as e:
                print(f"Error moving {file}: {e}")

# Move files
move_files(source_folder, destination_folder)

print("File transfer complete.")

Moved: .DS_Store
Moved: .DS_Store
Moved: DJI_0412.JPG
Moved: DJI_0414.JPG
Moved: DJI_0413.JPG
Moved: DJI_0391.JPG
Moved: DJI_0390.JPG
Moved: DJI_0392.JPG
Moved: DJI_0371.JPG
Moved: DJI_0373.JPG
Moved: DJI_0372.JPG
Moved: DJI_0375.JPG
Moved: DJI_0374.JPG
Moved: DJI_0377.JPG
Moved: DJI_0376.JPG
Moved: DJI_0382.JPG
Moved: DJI_0380.JPG
Moved: DJI_0381.JPG
Moved: DJI_0379.JPG
Moved: DJI_0378.JPG
Moved: DJI_0383.JPG
Moved: DJI_0385.JPG
Moved: DJI_0386.JPG
Moved: DJI_0384.JPG
Moved: DJI_0388.JPG
Moved: DJI_0387.JPG
Moved: DJI_0389.JPG
Moved: DJI_0396.JPG
Moved: DJI_0393.JPG
Moved: DJI_0395.JPG
Moved: DJI_0394.JPG
Moved: DJI_0397.JPG
Moved: DJI_0399.JPG
Moved: DJI_0402.JPG
Moved: DJI_0400.JPG
Moved: DJI_0403.JPG
Moved: DJI_0398.JPG
Moved: DJI_0401.JPG
Moved: DJI_0404.JPG
Moved: DJI_0409.JPG
Moved: DJI_0408.JPG
Moved: DJI_0407.JPG
Moved: DJI_0405.JPG
Moved: DJI_0406.JPG
Moved: DJI_0411.JPG
Moved: DJI_0410.JPG
Moved: DJI_0284.JPG
Moved: DJI_0283.JPG
Moved: DJI_0285.JPG
Moved: DJI_0288.JPG
Moved:

# Batch Renaming for Lot Photo Codification
Automatically rename photo filenames in batch with the corresponding dataset codification for pictures of the same lot.

## Authentication

In [6]:
from google.colab import auth
auth.authenticate_user()

In [7]:
import gspread
from google.auth import default
creds, _ = default()

gc = gspread.authorize(creds)

## Importing reference names spreadsheet

In [8]:
spreadsheet = gc.open('Metadata')
worksheet = spreadsheet.get_worksheet(2)
# 0: primera hoja, 1: segunda hoja, 2: tercera hoja
rows = worksheet.get_all_values()

df = pd.DataFrame(rows[1:-1], columns=rows[0])  # Assuming the first row is the header

In [9]:
df.head(1)

Unnamed: 0,Owner,Lote,Parada,Captura,Archivo,Aro,Altura,Ángulo,folder,name,Conteo trad,Plantas/m2,Conteo foto,Plantas/m2.1,Diferencia,MSE,Nota
0,amll,209,2,6,DJI_0190,Amarillo,1,90,1m90,209_206_54,54,216,,,,,


In [10]:
df.tail(1)

Unnamed: 0,Owner,Lote,Parada,Captura,Archivo,Aro,Altura,Ángulo,folder,name,Conteo trad,Plantas/m2,Conteo foto,Plantas/m2.1,Diferencia,MSE,Nota
192,eryx,777,1,16,DJI_0336,Rojo,5,60,5m60,777_116_NA,,-,,,,,


# Renaming & Dataset grouping

In [11]:
!cp -r /content/Files /content/BU

In [21]:
def process_files(df, source_folder="/content/Files", destination_root="/content/Datasets"):
    """
    Processes files in the source folder based on information from a DataFrame.

    Args:
        df (pd.DataFrame): DataFrame containing file information (Archivo, name, folder).
        source_folder (str): Path to the source folder.
        destination_root (str): Root path for destination folders.
    """

    if not os.path.exists(source_folder):
        print(f"Warning: Source folder '{source_folder}' does not exist. Skipping.")
        return

    for filename in os.listdir(source_folder):
        filepath = os.path.join(source_folder, filename)

        if not os.path.isfile(filepath):
            continue  # Skip directories

        try:
            # Extract filename without extension
            base_filename, extension = os.path.splitext(filename)

            # Find matching row in DataFrame
            matching_row = df[df["Archivo"] == base_filename]

            if matching_row.empty:
                print(f"Warning: Filename '{filename}' not found in DataFrame. Skipping.")
                continue

            new_name = matching_row["name"].iloc[0]
            destination_folder = matching_row["folder"].iloc[0]

            if pd.isna(new_name) or not new_name:
                print(f"Warning: 'name' is empty for '{filename}'. Skipping.")
                continue

            if pd.isna(destination_folder) or not destination_folder:
                print(f"Warning: 'folder' is empty for '{filename}'. Skipping.")
                continue

            new_filename = f"{new_name}{extension}"
            destination_path = os.path.join(destination_root, destination_folder)
            new_filepath = os.path.join(destination_path, new_filename)

            # Create destination folder if it doesn't exist
            os.makedirs(destination_path, exist_ok=True)

            # Rename and move the file
            try:
                shutil.move(filepath, new_filepath)
                print(f"Moved '{filename}' to '{new_filepath}'")
            except Exception as move_error:
                print(f"Warning: Failed to move '{filename}' to '{new_filepath}': {move_error}. Skipping.")
                continue

            #verify if the origin file still exist
            if os.path.exists(filepath):
                print(f"Warning: Original file '{filepath}' still exists after move operation.")

        except Exception as e:
            print(f"Warning: An error occurred while processing '{filename}': {e}. Skipping.")

In [22]:
process_files(df)

Moved 'DJI_0191.JPG' to '/content/Datasets/1m60/209_407_50.JPG'
Moved 'DJI_0327.JPG' to '/content/Datasets/1m90/503_222_19.JPG'
Moved 'DJI_0186.JPG' to '/content/Datasets/3m90/209_206_54.JPG'
Moved 'DJI_0362.JPG' to '/content/Datasets/2m90/503_222_54.JPG'
Moved 'DJI_0240.JPG' to '/content/Datasets/1m90/209_413_15.JPG'
Moved 'DJI_0142.JPG' to '/content/Datasets/5m90/209_102_37.JPG'
Moved 'DJI_0153.JPG' to '/content/Datasets/1,5m50/209_104_37.JPG'
Moved 'DJI_0278.JPG' to '/content/Datasets/3m45/209_416_15.JPG'
Moved 'DJI_0242.JPG' to '/content/Datasets/2m90/209_413_15.JPG'
Moved 'DJI_0269.JPG' to '/content/Datasets/4,5m90/209_414_13.JPG'
Moved 'DJI_0277.JPG' to '/content/Datasets/3m60/209_416_15.JPG'
Moved 'DJI_0335.JPG' to '/content/Datasets/5m90/503_222_19.JPG'
Moved 'DJI_0378.JPG' to '/content/Datasets/4,5m90/503_326_54.JPG'
Moved 'DJI_0181.JPG' to '/content/Datasets/5,5m90/209_206_54.JPG'
Moved 'DJI_0183.JPG' to '/content/Datasets/4,5m90/209_206_54.JPG'
Moved 'DJI_0371.JPG' to '/cont

### Exporting folders

In [24]:
# Store every dataset on the cloud
save_on_cloud(source='/content/Datasets', destination='/content/drive/MyDrive/Datasets/')

✅ Folder copied successfully:
   /content/Datasets 
  --> /content/drive/MyDrive/Datasets/


# Batch Adding prefix/suffix
Allows adding a text at the beginning/end of every filename inside a folder.

In [None]:
def rename_images(folder_path, prefix="image"):
    # Check if the provided path exists and is a directory
    if not os.path.isdir(folder_path):
        print("Provided folder path does not exist or is not a directory.")
        return

    # List all files in the folder
    files = os.listdir(folder_path)

    # Filter for image files (jpg, png, jpeg, etc.)
    image_files = [f for f in files if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff'))]

    # Loop through each image and rename it
    for index, filename in enumerate(image_files):
        # Set new name with prefix and index
        new_name = f"{prefix}_{filename}"

        # Full paths for renaming
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(folder_path, new_name)

        # Rename file
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} -> {new_name}")

    print("All images have been renamed successfully.")

In [None]:
parent = "/content/drive/MyDrive/Colab Notebooks/Imagenes"
folder = "/2,5m90"
path = parent+folder
print(path)

/content/drive/MyDrive/Colab Notebooks/Imagenes/Anegado


In [None]:
# Apply
rename_images(path, "AN")

DJI_0418.JPG
Renamed: DJI_0418.JPG -> AN_DJI_0418.JPG
DJI_0417.JPG
Renamed: DJI_0417.JPG -> AN_DJI_0417.JPG
DJI_0419.JPG
Renamed: DJI_0419.JPG -> AN_DJI_0419.JPG
All images have been renamed successfully.
