Goal - Rename all images in a directory to a standardized, sequentially numbered format, and create a CSV file that maps the old image names to the new ones

Note: Assumes all images are in one format (e.g. JPG), because every renamed file is given the same extension

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Define-helper-functions-useful-for-movement" data-toc-modified-id="Define-helper-functions-useful-for-movement-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Define helper functions useful for movement</a></span></li></ul></div>

## Imports

In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

In [7]:
# Extension appended to the name of every renamed image
EXTENSION_IMAGES = ".JPG"

# List of folders to process - Each item in the list is a 3 item tuple containing:
#   1. the path of the folder holding the images to rename
#   2. the prefix used for each new file name
#   3. the path of the output CSV file that maps old file names to new ones
LIST_CONFIG = [
    ('/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/2. Renamed images/defective_lav',
     'def_lav_',
     '/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/Mapping Files/mapping_filenames_def_lav.csv'
     ),
    ('/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/2. Renamed images/defective_ldl',
     'def_ldl_',
     '/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/Mapping Files/mapping_filenames_def_ldl.csv'
     ),
    ('/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/2. Renamed images/non_defective_lav',
     'nondef_lav_',
     '/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/Mapping Files/mapping_filenames_nondef_lav.csv'
     ),
    ('/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/2. Renamed images/non_defective_ldl',
     'nondef_ldl_',
     '/Users/shyamravikumar/Documents/Workspace/BAI - Project/Input Files/Large/Mapping Files/mapping_filenames_nondef_ldl.csv'
     )
]

## Define helper functions useful for movement

In [8]:
def get_valid_images_in_path(path):
    """
    Collect the full paths of the valid image files found for a location

    A file counts as a valid image when keras_preprocessing accepts it for
    the formats listed in DirectoryIterator.white_list_formats. Symbolic
    links are not followed.

    Parameters
    ----------

        path: str
            Location where the images are stored

    Returns
    -------

        list of str: One joined path per image reported by keras_preprocessing

    """
    from keras_preprocessing.image.directory_iterator import DirectoryIterator
    from keras_preprocessing.image.utils import _iter_valid_files

    # Each item yielded by the iterator is a tuple of path components, which
    # is joined into a single path string
    return [
        os.path.join(*path_parts)
        for path_parts in _iter_valid_files(
            path, DirectoryIterator.white_list_formats, follow_links=False)
    ]

In [9]:
def rename(path_old, path_new):
    """
    Rename a file without overwriting a different existing file

    Parameters
    ----------

        path_old: str
            Current path of the file

        path_new: str
            Path the file should have after the rename

    Raises
    ------

        FileExistsError: If path_new already exists and is not the same file
            as path_old

    """
    # On POSIX systems os.rename silently replaces an existing destination,
    # which would destroy the file already stored under path_new. Renaming a
    # file onto itself stays allowed, hence the samefile check.
    if os.path.exists(path_new) and not os.path.samefile(path_old, path_new):
        raise FileExistsError(
            'Cannot rename {!r}: destination {!r} already exists'.format(
                path_old, path_new))
    os.rename(path_old, path_new)

In [10]:
for path_input, prefix, path_mapping in LIST_CONFIG:
    # Each item in LIST_CONFIG provides the folder path, the prefix for the
    # new file names & the path for storing the mapping file

    # Read all valid images in the path
    list_images = get_valid_images_in_path(path_input)

    # Identify the name of each renamed file: the i-th image (counting from 1)
    # becomes <prefix><i><EXTENSION_IMAGES>, placed directly inside path_input
    list_outputs = [
        os.path.join(path_input, prefix + str(identifier) + EXTENSION_IMAGES)
        for identifier in range(1, len(list_images) + 1)
    ]

    # Stop before touching any file if a new name is already taken by a
    # different file. On POSIX systems os.rename silently replaces an existing
    # destination, so a collision (e.g. when this cell is re-run on a folder
    # that was already renamed) would destroy an image.
    list_collisions = [
        path_new for path_old, path_new in zip(list_images, list_outputs)
        if os.path.exists(path_new) and not os.path.samefile(path_old,
                                                             path_new)
    ]
    if list_collisions:
        raise FileExistsError(
            'Refusing to rename images in {!r}: {} new file name(s) already '
            'exist, e.g. {!r}'.format(path_input, len(list_collisions),
                                      list_collisions[0]))

    # Build the mapping between old and new names (full paths and file names)
    data_mapping = pd.DataFrame({
        'path_input': list_images,
        'path_output': list_outputs,
        'filename_input': [os.path.basename(x) for x in list_images],
        'filename_output': [os.path.basename(x) for x in list_outputs]
    })

    # Save the mapping file before renaming, so that the original names are
    # recorded on disk even if a rename fails part-way through
    dir_mapping = os.path.dirname(path_mapping)
    if dir_mapping:
        os.makedirs(dir_mapping, exist_ok=True)
    data_mapping.to_csv(path_mapping, index=False)

    # Apply the rename function on each pair of old and new paths
    for path_old, path_new in zip(list_images, list_outputs):
        rename(path_old, path_new)

One more thing remains to be done - separate categories for non-defective images are not required, so the non-defective folders should be merged