# Clean data (filenames)

In [1]:
import os
from shutil import copy2

In [2]:
from pathlib import Path

In [3]:
# Build the paths from components instead of embedding a backslash: '\d' is an
# invalid escape sequence in a normal string literal, and a hard-coded '\' is
# only a path separator on Windows.
root_src_dir = Path('..') / 'data-first'  # dir with original image files with random names.
root_dst_dir = Path('..') / 'data-dev'  # destination dir with logical image file names.

## Copy the folder structure

In [4]:
def copy_subfolders(src_path, dst_path):
    """Recreate the folder tree found under ``src_path`` below ``dst_path``.

    Only directories are created; no files are copied.

    - Iterate over the existing folders (top-down, via ``os.walk``).
    - Build the matching destination folder path for each of them.
    - If that folder does not exist yet, create it; otherwise report that it
      is already there.

    Args:
        src_path: Root of the folder tree to mirror.
        dst_path: Root under which the mirrored tree is created. It is created
            as well when it does not exist yet.
    """
    for dirpath, _dirnames, _filenames in os.walk(src_path):
        # The first directory yielded is src_path itself, whose relative path
        # is '.', so the first destination path is '<dst_path>/.'.
        structure = os.path.join(dst_path, os.path.relpath(dirpath, src_path))
        if os.path.isdir(structure):
            print(f'{structure} does already exits.')
            continue
        # makedirs (rather than mkdir) also creates a missing dst_path root;
        # plain mkdir raises when the parent directory does not exist.
        os.makedirs(structure, exist_ok=True)
        print(f'mkdir {structure}')

In [5]:
# Mirror the source folder tree into the destination before any files are copied.
copy_subfolders(root_src_dir, root_dst_dir)

..\data-dev\. does already exits.
..\data-dev\alef-1 does already exits.
..\data-dev\bet-2 does already exits.
..\data-dev\dalet-5 does already exits.
..\data-dev\gimel-4 does already exits.
..\data-dev\he-6 does already exits.
..\data-dev\vet-3 does already exits.


## Copy files into another directory.

In [6]:
def copy_files(src_dir, dst_dir):
    """Copy the files of ``src_dir`` into ``dst_dir`` under indexed names.

    Each file is copied to ``<letter>-<i>.jpg``, where ``<letter>`` is the part
    of the source folder name before its last hyphen (``alef-1`` -> ``alef``)
    and ``<i>`` is the file's position in the name-sorted listing of
    ``src_dir``. Sub-directories of ``src_dir`` are skipped.

    Args:
        src_dir: Folder holding the files to copy; its name supplies the
            prefix of the new file names.
        dst_dir: Existing folder that receives the copies.
    """
    src_p = Path(src_dir)
    dst_p = Path(dst_dir)
    # rsplit with maxsplit=1 tolerates folder names with several hyphens (or
    # none), where unpacking a plain split('-') into two names would raise.
    letter = src_p.name.rsplit('-', 1)[0]

    # os.listdir returns entries in arbitrary order; sort so the index given to
    # each file is reproducible. Only regular files can be handed to copy2.
    filenames = sorted(
        name for name in os.listdir(src_p) if (src_p / name).is_file()
    )

    for i, filename in enumerate(filenames):
        src = src_p / filename
        # NOTE(review): '.jpg' is applied to every file regardless of its
        # original suffix (e.g. '.png' sources); the bytes are copied as-is,
        # not converted. Confirm this renaming is intended.
        dst = dst_p / f'{letter}-{i}.jpg'
        print(f'src: {src} ---> dst: {dst}')
        copy2(src, dst)

In [7]:
def get_subfolders(root_dir):
    """Return the immediate sub-directories of ``root_dir`` as sorted ``Path`` objects.

    Args:
        root_dir: Directory whose direct children are inspected.

    Returns:
        A sorted list with one ``Path`` per child directory; plain files are
        left out.
    """
    # The with-block closes the scandir iterator deterministically. Sorting
    # gives a stable order, since os.scandir yields entries in arbitrary order.
    with os.scandir(root_dir) as entries:
        return sorted(Path(entry.path) for entry in entries if entry.is_dir())

In [8]:
# Derive each destination folder from the source folder's own name instead of
# zipping two independent directory listings: positional pairing silently
# sends files to the wrong folder as soon as the destination holds an extra,
# missing or differently ordered sub-folder.
for src_sf in get_subfolders(root_src_dir):
    copy_files(src_sf, root_dst_dir / src_sf.name)

src: ..\data-first\alef-1\14.png ---> dst: ..\data-dev\alef-1\alef-0.jpg
src: ..\data-first\alef-1\15.png ---> dst: ..\data-dev\alef-1\alef-1.jpg
src: ..\data-first\alef-1\16.png ---> dst: ..\data-dev\alef-1\alef-2.jpg
src: ..\data-first\alef-1\17.png ---> dst: ..\data-dev\alef-1\alef-3.jpg
src: ..\data-first\alef-1\18.png ---> dst: ..\data-dev\alef-1\alef-4.jpg
src: ..\data-first\alef-1\19.png ---> dst: ..\data-dev\alef-1\alef-5.jpg
src: ..\data-first\alef-1\20.png ---> dst: ..\data-dev\alef-1\alef-6.jpg
src: ..\data-first\alef-1\alef-0.jpg ---> dst: ..\data-dev\alef-1\alef-7.jpg
src: ..\data-first\alef-1\alef-1.jpg ---> dst: ..\data-dev\alef-1\alef-8.jpg
src: ..\data-first\alef-1\alef-10.jpg ---> dst: ..\data-dev\alef-1\alef-9.jpg
src: ..\data-first\alef-1\alef-11.jpg ---> dst: ..\data-dev\alef-1\alef-10.jpg
src: ..\data-first\alef-1\alef-12.jpg ---> dst: ..\data-dev\alef-1\alef-11.jpg
src: ..\data-first\alef-1\alef-13.jpg ---> dst: ..\data-dev\alef-1\alef-12.jpg
src: ..\data-first\a