<a href="https://colab.research.google.com/github/M-110/automate-the-boring-stuff/blob/main/10_Organizing_Files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from pathlib import Path
import shutil

In [None]:
!touch file.txt
!mkdir my_folder

# Using shutil

## Copy

In [None]:
p = Path.cwd()
p

PosixPath('/content')

In [None]:
shutil.copy(p / 'file.txt', p / 'my_folder' / 'file_copy.txt')

PosixPath('/content/my_folder/file_copy.txt')

In [None]:
shutil.copytree(p, p / 'backup_content')

PosixPath('/content/backup_content')

## Move

In [None]:
!touch data.csv

In [None]:
shutil.move('data.csv', 'my_folder')

'my_folder/data.csv'

Rename a file by moving it to a path with a different name.

In [None]:
!touch great_data.csv

In [None]:
shutil.move('great_data.csv', Path('my_folder', 'wonderful_data.csv'))

PosixPath('my_folder/wonderful_data.csv')

## Delete files/folders

In [None]:
!touch delete_me

In [None]:
os.unlink('delete_me') # delete file

In [None]:
!mkdir old_dir

In [None]:
os.rmdir('old_dir') # delete folder

In [None]:
!mkdir photos
!touch photos/photo{1..10}.jpg
!mkdir photos/other_photos
!touch photos/other_photos/other_stuff{1..10}.jpeg
!ls photos

other_photos  photo1.jpg  photo3.jpg  photo5.jpg  photo7.jpg  photo9.jpg
photo10.jpg   photo2.jpg  photo4.jpg  photo6.jpg  photo8.jpg


In [None]:
shutil.rmtree('photos')

In [None]:
!ls photos

ls: cannot access 'photos': No such file or directory


## glob

In [None]:
!mkdir text_files
!touch text_files/data{01..10}.txt
!touch text_files/screenshot{01..10}.jpg

In [None]:
for text_file in Path('text_files').glob('*.txt'):
  print(text_file)

text_files/data10.txt
text_files/data07.txt
text_files/data09.txt
text_files/data06.txt
text_files/data02.txt
text_files/data08.txt
text_files/data01.txt
text_files/data03.txt
text_files/data05.txt
text_files/data04.txt


In [None]:
for screenshot in Path('text_files').glob('screenshot*'):
  print(screenshot)

text_files/screenshot06.jpg
text_files/screenshot07.jpg
text_files/screenshot02.jpg
text_files/screenshot08.jpg
text_files/screenshot01.jpg
text_files/screenshot04.jpg
text_files/screenshot10.jpg
text_files/screenshot05.jpg
text_files/screenshot09.jpg
text_files/screenshot03.jpg


In [None]:
for text_file in Path('text_files').glob('*.txt'):
  os.unlink(text_file)

In [None]:
!ls text_files

screenshot01.jpg  screenshot04.jpg  screenshot07.jpg  screenshot10.jpg
screenshot02.jpg  screenshot05.jpg  screenshot08.jpg
screenshot03.jpg  screenshot06.jpg  screenshot09.jpg


# send2trash

In [None]:
import send2trash

In [None]:
with open('cow.txt', 'a') as f:
  f.write('The cow!')

In [None]:
send2trash.send2trash('cow.txt')

## Walk

`os.walk` walks a directory tree from the top down, yielding each folder together with the names of its subfolders and files.

In [None]:
!mkdir app/bin -p
!mkdir app/lib
!mkdir app/files
!mkdir app/files/extra
!touch app/bin/bin{01..05}
!touch app/lib/my_lib app/lib/your_lib
!touch app/file_{01..05}.exe
!touch app/files/virus.exe
!touch app/files/extra/extra_file.py

In [None]:
for folder, subfolders, files in os.walk('app'):
  print(f'Folder: {folder}')
  
  for file in files:
    print(f'\tFile: {file}')

  for subfolder in subfolders:
    print(f'\t\tSubfolder: {subfolder}')

Folder: app
	File: file_04.exe
	File: file_03.exe
	File: file_01.exe
	File: file_02.exe
	File: file_05.exe
		Subfolder: bin
		Subfolder: lib
		Subfolder: files
Folder: app/bin
	File: bin01
	File: bin03
	File: bin04
	File: bin02
	File: bin05
Folder: app/lib
	File: your_lib
	File: my_lib
Folder: app/files
	File: virus.exe
		Subfolder: extra
Folder: app/files/extra
	File: extra_file.py


# Compress with zipfile

In [None]:
!zip -r app.zip app

  adding: app/ (stored 0%)
  adding: app/file_04.exe (stored 0%)
  adding: app/bin/ (stored 0%)
  adding: app/bin/bin01 (stored 0%)
  adding: app/bin/bin03 (stored 0%)
  adding: app/bin/bin04 (stored 0%)
  adding: app/bin/bin02 (stored 0%)
  adding: app/bin/bin05 (stored 0%)
  adding: app/file_03.exe (stored 0%)
  adding: app/file_01.exe (stored 0%)
  adding: app/lib/ (stored 0%)
  adding: app/lib/your_lib (stored 0%)
  adding: app/lib/my_lib (stored 0%)
  adding: app/file_02.exe (stored 0%)
  adding: app/file_05.exe (stored 0%)
  adding: app/files/ (stored 0%)
  adding: app/files/virus.exe (stored 0%)
  adding: app/files/extra/ (stored 0%)
  adding: app/files/extra/extra_file.py (stored 0%)


In [None]:
import zipfile

In [None]:
my_zip = zipfile.ZipFile('app.zip')

In [None]:
my_zip.namelist()

['app/',
 'app/file_04.exe',
 'app/bin/',
 'app/bin/bin01',
 'app/bin/bin03',
 'app/bin/bin04',
 'app/bin/bin02',
 'app/bin/bin05',
 'app/file_03.exe',
 'app/file_01.exe',
 'app/lib/',
 'app/lib/your_lib',
 'app/lib/my_lib',
 'app/file_02.exe',
 'app/file_05.exe',
 'app/files/',
 'app/files/virus.exe',
 'app/files/extra/',
 'app/files/extra/extra_file.py']

In [None]:
my_zip.getinfo('app/files/virus.exe').file_size

0

In [None]:
my_zip.close()

### unzipping

In [None]:
with zipfile.ZipFile('app.zip') as file:
  file.extractall('zip_output')

In [None]:
!ls zip_output

app


### zipping

In [None]:
!echo "Hello World" > hello.txt
!echo "Hola World" > hola.txt

In [None]:
with zipfile.ZipFile('new.zip', 'w') as file:
  file.write('hello.txt')
  file.write('hola.txt')

Append to an existing zip file by opening it in `'a'` (append) mode.

In [None]:
!echo 'Bonjour World' > bonjour.txt

In [None]:
with zipfile.ZipFile('new.zip', 'a') as file:
  file.write('bonjour.txt')

In [None]:
with zipfile.ZipFile('new.zip') as file:
  print(file.namelist())

['hello.txt', 'hola.txt', 'bonjour.txt']


# Project: Renaming Files with American-Style Dates to European-Style

In [None]:
!mkdir date_folder
!mkdir date_folder/logs
!touch date_folder/logs/log_{01..12}-25-{2021..2022}
!mkdir date_folder/photos
!touch date_folder/photos/my_photo_05.{21..29}.2021
!touch date_folder/history_12_31_{2015..2021}

In [None]:
# Cleanup helper, intentionally disabled: run in this position it deletes the
# date_folder fixtures created by the previous cell, so the listing and rename
# cells below would find nothing on "Restart & Run All".
# Uncomment and run manually to reset the fixtures.
# !rm -rf date_folder

In [None]:
for folder, _, files in os.walk('date_folder'):
  for file in files:
    print(file)

history_12_31_2020
history_12_31_2016
history_12_31_2018
history_12_31_2017
history_12_31_2015
history_12_31_2019
history_12_31_2021
log_01-25-2022
log_05-25-2022
log_06-25-2021
log_01-25-2021
log_09-25-2021
log_03-25-2022
log_03-25-2021
log_07-25-2021
log_05-25-2021
log_12-25-2021
log_10-25-2021
log_09-25-2022
log_02-25-2021
log_08-25-2021
log_06-25-2022
log_08-25-2022
log_02-25-2022
log_10-25-2022
log_04-25-2021
log_12-25-2022
log_11-25-2022
log_04-25-2022
log_07-25-2022
log_11-25-2021
my_photo_05.28.2021
my_photo_05.21.2021
my_photo_05.22.2021
my_photo_05.29.2021
my_photo_05.26.2021
my_photo_05.24.2021
my_photo_05.25.2021
my_photo_05.23.2021
my_photo_05.27.2021


In [None]:
import os
from pathlib import Path
import re
import shutil

def change_dates_in_directory_from_american_to_european_style(directory):
  """Rename files under ``directory`` from MM-DD-YYYY to DD-MM-YYYY dates.

  The tree is walked recursively with ``os.walk``. Every date in a file name
  written as month, day, four-digit year (19xx or 20xx) and separated by
  ``-``, ``.`` or ``_`` is rewritten as ``DD-MM-YYYY``; the separators are
  always normalised to ``-``. Directory names are left untouched.

  Note: a name whose day is 12 or lower still looks like an American date
  after the rename, so calling this twice on the same tree swaps it back.

  Args:
    directory: Root folder whose files should be renamed.
  """
  # The separator class lists the three allowed characters literally. Inside
  # [...] a '|' is not alternation but a plain character, so it must not be
  # there or names such as '12|31|2020' would be treated as dates too.
  date_pattern = re.compile(
      r'(10|11|12|0[1-9])[-._](30|31|0[1-9]|[12][0-9])[-._]((?:19|20)\d\d)')
  for folder, _, files in os.walk(directory):
    for file in files:
      new_file_name = date_pattern.sub(r'\g<2>-\g<1>-\g<3>', file)
      if new_file_name == file:
        continue  # no American-style date in this name; nothing to rename
      shutil.move(Path(folder, file), Path(folder, new_file_name))

In [None]:
change_dates_in_directory_from_american_to_european_style('date_folder')

In [None]:
for folder, _, files in os.walk('date_folder'):
  for file in files:
    print(file)

history_31-12-2019
history_31-12-2018
history_31-12-2021
history_31-12-2020
history_31-12-2017
history_31-12-2016
history_31-12-2015
log_25-01-2021
log_25-08-2021
log_25-05-2021
log_25-03-2022
log_25-04-2021
log_25-09-2021
log_25-11-2022
log_25-04-2022
log_25-02-2021
log_25-10-2022
log_25-08-2022
log_25-10-2021
log_25-06-2021
log_25-02-2022
log_25-01-2022
log_25-06-2022
log_25-05-2022
log_25-12-2022
log_25-03-2021
log_25-12-2021
log_25-09-2022
log_25-07-2022
log_25-07-2021
log_25-11-2021
my_photo_25-05-2021
my_photo_21-05-2021
my_photo_23-05-2021
my_photo_22-05-2021
my_photo_27-05-2021
my_photo_28-05-2021
my_photo_26-05-2021
my_photo_29-05-2021
my_photo_24-05-2021


# Project: Backing up a folder into a ZIP file


In [None]:
import os
from pathlib import Path
import os
import zipfile

def backup_to_zip(directory):
  """Back up ``directory`` into the next free numbered zip file.

  The archive is created in the current working directory and named
  ``<basename>NN.zip``, where ``NN`` is one more than the highest number
  already used by a ``<basename><digits>.zip`` file there (``01`` when there
  is none). The chosen name is printed.

  Every file found by walking ``directory`` is added under its absolute path
  (``zipfile`` stores it without the leading separator). Folders are not
  added on their own, so empty folders are not part of the backup.

  Args:
    directory: Folder to back up; relative paths are resolved against the
      current working directory.
  """
  directory = os.path.abspath(directory)
  basename = os.path.basename(directory)

  # Only names that are exactly '<basename><digits>.zip' count as earlier
  # backups. The basename is escaped so regex metacharacters in a folder name
  # are taken literally, and at least one digit is required so an un-numbered
  # '<basename>.zip' cannot end up in int('').
  numbered_backup = re.compile(rf'{re.escape(basename)}(\d+)\.zip')
  current_nums = [
      int(match.group(1))
      for match in map(numbered_backup.fullmatch, os.listdir())
      if match
  ]
  max_num = max(current_nums, default=0)

  output_name = f'{basename}{max_num + 1:02}.zip'
  with zipfile.ZipFile(output_name, 'w') as zip_file:
    for folder, _, files in os.walk(directory):
      for file in files:
        zip_file.write(Path(folder, file))
  print(f'Saved as {output_name!r}')
    

In [None]:
backup_to_zip('date_folder')

Saved as 'date_folder01.zip'


In [None]:
backup_to_zip('date_folder')

Saved as 'date_folder02.zip'


In [None]:
backup_to_zip('date_folder')

Saved as 'date_folder03.zip'


In [None]:
!ls *.zip -lh

-rw-r--r-- 1 root root 3.0K Oct 13 16:07 app.zip
-rw-r--r-- 1 root root 6.2K Oct 13 17:16 date_folder01.zip
-rw-r--r-- 1 root root 6.2K Oct 13 17:16 date_folder02.zip
-rw-r--r-- 1 root root 6.2K Oct 13 17:16 date_folder03.zip
-rw-r--r-- 1 root root  343 Oct 13 16:15 new.zip
-rw-r--r-- 1 root root    0 Oct 13 17:16 test01.zip
-rw-r--r-- 1 root root    0 Oct 13 17:16 test02.zip
-rw-r--r-- 1 root root    0 Oct 13 17:16 test03.zip
-rw-r--r-- 1 root root    0 Oct 13 17:16 test04.zip
-rw-r--r-- 1 root root    0 Oct 13 17:16 test05.zip


# Practice Projects

## Selective Copy

In [None]:
%%writefile copy_files_with_extensions.py
#!/usr/bin/env python
"""Copies all files with the given file extensions to a new directory"""
import argparse
import os
from pathlib import Path
import shutil


def main():
  """Copy every file with the requested extension into one folder.

  Reads the extension, source and destination from the command line (see
  get_args), walks the source tree recursively and copies each regular file
  whose name ends with the extension straight into the destination. The
  folder structure is not reproduced, so files that share a name in
  different folders overwrite each other in the destination.
  """
  args = get_args()
  extension = args.extension
  source = Path(args.src)
  destination = Path(args.dest)

  # Also creates missing parent folders and accepts an existing destination.
  destination.mkdir(parents=True, exist_ok=True)

  # Collect everything before copying so files copied during this run are not
  # picked up again when the destination lives inside the source tree. Only
  # the file names reported by os.walk are used, so a folder whose name
  # happens to end with the extension is never handed to shutil.copy.
  files = [Path(directory, name)
           for directory, _, names in os.walk(source)
           for name in names
           if name.endswith(extension)]

  for file in files:
    try:
      shutil.copy(file, destination / file.name)
    except shutil.SameFileError:
      # The file already sits in the destination (left there by an earlier
      # run); there is nothing to copy.
      continue
  print(f'Copied all {extension!r} files to {destination}')


def get_args(argv=None):
  """Parse the command-line arguments of the selective-copy script.

  Args:
    argv: Optional list of argument strings to parse. When ``None`` (the
      default) argparse reads ``sys.argv[1:]``.

  Returns:
    argparse.Namespace with ``extension`` (positional), ``src`` (defaults to
    ``'.'``) and ``dest`` (required).
  """
  parser = argparse.ArgumentParser(
      description='Copy files of a specific extension to a new folder'
  )
  parser.add_argument('extension',
                      help='extensions to search for')
  parser.add_argument('-s',
                      '--src',
                      help='Source directory to copy from',
                      default='.')
  # main() builds Path(args.dest); if -d could be omitted, dest would be None
  # and the script would die with a TypeError instead of a usage message.
  parser.add_argument('-d',
                      '--dest',
                      help='Destination directory to copy to',
                      required=True)
  return parser.parse_args(argv)

  
if __name__ == '__main__':
  main()


Overwriting copy_files_with_extensions.py


In [None]:
!chmod +x copy_files_with_extensions.py

In [None]:
!./copy_files_with_extensions.py .txt -d text_folder

Coped all '.txt' files to text_folder


In [None]:
!ls text_folder

bonjour.txt  file_copy.txt  file.txt  hello.txt  hola.txt


In [None]:
!./copy_files_with_extensions.py .jpg -d jpg_folder

Coped all '.jpg' files to jpg_folder


In [None]:
!ls jpg_folder

screenshot01.jpg  screenshot04.jpg  screenshot07.jpg  screenshot10.jpg
screenshot02.jpg  screenshot05.jpg  screenshot08.jpg
screenshot03.jpg  screenshot06.jpg  screenshot09.jpg


## Deleting Unneeded Files

In [None]:
def find_big_files(min_size_in_mb, root='/'):
  """Print every file under ``root`` that is larger than the given size.

  Each hit is printed as ``'<size> MB: <path>'``, with the size in units of
  1024 * 1024 bytes and one decimal place.

  Args:
    min_size_in_mb: Threshold in MB; only strictly larger files are printed.
    root: Folder to search recursively. Defaults to ``'/'``, i.e. the whole
      file system.
  """
  min_size_in_b = min_size_in_mb * 1024 * 1024
  for folder, _, files in os.walk(root):
    for file in files:
      file = Path(folder) / file
      try:
        size = os.path.getsize(file)
      except OSError:
        # Covers files that vanish mid-scan (FileNotFoundError) as well as
        # unreadable entries such as permission errors or symlink loops; one
        # bad entry should not abort the whole scan.
        continue
      if size > min_size_in_b:
        print(f'{size/1024/1024:.1f} MB: {str(file):<15}')

In [None]:
find_big_files(750)

1371.9 MB: /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.0.5
2316.0 MB: /usr/lib/x86_64-linux-gnu/libcudnn_static_v8.a
1371.9 MB: /usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8
2501.4 MB: /usr/local/lib/python3.7/dist-packages/torch/lib/libtorch_cuda_cpp.so
1238.0 MB: /usr/local/lib/python3.7/dist-packages/torch/lib/libtorch_cuda_cu.so
954.3 MB: /usr/local/lib/python3.7/dist-packages/tensorflow/python/_pywrap_tensorflow_internal.so
1246.9 MB: /usr/local/lib/python2.7/dist-packages/torch/lib/libtorch.so
1013.1 MB: /usr/local/lib/python2.7/dist-packages/tensorflow_core/python/_pywrap_tensorflow_internal.so
955.0 MB: /tensorflow-1.15.2/python3.7/tensorflow_core/python/_pywrap_tensorflow_internal.so
955.0 MB: /tensorflow-1.15.2/python2.7/tensorflow_core/python/_pywrap_tensorflow_internal.so


## Filling in the Gaps

In [None]:
!mkdir saves
!touch saves/save{001..054}.txt
!touch saves/save{065..099}.txt
!touch saves/save{256..344}.txt

In [None]:
def fill_gaps(directory, prefix, width=3):
  """Renumber ``<prefix><number>.<ext>`` entries so the numbers have no gaps.

  Entries directly inside ``directory`` whose name is the prefix, a run of
  digits and an extension are sorted by their number and renumbered
  1, 2, 3, ... in that order, zero-padded to ``width`` digits. Everything
  else in the folder is left alone.

  Example (default width):
    prefix001.txt prefix005.txt prefix007.txt
    ->
    prefix001.txt prefix002.txt prefix003.txt

  Args:
    directory: Folder that holds the numbered entries.
    prefix: Literal text in front of the number.
    width: Minimum number of digits of the new numbers (default 3).
  """
  import tempfile  # local import: only needed for the staging folder below

  directory = Path(directory)
  # Match the entry name only, never the full path, so digits and dots in a
  # parent folder name cannot make unrelated files look numbered. The prefix
  # is escaped because it is plain text, not a regular expression.
  pattern = re.compile(rf'({re.escape(prefix)})(\d+)(\..*)')

  numbered = []
  for entry in directory.iterdir():
    match = pattern.fullmatch(entry.name)
    if match:
      numbered.append((int(match.group(2)), entry.name, match.group(3)))
  # Sort by number; the name breaks ties so the result is deterministic.
  numbered.sort()

  renames = []
  for rank, (_, old_name, suffix) in enumerate(numbered, start=1):
    new_name = f'{prefix}{rank:0{width}d}{suffix}'
    if new_name != old_name:
      renames.append((old_name, new_name))
  if not renames:
    return

  # Renaming in place could overwrite an entry that has not been renamed yet
  # (e.g. save000.txt -> save001.txt while save001.txt still exists). Moving
  # everything through a private staging folder first makes that impossible.
  staging = Path(tempfile.mkdtemp(prefix='.fill_gaps_', dir=str(directory)))
  for old_name, new_name in renames:
    (directory / old_name).rename(staging / new_name)
  for _, new_name in renames:
    (staging / new_name).rename(directory / new_name)
  staging.rmdir()

In [None]:
!ls saves

save001.txt  save031.txt  save071.txt  save257.txt  save287.txt  save317.txt
save002.txt  save032.txt  save072.txt  save258.txt  save288.txt  save318.txt
save003.txt  save033.txt  save073.txt  save259.txt  save289.txt  save319.txt
save004.txt  save034.txt  save074.txt  save260.txt  save290.txt  save320.txt
save005.txt  save035.txt  save075.txt  save261.txt  save291.txt  save321.txt
save006.txt  save036.txt  save076.txt  save262.txt  save292.txt  save322.txt
save007.txt  save037.txt  save077.txt  save263.txt  save293.txt  save323.txt
save008.txt  save038.txt  save078.txt  save264.txt  save294.txt  save324.txt
save009.txt  save039.txt  save079.txt  save265.txt  save295.txt  save325.txt
save010.txt  save040.txt  save080.txt  save266.txt  save296.txt  save326.txt
save011.txt  save041.txt  save081.txt  save267.txt  save297.txt  save327.txt
save012.txt  save042.txt  save082.txt  save268.txt  save298.txt  save328.txt
save013.txt  save043.txt  save083.txt  save269.txt  save299.txt  save329.txt

In [None]:
fill_gaps('saves', 'save')

In [None]:
!ls saves

save001.txt  save031.txt  save061.txt  save091.txt  save121.txt  save151.txt
save002.txt  save032.txt  save062.txt  save092.txt  save122.txt  save152.txt
save003.txt  save033.txt  save063.txt  save093.txt  save123.txt  save153.txt
save004.txt  save034.txt  save064.txt  save094.txt  save124.txt  save154.txt
save005.txt  save035.txt  save065.txt  save095.txt  save125.txt  save155.txt
save006.txt  save036.txt  save066.txt  save096.txt  save126.txt  save156.txt
save007.txt  save037.txt  save067.txt  save097.txt  save127.txt  save157.txt
save008.txt  save038.txt  save068.txt  save098.txt  save128.txt  save158.txt
save009.txt  save039.txt  save069.txt  save099.txt  save129.txt  save159.txt
save010.txt  save040.txt  save070.txt  save100.txt  save130.txt  save160.txt
save011.txt  save041.txt  save071.txt  save101.txt  save131.txt  save161.txt
save012.txt  save042.txt  save072.txt  save102.txt  save132.txt  save162.txt
save013.txt  save043.txt  save073.txt  save103.txt  save133.txt  save163.txt