# File IO

In [69]:
import glob
import os
import shutil

# Directory whose entries the os.scandir() cells below list and filter.
PATH = "my_directory/"

## List the contents of a directory

In [70]:
# os.scandir() yields os.DirEntry objects; the with-block closes the
# underlying directory handle as soon as the listing is done.
with os.scandir(PATH) as listing:
    for entry in listing:
        print(entry.name)

sub_dir
file3.txt
sub_dir_b
sub_dir_c
file2.csv
file1.py


## Filter files & directories

In [71]:
# Print only the regular files directly inside PATH.
with os.scandir(PATH) as listing:
    for entry in listing:
        if not entry.is_file():
            continue
        print(entry.name)

file3.txt
file2.csv
file1.py


In [72]:
# Print only the sub-directories directly inside PATH.
with os.scandir(PATH) as listing:
    for entry in listing:
        if not entry.is_dir():
            continue
        print(entry.name)

sub_dir
sub_dir_b
sub_dir_c


## Making files & directories

In [73]:
# Rebinds the module-level PATH: every later cell that reads PATH sees this
# nested directory path until PATH is assigned again.
PATH = "2024/Jan/23"

In [74]:
# Creates PATH together with any missing intermediate directories.
os.makedirs(PATH)

# Repeating the call is harmless with exist_ok=True: an existing
# directory is accepted instead of raising.
os.makedirs(PATH, exist_ok=True)

# Without exist_ok=True, a repeated call raises FileExistsError:
# os.makedirs("2024/Jan/23")

In [75]:
# Opening in "w" mode creates the file (or truncates an existing one);
# nothing is written, so the result is an empty file.
empty_file = f"{PATH}/file.txt"
with open(empty_file, "w"):
    pass

## Deleting files and directories

In [76]:
# Delete a single file; os.remove() raises if the path is missing
# or refers to a directory.
target = f"{PATH}/file.txt"
os.remove(target)

In [77]:
# Delete a directory tree recursively: "2024/" and everything inside it
# (including the "Jan/23" directories created above) are removed.
shutil.rmtree("2024/")

## Filename pattern matching
Use `glob` to find files and directories whose paths match a shell-style wildcard pattern (e.g. `*.py`).

In [78]:
# glob.glob() eagerly builds and returns a list of every matching path.
# The pattern targets the sample directory directly: PATH was last bound to
# "2024/Jan/23", a tree the deletion step above removes, so the previous
# PATH + "/*.py" pattern could never match anything.
for file in glob.glob("my_directory/*.py"):
    print(file)

In [79]:
# glob.iglob() yields the same matches as glob.glob() but lazily, one path
# at a time, without building the whole list in memory.
# The pattern targets the sample directory directly because "2024/Jan/23"
# (the PATH value assigned in the directory-creation section) is deleted
# before this point, which left the PATH-based pattern without any match.
for file in glob.iglob("my_directory/*.py"):
    print(file)

### Recursive pattern matching

In [80]:
# With recursive=True, "**" matches zero or more nested directories, so this
# finds .py files at any depth below the sample directory.
# The sample directory is named explicitly because "2024/Jan/23" (the PATH
# value assigned in the directory-creation section) is deleted before this
# point, so a pattern built from PATH matched nothing.
for file in glob.iglob("my_directory/**/*.py", recursive=True):
    print(file)

## Traversing directories

In [81]:
# Root of the sample tree traversed by the os.walk() and glob cells below.
PATH = "walk_vs_glob"

In [82]:
# os.walk() yields one (dirpath, dirnames, filenames) tuple per directory
# in the tree rooted at PATH; only the file names are printed here.
for dirpath, _subdirs, filenames in os.walk(PATH):
    print(f"\nDirectory: {dirpath}")
    for name in filenames:
        print(name)


Directory: walk_vs_glob
file_1.txt
file_2.txt

Directory: walk_vs_glob/dir_2
file_4.txt
file_3.txt

Directory: walk_vs_glob/dir_1
file_5.txt

Directory: walk_vs_glob/dir_1/dir_3


### Why not use glob for it?
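`glob` returns one flat sequence of matching paths. Unlike `os.walk()`, it does not group entries by directory, so you cannot handle (or skip) individual subdirectories while traversing.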

In [83]:
# "**/*" with recursive=True matches every file and directory below PATH,
# returned as one flat sequence of paths.
everything_below = glob.iglob(PATH + "/**/*", recursive=True)
for match in everything_below:
    print(match)

walk_vs_glob/file_1.txt
walk_vs_glob/file_2.txt
walk_vs_glob/dir_2
walk_vs_glob/dir_1
walk_vs_glob/dir_2/file_4.txt
walk_vs_glob/dir_2/file_3.txt
walk_vs_glob/dir_1/file_5.txt
walk_vs_glob/dir_1/dir_3


## Copying, moving, and renaming files & directories

In [84]:
# Working directory for the copy, move and rename examples below.
PATH = "copy_move_rename"

In [85]:
### Setup cell
# Create three sibling directories under PATH (re-running is safe thanks to
# exist_ok=True), then place an empty file.txt in dir_1 and dir_3.
for directory in (f"{PATH}/dir_1", f"{PATH}/dir_2", f"{PATH}/dir_3"):
    os.makedirs(directory, exist_ok=True)

for empty_file in (f"{PATH}/dir_1/file.txt", f"{PATH}/dir_3/file.txt"):
    with open(empty_file, "w"):
        pass

### Copy

In [86]:
src = f"{PATH}/dir_1/file.txt"
dst = f"{PATH}/dir_2/copy.txt"
dst2 = f"{PATH}/dir_2/copy2.txt"

# If dst is a directory, the copy is placed inside it under src's file name.
# dst = f"{PATH}/dir_2/"

# shutil.copy() copies the file's contents and permission bits.
shutil.copy(src, dst)
# shutil.copy2() additionally tries to preserve metadata, so copy2.txt keeps
# the timestamps of the source file.txt.
shutil.copy2(src, dst2)

'copy_move_rename/dir_2/copy2.txt'

In [87]:
src = f"{PATH}/dir_1/"
dst = f"{PATH}/copy"

# Recursively copy the whole src directory to dst.
# dirs_exist_ok=True lets copytree write into an already existing dst instead
# of raising FileExistsError (comparable to exist_ok in os.makedirs).
shutil.copytree(src, dst, dirs_exist_ok=True)

'copy_move_rename/copy'

### Move

In [88]:
### Setup cell
# Clear the result of a previous run so the move below can be repeated.
# Only "nothing to delete yet" is expected here; any other failure (for
# example a permissions problem) should surface instead of being swallowed.
try:
    shutil.rmtree(f"{PATH}/moved/dir_1")
except FileNotFoundError:
    pass

In [89]:
src = f"{PATH}/dir_1/"
dst = f"{PATH}/moved/"

# If dst does not exist, src itself is renamed to dst ("dir_1" becomes "moved").
# If dst is an existing directory, src is moved inside it ("moved/dir_1").
# If "moved/dir_1" already exists as well, shutil.move raises shutil.Error.

# Unlike shutil.copytree, shutil.move has no dirs_exist_ok option.
shutil.move(src, dst)

'copy_move_rename/moved/dir_1'

### Rename

In [90]:
current_name = f"{PATH}/dir_3/file.txt"
new_name = f"{PATH}/dir_3/renamed.txt"

# Raises FileNotFoundError if current_name does not exist.
# NOTE(review): when new_name already exists the outcome is platform
# dependent (a file is silently replaced on POSIX, FileExistsError on
# Windows) - use os.replace() if overwriting is intended everywhere.
os.rename(current_name, new_name)

## Archive

In [96]:
# Archive walk_vs_glob/dir_2 relative to the current directory: entries are
# stored with the full "walk_vs_glob/dir_2/..." prefix.
# base_name is the archive path without extension; the call returns the
# name of the archive file that was written.
shutil.make_archive(
    base_name="zip_file_1",
    format="zip",
    root_dir=".",
    base_dir="walk_vs_glob/dir_2",
)

'/Users/armagaan/Documents/code/rough/zip_file_1.zip'

In [97]:
# Same directory, but archived relative to root_dir="walk_vs_glob": entries
# are stored as "dir_2/..." without the "walk_vs_glob/" prefix.
shutil.make_archive(
    base_name="zip_file_2",
    format="zip",
    root_dir="walk_vs_glob",
    base_dir="dir_2",
)

'/Users/armagaan/Documents/code/rough/zip_file_2.zip'

In [98]:
# Extract zip_file_1.zip into unzipped_1/; the paths stored in the archive
# are recreated below that directory.
shutil.unpack_archive(
    filename="zip_file_1.zip",
    format="zip",
    extract_dir="unzipped_1",
)

In [99]:
# Extract zip_file_2.zip into unzipped_2/; the paths stored in the archive
# are recreated below that directory.
shutil.unpack_archive(
    filename="zip_file_2.zip",
    format="zip",
    extract_dir="unzipped_2",
)