# Introduction to Pathlib

See https://realpython.com/python-pathlib for a useful tutorial.

In [1]:
import datetime as dt
import os
from pathlib import Path
import shutil
import tempfile


# Before pathlib, path handling was done with the `os` and `os.path` modules
current_dir = os.getcwd()
print(f"Current directory: {current_dir}")

# os.path.join glues the pieces together with the platform's separator
new_dir = os.path.join(current_dir, 'my_new_dir')
print(f"Joining a path: {new_dir}")

# the result is a plain string, not a dedicated path type
path_type = type(new_dir)
print(path_type)

Current directory: c:\Users\jostev\github\python-improvers-2\lessons
Joining a path: c:\Users\jostev\github\python-improvers-2\lessons\my_new_dir
<class 'str'>


In [2]:
## Pathlib represents a path as a Path object instead of a plain string
current_dir = Path.cwd()

print(current_dir)
print(type(current_dir))

# Path objects can be queried directly about what they point to
points_to_file = current_dir.is_file()
print(f"is_file(): {points_to_file}")
is_present = current_dir.exists()
print(f"exists(): {is_present}")
parent_dir = current_dir.parent
print(f"parent: {parent_dir}")
    

c:\Users\jostev\github\python-improvers-2\lessons
<class 'pathlib.WindowsPath'>
is_file(): False
exists(): True
parent: c:\Users\jostev\github\python-improvers-2


In a script, you can use the `__file__` variable to get the directory that contains the script
(which is not necessarily the current working directory), but `__file__` is not defined in notebooks.

```python
SCRIPT_DIR = Path(__file__).parent
print(f"script_dir: {SCRIPT_DIR}")
```

In [3]:
# Path objects can create directories and files on disk.
working_dir = current_dir.parent.joinpath("work")
working_dir.mkdir(exist_ok=True)  # exist_ok=True: no error if the directory is already there

hello_file = working_dir.joinpath("hello.txt")
print(f"file exists: {hello_file.exists()}")

# write_text opens the file, writes the string and closes it in a single call
# (compare with the usual open()/write() pattern); read_text is the matching reader.
hello_file.write_text("Hello World!")

contents = hello_file.read_text()
print(f"file contents: {contents}")
print(f"file exists: {hello_file.exists()}")
dir_entries = list(working_dir.iterdir())
print(f"directory contents: {dir_entries}")

file exists: True
file contents: Hello World!
file exists: True
directory contents: [WindowsPath('c:/Users/jostev/github/python-improvers-2/work/hello.txt')]


In [4]:
# The pieces of a path are available as attributes of the Path object
for label, value in (
    ("name", hello_file.name),
    ("suffix", hello_file.suffix),
    ("stem", hello_file.stem),
    ("absolute path", hello_file.absolute()),
    ("parent", hello_file.parent),
):
    print(f"{label}: {value}")

# stat() returns file metadata such as the modification time and the size
print(type(hello_file.stat()))
# st_mtime is a timestamp; convert it to a datetime for display
modified = dt.datetime.fromtimestamp(hello_file.stat().st_mtime)
print(f"modified: {modified}")
size_in_bytes = hello_file.stat().st_size
print(f"size: {size_in_bytes} bytes")

name: hello.txt
suffix: .txt
stem: hello
absolute path: c:\Users\jostev\github\python-improvers-2\work\hello.txt
parent: c:\Users\jostev\github\python-improvers-2\work
<class 'os.stat_result'>
modified: 2025-01-20 23:44:40.725101
size: 12 bytes


In [5]:
# glob lists the entries of a directory that match a pattern;
# rglob does the same but also searches all subdirectories.
notebook_names = [notebook.name for notebook in current_dir.glob("*.ipynb")]
for notebook_name in notebook_names:
    print(notebook_name)


classes.ipynb
exceptions.ipynb
pathlib.ipynb
text_parse.ipynb
time_series_data.ipynb


### Exercise

+ Write a script that will print the size of the largest `.txt` file in the data directory.

In [6]:
# rename moves the file on disk. The original Path object is not modified:
# hello_file still refers to the old location, which no longer exists.
new_hello_file = hello_file.with_name('hello_again.txt')
hello_file.rename(new_hello_file)
for label, path in (("name", hello_file), ("new name", new_hello_file)):
    print(f"{label}: {path.absolute()}")
    print(f"exists: {path.exists()}")

name: c:\Users\jostev\github\python-improvers-2\work\hello.txt
exists: False
new name: c:\Users\jostev\github\python-improvers-2\work\hello_again.txt
exists: True


In [7]:
# Use unlink to delete a file (or symbolic link); directories are removed with rmdir() instead.
# missing_ok=True means no FileNotFoundError is raised if the file is already gone.
new_hello_file.unlink(missing_ok=True)

In [8]:
# tempfile can create temporary directories. TemporaryDirectory is normally used
# as a context manager that removes the directory on exit; mkdtemp only creates
# the directory and leaves the clean-up to the caller, which is what is wanted
# here because the directory is still needed after this cell has finished.
# (mkdtemp also avoids the `delete=False` argument, which needs Python 3.12+.)
temp_dir = Path(tempfile.mkdtemp())
print(f"temp dir name: {temp_dir}")
print(f"exists: {temp_dir.exists()}")

# Use shutil for copying files (copy2 also copies metadata); it returns the new path.
hello_file.write_text("Here is a new file.")
hello_file_copy = Path(shutil.copy2(hello_file, temp_dir))
print(f"new name: {hello_file_copy}")

# Directories can also be copied. dirs_exist_ok=True is required because the
# destination directory (temp_dir) already exists.
current_dir_copy = Path(shutil.copytree(current_dir, temp_dir, dirs_exist_ok=True))
print(f"new files: {list(current_dir_copy.iterdir())}")


temp dir name: C:\Users\jostev\AppData\Local\Temp\tmp3705zdim
exists: True
new name: C:\Users\jostev\AppData\Local\Temp\tmp3705zdim\hello.txt
new files: [WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/classes.ipynb'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/exceptions.ipynb'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/functions.md'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/hello.txt'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/pathlib.ipynb'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/pythonic_code.md'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/style_and_linting.md'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/text_parse.ipynb'), WindowsPath('C:/Users/jostev/AppData/Local/Temp/tmp3705zdim/time_series_data.ipynb')]


In [9]:
# Use shutil.rmtree for bulk delete: it removes the directory and everything inside it
shutil.rmtree(temp_dir)

### Exercise

Write a script that will:

+ create a folder called "data_backup"
+ copy all the `.txt` files from the data directory into it, changing each file's extension to `.txt.backup`