# Chapter 9: READING AND WRITING FILES

## Files and File Paths

In [42]:
import os
from pathlib import Path

A **Path** object works on any operating system

In [2]:
path = Path("Disk", "Folder", "File")
path

WindowsPath('Disk/Folder/File')

#### Joining Paths

In [12]:
Path(path, "new_file")

WindowsPath('Disk/Folder/File/new_file')

In [15]:
Path('disk') / Path('some_folder', 'My_Files')

WindowsPath('disk/some_folder/My_Files')

In [16]:
Path('folder') / Path('dir1/files')

WindowsPath('folder/dir1/files')

In [18]:
Path('Mail') / 'Inbox' / 'Spam'

WindowsPath('Mail/Inbox/Spam')

#### Drive letter in Path

In [23]:
Path("/mnt/d", "PATH", "folder")  # Linux
Path("D:/", "PATH", "folder")  # Windows

WindowsPath('D:/PATH/folder')

#### Current Working Directory

In [24]:
Path.cwd()

WindowsPath('D:/GitHub Repos/coding/automate-with-python')

In [25]:
os.getcwd()

'D:\\GitHub Repos\\coding\\automate-with-python'

#### Change Current Working Directory

In [34]:
os.chdir("D:/Software")

Path.cwd()

WindowsPath('D:/Software')

#### The Home Directory

In [35]:
Path.home()

WindowsPath('C:/Users/javoh')

#### Create New Folders (Directories)

In [11]:
# Create the whole chain of folders in one call. exist_ok=True keeps the cell
# re-runnable: without it a second run raises FileExistsError.
os.makedirs(Path("D:/", "FOLDER1", 'sub_folder_1', 'again_folder!'), exist_ok=True)

In [9]:
# Every missing intermediate folder is created, relative to the current working
# directory. exist_ok=True lets the cell be run again without FileExistsError.
os.makedirs("i/can/make/multiple/folders/at/once!!!", exist_ok=True)

#### Create Only One Directory

In [14]:
# Path.mkdir() creates a single directory (the parent must already exist).
# exist_ok=True makes the cell safe to run more than once.
Path("D:/", "temp_folder").mkdir(exist_ok=True)

In [8]:
# os.mkdir() creates exactly one directory and has no exist_ok option, so skip
# the call when the folder is already there to keep the cell re-runnable.
# If a regular file with this name exists, os.mkdir() still raises as before.
if not os.path.isdir("my_folder_1"):
    os.mkdir("my_folder_1")

#### Check Whether a Given Path is an Absolute Path

In [77]:
Path("D:/GitHub Repos/coding/").is_absolute()

True

In [78]:
# No drive letter here: on Windows a path needs both a drive and a root to
# count as absolute, so this rooted-but-driveless path reports False.
Path("/GitHub Repos/coding/").is_absolute()

False

In [80]:
os.path.isabs("F:/Hello/Path")

True

In [82]:
os.path.isabs("directory/some_folder")

False

#### String of the Absolute Path of the Argument - convert a relative path into an absolute one

In [85]:
path

WindowsPath('Disk/Folder/File')

In [84]:
os.path.abspath(path)

'D:\\GitHub Repos\\coding\\automate-with-python\\Disk\\Folder\\File'

#### String of a Relative Path from the `start` path to `path`
*`os.path.relpath(`**`path, start`**`)`*

In [88]:
os.path.relpath('D:/path/dir', 'D:/')

'path\\dir'

#### Getting the Parts of a File Path

In [94]:
p = Path("D:/Folders/Pictures/nature.png")

In [95]:
p.anchor

'D:\\'

In [96]:
p.drive

'D:'

In [97]:
p.parts

('D:\\', 'Folders', 'Pictures', 'nature.png')

In [98]:
p.parent

WindowsPath('D:/Folders/Pictures')

In [107]:
p.parents[0]

WindowsPath('D:/Folders/Pictures')

In [108]:
p.parents[2]

WindowsPath('D:/')

In [109]:
p.root

'\\'

In [110]:
p.name

'nature.png'

In [111]:
p.stem

'nature'

In [112]:
p.suffix

'.png'

In [113]:
p.suffixes

['.png']

#### Finding File Sizes and Folder Contents

**`os.path.getsize`** will return the size in *bytes* of the file

In [115]:
os.path.getsize("D:/Software/NewFolderWizard.exe")

168448

**`os.listdir`** will return a list of filename strings

In [119]:
os.listdir("D:/GitHub Repos/coding/ARCHIVE")

['antivirus-update.py',
 'pi-million-digits-no-space.txt',
 'pi-million-digits.txt',
 'regex-corey-schafer.py',
 'space-correction',
 'wordlist-10000-mit.txt']

In [123]:
# Sum the size in bytes reported for every entry directly inside the folder.
folder_path = "D:/GitHub Repos/coding/ARCHIVE"
total_size = sum(
    os.path.getsize(os.path.join(folder_path, entry_name))
    for entry_name in os.listdir(folder_path)
)

print(f"{total_size} bytes = {total_size/1024:.2f} KB")

2108634 bytes = 2059.21 KB


#### Modifying a List of Files Using Glob Patterns

In [25]:
p = Path("D:/GitHub Repos/automate-the-boring-stuff")
p.glob('*')

<generator object Path.glob at 0x0000020757F55310>

In [52]:
list(p.glob('*'))

[WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/.git'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/.gitignore'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/.ipynb_checkpoints'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/auto-with-py.py'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/chapter-6-manipulating-strings.ipynb'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/chapter-9-reading-writing-files.ipynb'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/README.md')]

**The asterisk (\*) stands for “any number of any characters,” so p.glob('\*') returns a generator of every file and folder directly inside the path stored in p.**

In [50]:
list(Path('./').glob('*.py'))

[WindowsPath('auto-with-py.py')]

In [53]:
list(p.glob('chapter*.ipynb'))

[WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/chapter-6-manipulating-strings.ipynb'),
 WindowsPath('D:/GitHub Repos/automate-the-boring-stuff/chapter-9-reading-writing-files.ipynb')]

**The question mark (?) stands for any single character**

In [51]:
list(Path("D:/GitHub Repos").glob("project?.docx"))

[WindowsPath('D:/GitHub Repos/project1.docx'),
 WindowsPath('D:/GitHub Repos/project2.docx'),
 WindowsPath('D:/GitHub Repos/project3.docx'),
 WindowsPath('D:/GitHub Repos/project4.docx')]

In [66]:
list(Path("D:/GitHub Repos").glob('*.?x?'))

[WindowsPath('D:/GitHub Repos/calculator.exe'),
 WindowsPath('D:/GitHub Repos/dummy_file.txt'),
 WindowsPath('D:/GitHub Repos/notes.txt')]

In [72]:
p = Path("D:/GitHub Repos")
# glob() yields one Path object per matching entry, so each result can be
# passed straight to other pathlib / os functions inside the loop.
for file_path_obj in p.glob("*.txt"):
    print(file_path_obj)
    # You can do something with the file.

D:\GitHub Repos\dummy_file.txt
D:\GitHub Repos\notes.txt


#### Checking Path Validity

- **`path_obj.exists()`** returns `True` if the path exists or returns `False` if it doesn't exist.

In [102]:
exist_dir = Path("D:/GitHub Repos")
not_exist_dir = Path("D:/This/Folder/Does/Not/Exist")
py_file = Path("D:/GitHub Repos/coding/app.py")

In [103]:
exist_dir.exists()

True

In [104]:
not_exist_dir.exists()

False

In [105]:
py_file.exists()

True

- **`path_obj.is_dir()`** returns `True` if the path exists and is a directory, or returns `False` otherwise.

In [106]:
exist_dir.is_dir()

True

In [107]:
not_exist_dir.is_dir()

False

In [108]:
py_file.is_dir()

False

- **`path_obj.is_file()`** returns `True` if the path exists and is a file, or returns `False` otherwise.

In [110]:
exist_dir.is_file()

False

In [113]:
not_exist_dir.is_file()

False

In [114]:
py_file.is_file()

True

In [117]:
print(Path("D:/").exists())
print(Path("F:/").exists())

True
False


#### The older os.path module can accomplish the same tasks

- **`os.path.exists(`*`path`*`)`**

- **`os.path.isfile(`*`path`*`)`**

- **`os.path.isdir(`*`path`*`)`**

## The File Reading/Writing Process

In [45]:
p = Path('spam.txt')
# Pass the encoding explicitly: the default depends on the machine's locale,
# so the same notebook could write different bytes on another system.
# write_text() returns the number of characters written.
p.write_text('Hello, world!', encoding='utf-8')

13

In [47]:
# List the current working directory in pure Python so the cell does not depend
# on WSL being installed. Names starting with "." are skipped to mirror plain `ls`.
print(*sorted(name for name in os.listdir() if not name.startswith(".")), sep="\n")

README.md
chapter-12-web-scraping.ipynb
chapter-6-manipulating-strings.ipynb
chapter-9-reading-writing-files.ipynb
spam.txt


In [48]:
# Read the file back as a string; the encoding is given explicitly so the result
# does not depend on the machine's locale default.
p.read_text(encoding='utf-8')

'Hello, world!'