# File system interaction

In [1]:
from datetime import datetime
import fileinput
import os
import pathlib
from pathlib import Path
import re
import shutil
import stat
import tempfile

## Contents of directories & properties

Iterate over the contents of the current working directory, and for each Python source file (name ending in `.py`), print its name, size, and last access time.

In [2]:
# Report name, size and last access time of every regular `.py` file in the
# current working directory.
for entry in Path.cwd().iterdir():
    # skip directories and anything whose name does not end in `.py`
    if not (entry.is_file() and entry.name.endswith('.py')):
        continue
    info = entry.lstat()
    # st_atime is a POSIX timestamp; convert it to local time before formatting
    accessed = datetime.fromtimestamp(info.st_atime)
    access_time = accessed.strftime('%Y-%m-%d %H:%M:%S')
    print(f'{entry.name}: {info.st_size} bytes, last accessed: {access_time}')

subprocess_environment.py: 858 bytes, last accessed: 2018-11-12 13:25:04
sys_info.py: 2439 bytes, last accessed: 2018-11-12 13:32:16


## File permissions

Create a new directory; don't raise an error if it already exists. Note that the mode is an integer that is conventionally written in octal (e.g., `0o700`).

In [3]:
# Scratch directory `tmp` inside the current working directory, requested with
# owner-only permissions (rwx------); an already existing directory is left
# untouched.
tmp_dir = Path.cwd().joinpath('tmp')
tmp_dir.mkdir(exist_ok=True, mode=0o700)

More specifically, testing whether the current user can access a file is also quite useful.

In [4]:
# Ask the operating system whether the current user may write to the
# directory. The `os` module is used directly (it is imported at the top of
# the notebook) rather than through `pathlib.os`, which only exists as an
# undocumented implementation detail of `pathlib`.
if os.access(tmp_dir, os.W_OK):
    print(f'current user can write in {tmp_dir.name}')

current user can write in tmp


Make the access mode a bit more liberal and check the permissions.

In [5]:
# rwxr-xr-x: full access for the owner, read and execute for group and others
tmp_dir.chmod(0o755)
# st_mode also carries the file-type bits, so more than the permission bits
# are shown
new_mode = tmp_dir.lstat().st_mode
print(oct(new_mode))

0o40755


Detailed tests on access permissions can be written using the bitmasks defined in the `stat` module.

In [6]:
# Keep the directory's mode bits around so that several permission tests can
# be run against the same value.
dir_stat = tmp_dir.lstat()
mode = dir_stat.st_mode

In [7]:
# The owner has full permissions only when the read, write and execute bits
# are all set (S_IREAD/S_IWRITE/S_IEXEC are the Unix V7 synonyms for
# S_IRUSR/S_IWUSR/S_IXUSR).
owner_bits = (stat.S_IREAD, stat.S_IWRITE, stat.S_IEXEC)
if all(mode & bit for bit in owner_bits):
    print(f'owner has full permissions on {tmp_dir.name}')

owner has full permissions on tmp


In [8]:
# A cleared S_IWGRP bit means members of the group may not write.
if (mode & stat.S_IWGRP) == 0:
    print(f'group can not write in {tmp_dir.name}')

group can not write in tmp


## Creating files & globbing

Creating some files in the `tmp` directory can be done by creating a `Path` object for each file, and opening it directly.

In [9]:
# Write `nr_files` small data files into `tmp`, and a README.md that lists
# each of them as a markdown bullet.
nr_files = 5
readme = tmp_dir / 'README.md'
with readme.open('w') as readme_file:
    readme_file.write('# Contents\n\n')
    for i in range(1, nr_files + 1):
        file = tmp_dir / f'data_{i:03d}.txt'
        # each data file holds a single line
        file.write_text(f'data {i}\n')
        readme_file.write(f'  * `{file.name}`: data file {i}\n')

In [10]:
!cat tmp/README.md

# Contents

  * `data_001.txt`: data file 1
  * `data_002.txt`: data file 2
  * `data_003.txt`: data file 3
  * `data_004.txt`: data file 4
  * `data_005.txt`: data file 5


In [11]:
# Show the first line of every `.txt` file in `tmp`; glob yields the files in
# no particular order.
for txt_path in tmp_dir.glob('*.txt'):
    print(f'{txt_path.name}: ', end='')
    with txt_path.open('r') as handle:
        first_line = handle.readline()
        print(first_line.rstrip())

data_003.txt: data 3
data_004.txt: data 4
data_001.txt: data 1
data_002.txt: data 2
data_005.txt: data 5


## Dissecting paths

Splitting a path into its various functional parts is of course OS dependent; `pathlib` lets you do this in a platform-independent way.

### Files

In [12]:
readme.name

'README.md'

In [13]:
readme.suffix

'.md'

In [14]:
readme.parent

PosixPath('/home/gjb/Documents/Projects/training-material/Python/SystemsProgramming/tmp')

In [15]:
readme.drive

''

### Directories

In [16]:
tmp_dir.name

'tmp'

In [17]:
tmp_dir.suffix

''

In [18]:
tmp_dir.parent

PosixPath('/home/gjb/Documents/Projects/training-material/Python/SystemsProgramming')

## Copying, moving, deleting

Some operations can be done using `pathlib` such as moving or deleting files.

In [19]:
!ls tmp

data_001.txt  data_002.txt  data_003.txt  data_004.txt	data_005.txt  README.md


In [20]:
readme.unlink()

In [21]:
!ls tmp

data_001.txt  data_002.txt  data_003.txt  data_004.txt	data_005.txt


Renaming files is straightforward.

In [22]:
# Give every `.txt` file in `tmp` the extension `.dat`.
# `with_suffix` swaps only the final extension, whereas replacing the
# substring '.txt' would also rewrite it elsewhere in the name
# (e.g. 'notes.txt.old.txt').
for file in tmp_dir.glob('*.txt'):
    file.rename(file.with_suffix('.dat'))

In [23]:
!ls tmp

data_001.dat  data_002.dat  data_003.dat  data_004.dat	data_005.dat


`pathlib` has no facilities for copying files, however `shutil` does.

In [24]:
# Make a `.bak` copy next to every `.dat` file in `tmp`.
# `with_suffix` swaps only the final extension, whereas replacing the
# substring '.dat' would also rewrite it elsewhere in the name
# (e.g. 'my.data_001.dat').
for file in tmp_dir.glob('*.dat'):
    shutil.copy(file, file.with_suffix('.bak'))

In [25]:
!ls tmp

data_001.bak  data_002.bak  data_003.bak  data_004.bak	data_005.bak
data_001.dat  data_002.dat  data_003.dat  data_004.dat	data_005.dat


Move the `.bak` files into a subdirectory `bak`.

In [26]:
# Collect all `.bak` files of `tmp` in the subdirectory `tmp/bak`.
bak_dir = tmp_dir / 'bak'
# exist_ok=True keeps the cell re-runnable, in line with how `tmp` itself is
# created
bak_dir.mkdir(exist_ok=True)
for file in tmp_dir.glob('*.bak'):
    # renaming to a path in another directory moves the file
    file.rename(bak_dir / file.name)

Print the sizes of all the files in `tmp`, and also compute the total size.

In [27]:
# Walk `tmp` bottom-up (subdirectories are reported before their parent),
# printing the size of each file, a subtotal per directory, and the grand
# total at the end.
total_size = 0
for dir_name, _subdirs, file_names in os.walk(tmp_dir, topdown=False):
    print(f'{dir_name}:')
    dir_size = 0
    for file_name in file_names:
        file = Path(dir_name) / file_name
        file_size = file.lstat().st_size
        print(f'\t{file.name}: {file_size} bytes')
        dir_size += file_size
    print(f'\tTotal: {dir_size} bytes')
    total_size += dir_size
print(f'Total: {total_size} bytes')

/home/gjb/Documents/Projects/training-material/Python/SystemsProgramming/tmp/bak:
	data_002.bak: 7 bytes
	data_005.bak: 7 bytes
	data_001.bak: 7 bytes
	data_003.bak: 7 bytes
	data_004.bak: 7 bytes
	Total: 35 bytes
/home/gjb/Documents/Projects/training-material/Python/SystemsProgramming/tmp:
	data_002.dat: 7 bytes
	data_004.dat: 7 bytes
	data_005.dat: 7 bytes
	data_003.dat: 7 bytes
	data_001.dat: 7 bytes
	Total: 35 bytes
Total: 70 bytes


`Path`'s `rmdir` method can be used to remove an empty directory, however, if the directory is not empty, the `rmtree` function in `shutil` can be used.

In [28]:
# Recursively delete `tmp` together with everything inside it.
shutil.rmtree(tmp_dir)

## Temporary files and directories, `fileinput`

Especially when running applications concurrently, it is important to ensure that names for temporary files are unique.  The module `tempfile` implements this. The temporary directory is created in `/tmp` and will be deleted with all its contents as soon as the end of the context is reached.  Files are created in that directory with random names, but will not be deleted when they are closed (`delete=False`). The default mode for temporary files is `w+b`; in this case we choose only `w` since the files persist after being closed, and we reopen them for reading later.

In [6]:
# Create a temporary directory holding `nr_files` randomly named data files of
# `nr_lines` lines each, print their contents, and sum every number that
# occurs in them.
nr_files = 3
nr_lines = 4
# TemporaryDirectory yields the directory name as a `str`; the directory is
# removed, with everything in it, when the `with` block exits.
with tempfile.TemporaryDirectory(prefix='data_', dir='/tmp') as tmp_dir:
    tmp_path = Path(tmp_dir)  # build the Path once instead of on every use
    print(f'director {tmp_dir} exists: {tmp_path.exists()}')
    # write the data files, use random names; delete=False keeps each file on
    # disk after it is closed so that it can be reopened below
    for file_nr in range(nr_files):
        with tempfile.NamedTemporaryFile(mode='w', prefix='data_', suffix='.txt',
                                         dir=tmp_dir, delete=False) as tmp_file:
            for i in range(nr_lines):
                print(f'line {i + 1} of {file_nr + 1}', file=tmp_file)
    # for each data file, print the name, and, indented, all the lines
    for tmp_file in tmp_path.iterdir():
        print(tmp_file.name)
        with tmp_file.open('r') as file:
            for line in file:
                print(f'\t{line.rstrip()}')
    # treat all the data files as one input, and compute the sum of all the
    # numbers in the files; the context manager closes the FileInput object
    # explicitly once the loop is done
    total = 0
    with fileinput.FileInput(tmp_path.glob('*.txt')) as all_lines:
        for line in all_lines:
            total += sum(map(int, re.findall(r'\d+', line)))
    print(f'total = {total}')
# the context has ended, so the directory no longer exists
print(f'director {tmp_dir} exists: {Path(tmp_dir).exists()}')

director /tmp/data_dm6_6mf1 exists: True
data_xf0siwln.txt
	line 1 of 1
	line 2 of 1
	line 3 of 1
	line 4 of 1
data_rrsq3__a.txt
	line 1 of 2
	line 2 of 2
	line 3 of 2
	line 4 of 2
data_czdumvnm.txt
	line 1 of 3
	line 2 of 3
	line 3 of 3
	line 4 of 3
total = 54
director /tmp/data_dm6_6mf1 exists: False


## Miscellaneous

Get the current working directory.

In [29]:
Path.cwd()

PosixPath('/home/gjb/Documents/Projects/training-material/Python/SystemsProgramming')

Get the current user's home directory.

In [30]:
Path.home()

PosixPath('/home/gjb')

Get the system's temporary directory.

In [2]:
tempfile.gettempdir()

'/tmp'