Reading and Writing Text Data

In [1]:
with open('somefile.bin','wb') as f:
    f.write(b'Hello World')

In [3]:
with open('somefile.bin','rb') as f:
    data = f.read(16)
    text = data.decode('utf8')

In [4]:
text

'Hello World'

In [8]:
#You want to write data to a file, but only if it doesnt already exist on the filesystem.

with open('somefile.txt', 'wt') as f:
    f.write('Hello\n')

In [11]:
#to check the file if exists and then modify it accordingly

import os
if not os.path.exists('somefile'):
    with open('somefile', 'wt') as f:
        f.write('Hello\n')
else:
    print('file already exists')



#Cleary using the x file mode is a lot more straightforward.

file already exists


Performing I/O Operations on a String

In [14]:
#Use the io.StringIO() and io.BytesIO() classes to create file-like objects that operate on string data

import io

s = io.StringIO()
s.write('Hello')

5

In [15]:
s.getvalue()

'Hello'

Reading and Writing Compressed Datafiles

In [19]:
# gzip compression
import gzip
with gzip.open('somefile.gz', 'rt') as f:
    text = f.read()
# bz2 compression
import bz2
with bz2.open('somefile.bz2', 'rt') as f:
    text = f.read()

FileNotFoundError: [Errno 2] No such file or directory: 'somefile.bz2'

Iterating Over Fixed Sized Records

In [20]:
#Instead of iterating over a file by lines, you want to iterate over a #collection of fixed-
#sized records or chunks.

In [22]:
from functools import partial

RECORD_SIZE = 32

with open('somefile.data', 'rb') as f:
    records = iter(partial(f.read, RECORD_SIZE), b'')
    for r in records:
        f.read(r)

FileNotFoundError: [Errno 2] No such file or directory: 'somefile.data'

In [26]:
[i for i in iter(sys.path)]

['/home/susearc/Documents/github/Python_CookBook_Self',
 '/home/susearc/miniconda3/envs/my-pypy/lib/pypy3.9',
 '',
 '/home/susearc/miniconda3/envs/my-pypy/lib/pypy3.9/site-packages']

Reading Binary Data into a Mutable Buffer

In [29]:
#To read data into a mutable array, use the readint() method of files


import os.path

def read_into_buffer(filename):
    buf = bytearray(os.path.getsize(filename))
    with open(filename, 'rb') as f:
        f.readinto(buf)
    return buf

In [30]:
with open('sample.bin', 'wb') as f:
    f.write(b'Hello world')

buf = read_into_buffer('sample.bin')
buf

bytearray(b'Hello world')

In [31]:
RECORD_SIZE = 32

buf = bytearray(RECORD_SIZE)
with open('somefile', 'rb') as f:
    while True:
        n = f.readinto(buf)
        if n < RECORD_SIZE:
            break

In [32]:
buf

bytearray(b'Hello\n\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')

In [34]:
m1 = memoryview(buf)
m2 = m1[-5:]
m2

<memory at 0x00005622325fdd30>

In [35]:
#Manipulating Pathnames

import os
path = '/Users/beazley/Data/data.csv'


os.path.basename(path)

'data.csv'

In [36]:
os.path.dirname(path)

'/Users/beazley/Data'

In [38]:
os.path.join('tmp','data', os.path.basename(path))

'tmp/data/data.csv'

In [39]:
#Expand the user's home directory

path = '~/Data/data.csv'
os.path.expanduser(path)

'/home/susearc/Data/data.csv'

In [40]:
os.path.splitext(path)

('~/Data/data', '.csv')

Testing for the Existence of a File

In [42]:
import os
os.path.exists('/etc/spam')

False

In [43]:
#You can perform further tests to see what kind of file it might be.Theses tests return False if the file in question doesnt exist:

os.path.realpath('/usr/local/bin/python3')

'/usr/local/bin/python3'

In [44]:
#If you need to get metadata(file size or modification date) that is also available in the os.path module

os.path.getsize('/etc/passwd')

2633

In [45]:
os.path.getmtime('/etc/passwd')

1672809415.9167676

In [47]:
import time
time.ctime(os.path.getmtime('/etc/passwd'))


#displays the modification time

'Wed Jan  4 10:46:55 2023'

Getting a Directory Listing

In [51]:
#Use os.listdir() function to obtaina list of files in a directory:

import os
names = os.listdir()

In [53]:
names

['.git',
 'README.md',
 'chap1.ipynb',
 '.vscode',
 '.gitignore',
 'chap2.ipynb',
 'textfile.txt',
 'jalapeño.txt',
 'chap3.ipynb',
 'chap4.ipynb',
 'somefile.txt',
 'myfile.txt',
 'chap5.ipynb',
 'somefile.bin',
 'somefile',
 'somefile.gz',
 'sample.bin']

In [58]:
#For filename matching one may want to use the glob or fnmatch modules instead


import glob
pyfiles = glob.glob('*.ipynb')

In [59]:
pyfiles

['chap1.ipynb', 'chap2.ipynb', 'chap3.ipynb', 'chap4.ipynb', 'chap5.ipynb']

In [60]:
from fnmatch import fnmatch
pyfiles = [name for name in os.listdir()]

In [61]:
pyfiles

['.git',
 'README.md',
 'chap1.ipynb',
 '.vscode',
 '.gitignore',
 'chap2.ipynb',
 'textfile.txt',
 'jalapeño.txt',
 'chap3.ipynb',
 'chap4.ipynb',
 'somefile.txt',
 'myfile.txt',
 'chap5.ipynb',
 'somefile.bin',
 'somefile',
 'somefile.gz',
 'sample.bin']

In [64]:
#Example of getting a directory listing

import os
import os.path
import glob

pyfiles = glob.glob('*.ipynb')


#Get file sizes and modification dates

name_sz_dat = [(name, os.path.getsize(name), os.path.getmtime(name)) for name in pyfiles]

In [65]:
name_sz_dat

[('chap1.ipynb', 51370, 1673028623.339419),
 ('chap2.ipynb', 63486, 1674824153.328792),
 ('chap3.ipynb', 52422, 1674913676.89818),
 ('chap4.ipynb', 39019, 1676285308.706607),
 ('chap5.ipynb', 20676, 1676294372.4970877)]

In [66]:
for name, size, mtime in name_sz_dat:
    print(name, size, mtime)

chap1.ipynb 51370 1673028623.339419
chap2.ipynb 63486 1674824153.328792
chap3.ipynb 52422 1674913676.89818
chap4.ipynb 39019 1676285308.706607
chap5.ipynb 20676 1676294372.4970877


In [67]:
#alternative Get file metadata

file_metadata = [(name, os.stat(name)) for name in pyfiles]
for name, meta in file_metadata:
    print(name, meta.st_size, meta.st_mtime)

chap1.ipynb 51370 1673028623.339419
chap2.ipynb 63486 1674824153.328792
chap3.ipynb 52422 1674913676.89818
chap4.ipynb 39019 1676285308.706607
chap5.ipynb 21676 1676294482.8545504


Bypassing Filename Encoding



In [68]:
import sys

sys.getfilesystemencoding()

'utf-8'

In [69]:
import os
file = os.listdir('.')

In [70]:
file

['.git',
 'README.md',
 'chap1.ipynb',
 '.vscode',
 '.gitignore',
 'chap2.ipynb',
 'textfile.txt',
 'jalapeño.txt',
 'chap3.ipynb',
 'chap4.ipynb',
 'somefile.txt',
 'myfile.txt',
 'chap5.ipynb',
 'somefile.bin',
 'somefile',
 'somefile.gz',
 'sample.bin']

Adding or Changing the Encoding of an Already
Open File

In [71]:
import urllib.request
import io

u = urllib.request.urlopen('http://www.python.org')
f = io.TextIOWrapper(u, encoding='utf-8')
text = f.read()

URLError: <urlopen error [Errno -2] Name or service not known>

In [72]:
#If you want to change the encoding of an already open text mode file, use its detach() method to remove the existing text encoding layer before replacing it with a new one.


import os
sys.stdout.encoding

'UTF-8'

In [74]:
f  = open('sample.txt', 'w')
f

<_io.TextIOWrapper name='sample.txt' mode='w' encoding='UTF-8'>

In [75]:
b = f.detach()

In [76]:
b

<_io.BufferedWriter name='sample.txt'>

In [77]:
f.write('Hello')

ValueError: underlying buffer has been detached

Wrapping an Existing File Descriptor As a File Object

In [1]:
import sys
# Cratea a binary  mode file for stdout

bstdout = open(sys.stdout.fileno(), 'wb', closefd = False)
bstdout.write(b'Hello World\n')
bstdout.flush()

Making temporary files and directories

In [13]:
# Reading and using a tempfile

from tempfile import TemporaryFile

with TemporaryFile('w+t') as f:
    # Read/Write to the file
    f.write("Hello world")
    f.write('\n')
    f.write("Testing")


    #Seek back to beginning and read the data


    f.seek(0)
    data  =  f.read()

#Tempfile is destroyed after this

In [15]:
print(data)

Hello world
Testing


In [16]:
# if named tempfile is required

from tempfile import NamedTemporaryFile

with NamedTemporaryFile('w+t') as f:
    print('filename is:', f.name)
    

filename is: /tmp/tmp3iptlzro


In [18]:
f  = NamedTemporaryFile('w+t')
f.close()

In [22]:
# To make a temporary directory, use tempfile, TemporaryDirectory().

from tempfile import TemporaryDirectory
with TemporaryDirectory() as dirname:
    print('dirname is ', dirname)
    

dirname is  /tmp/tmpzlnqpeis


In [23]:
import tempfile
tempfile.gettempdir()

'/tmp'

In [35]:
f = NamedTemporaryFile(prefix='mytemp',mode='w+t',suffix='.txt', dir='/tmp')
f.write("This is a test file and will be deleted affterwards")
f.flush()

In [37]:
print (f.name)

/tmp/mytempv8qyxmwt.txt


In [38]:
f.close()

### Serializing Python Objects

In [None]:
# The most common approach for serializein data is to use the pickle module. To dump an object ot a file you do this:

import pickle

data = print("hello world")
f = open('jalapeño.txt', 'wb')
pickle.dump(data,f)

For example if working with multiple objects you can do this:

In [50]:
import pickle
f = open('somedata', 'wb')
pickle.dump(['1,2,3,4'],f)
pickle.dump('hello',f)
pickle.dump({'apples','pear','banana'},f)


f = open('somedata', 'rb')
pickle.load(f)

['1,2,3,4']

In [54]:
pickle.load(f)

EOFError: Ran out of input

In [55]:
#You can pickle functions classes and instances but the resulting data only encodes name references to the associated code objects.

import math
import pickle
pickle.dumps(math.cos)


b'\x80\x04\x95\x10\x00\x00\x00\x00\x00\x00\x00\x8c\x04math\x94\x8c\x03cos\x94\x93\x94.'

In [None]:
import countdown

c = countdown.Countdown(30)


T-minus 23
T-minus 22
T-minus 21
T-minus 20
T-minus 19
T-minus 18
T-minus 17
T-minus 16
T-minus 15
T-minus 14
T-minus 13
T-minus 12
T-minus 11
T-minus 10
T-minus 9
T-minus 8
T-minus 7
T-minus 6


In [58]:
f = open('cstate.p', 'wb')
import pickle
pickle.dump(c,f)

T-minus 2


In [59]:
f.close()

In [3]:
#execute the following script after restarting the env
import pickle
f = open('cstate.p', 'rb')
pickle.load(f)

T-minus 2


<countdown.Countdown at 0x7fe3587c3a90>

T-minus 1
