# 5.1. Reading and Writing Text Data

In [11]:
# Read the entire file as a single string 
with open('test.txt', 'rt') as f:
    data = f.read()

# Iterate over the lines of the file
with open('test.txt', 'rt') as f:
    for line in f:
        print(line)

text1 = "Apple Inc"
text2 = "Amazon Inc"
        
# Write chunks of text data 
with open('write.txt', 'wt') as f:
    f.write(text1)
    f.write(text2)

# By default, files are read/writing using the system default text encoding.
import sys 
print(sys.getdefaultencoding())

with open('test.txt', 'rt', encoding='latin-1') as f:
    print(f.read())

# more common encoding are ascii, latin-1, utf-8, and utf-16 
# UTF-8 is usually a safe bet 
# ascii corresponds to the 7-bit characters 
# latin-1 is a direct mapping of bytes 0-255 to Unicode

# When contol leaves the with block, the file will be closed automatically 

f = open('test.txt', 'rt')
data = f.read()
print(data)
f.close()

# Unix VS Windows \n VS \r\n 

# Read with diabled newline translation 
with open('test.txt', 'rt', newline='') as f:
    print(f.read())

# Replace bad chars with Unicode U+ffd replacement char
f = open('test.txt', 'rt', encoding='ascii', errors='replace')
f.read()
f.close()

# Ignore bad chars entirely 
g = open('test.txt', 'rt', encoding='ascii', errors='ignore')
g.read()
g.close()




大吉大利今晚吃鸡😀😀

🐶🐶

utf-8
å¤§åå¤§å©ä»æåé¸¡ðð
ð¶ð¶

大吉大利今晚吃鸡😀😀
🐶🐶

大吉大利今晚吃鸡😀😀
🐶🐶



# 5.2. Printing to a File

In [16]:
with open('text.txt', 'rt') as f:
    print('Hello world!', file=f)

UnsupportedOperation: not writable

# 5.3. Printing with a Different Separator or Line Ending

In [30]:
# Use the sep and end keyword argument to print() to change the outputs
print('ACME', 50, 91.5)
print('ACME', 50, 91.5, sep=',')
print('ACME', 50, 91.5, sep=',', end='!!\n')

for i in range(5):
    print(i)

for i in range(5):
    print(i, end= ' ')

# str.join() it only works with strings
print(','.join(['ACME', '50', '91.5']))
row = ('ACME',10)
print(*row, sep=',')

ACME 50 91.5
ACME,50,91.5
ACME,50,91.5!!
0
1
2
3
4
0 1 2 3 4 ACME,50,91.5
ACME,10


# 5.4. Reading and Writing Binary Data

In [None]:
# open() function with mode rb, wb to read or write binary data

# Write binary data to a file 
with open('somefile.bin', 'wb') as f:
    f.write(b'Hello World')


# Read the entire file as a single byte string 
with open('somefile.bin', 'rb') as f:
    data = f.read()
    print(data)

# returned will be in the form of bytes string 

# Text string 
t = 'Hello World'
t[0]

for c in t:
    print(c)

# Byte string 
b = b'Hello World'
b[0]

for c in b:
    print(c)

# If you ever need to read or write text from a binary-mode file, make sure you remember to decode or encode it

with open('somefile.bin', 'rb') as f:
    data = f.read(16)
    text = data.decode('utf-8')
    print(text)

with open('somefile.bin', 'wb') as f:
    text = 'Hello World'
    f.write(text.encode('utf-8'))

import array 
nums = array.array('i', [1, 2, 3, 4])
with open('data.bin', 'wb') as f:
    f.write(nums)

a = array.array('i', [0,0,0,0])
with open('data.bin', 'rb') as f:
    f.readinto(a)

print(a)

# 5.5 Writing to a File That Doesn't Already Exist

In [3]:
# want to write data to file, but only if doesn't already exist on the filesystem
with open('somefile', 'wt') as f:
    f.write('Hello\n')

# If the file is binary mode, use mode xb instead of xt
with open('somefile', 'rb') as f:
    print(f.read())

# elegant solution
import os 
if not os.path.exists('somefile'):
    with open('somefile', 'wt') as f:
        f.write('Hello World\n')
else:
    print('File already exists!')

b'Hello\n'
File already exists!


# 5.6. Performing I/O Operations on a String

In [10]:
# io.StringIO() and io.BytesIO() 
import io 

s = io.StringIO() # only be used for text 
s.write('Hello World\n')
print('This is a test', file=s)

# Get all of the data written so far
s.getvalue()

# Wrap a file interface around an existing string 
s = io.StringIO('Hello\nWorld\n')
s.read(4)
s.read()

s = io.BytesIO() # operating with binary data
s.write(b'binary data')
s.getvalue()

# StringIO and BytesIO instance don't have a proper integer file descriptor.

b'binary data'

# 5.7.Reading and Writing Compressed Datafiles

In [15]:
# gzip, bz2 module 
# gzip compression
import gzip 
with gzip.open('text.gz', 'rt') as f: # parameters {encoding, errors, newline}
    text = f.read()
    print(text)

# gzip compression 
import gzip 
with gzip.open('text.gz', 'wt') as f:
    f.write('Add New Line gzip File')

# compression level 
# default level 9, provides the highest level of compression.
# Lower levels offer better performance, but not as much compression

import gzip 
f = open('text.gz', 'rb')
with gzip.open(f, 'rt') as g:
    text = g.read() 
    print(text)

Add New Line gzip File
Add New Line gzip File


# 5.8.Iterating Over Fixed-Sized Records

In [16]:
from functools import partial 

RECORD_SIZE = 32

with open('data.bin', 'rb') as f:
    records = iter(partial(f.read, RECORD_SIZE), b'')
    for r in records:
        print(r)

b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00\x00\x00'


# 5.9. Reading Binary Data into a Mutable Buffer

In [21]:
import os.path

def read_into_buffer(filename):
    buf = bytearray(os.path.getsize(filename))
    with open(filename, 'rb') as f:
        f.readinto(buf)
    return buf 

# Write a sample file
with open('text.txt', 'wb') as f:
    f.write(b'Hello World')

buf = read_into_buffer('sample.bin')

record_size = 5 # Size of each record (adjust value)

buf = bytearray(record_size)
print(buf)
with open('sample.bin', 'rb') as f:
    while True:
        n = f.readinto(buf) # return code, which is a number of bytes actually read
        if n < record_size:
            break 
        else:
            print(n)
            

bytearray(b'\x00\x00\x00\x00\x00')
5
5


# 5.10. Memory Mapping Binary Files

In [29]:
import os
import mmap 

def memory_map(filename, access=mmap.ACCESS_WRITE):
    size = os.path.getsize(filename)
    fd = os.open(filename, os.O_RDWR)
    return mmap.mmap(fd, size, access=access)

size = 1000000
with open('data', 'wb') as f:
    f.seek(size-1)
    f.write(b'\x00')

m = memory_map('data')
len(m)
m[0:10]

# Reassign a slice
m[0:11] = b'Hello World'
m.close()

# Verfify that changes were made
with open('data', 'rb') as f:
    print(f.read(11))

with memory_map('data') as m:
    print(len(m))
    print(m[0:10])

# If read-only access 
m = memory_map('data', mmap.ACCESS_READ)

# modify the data locally 
m = memory_map('data', mmap.ACCESS_COPY)

# Using mmap to map files into memory can be an efficient and elegant means for randomly accessing the contents of file
m = memory_map('data')
# Memoryview of unsigned integers
v = memoryview(m).cast('I')
v

b'Hello World'
1000000
b'Hello Worl'


<memory at 0x10d045d08>

# 5.11.Manipylating Pathnames

In [34]:
import os
path = '/Users/mbp-chyi/Downloads/Books-BK-PythonCookbook3rd/Chapter5-FilesAndIO'

# Get the last component of the path 
os.path.basename(path)

# Get the firectory name
os.path.dirname(path)

# join path components together 
os.path.join('tmp','data', os.path.basename(path))

# Expand the user's home directory 
path = '~'
os.path.expanduser(path)

# Split the file extension 
os.path.splitext(path)

('~', '')

# 5.12. Testing for the Existence of a File

In [43]:
import os 
os.path.exists('/etc/passwd')
os.path.exists('/tmp/spam')

# Is a regular file 
os.path.isfile('/etc/passwd')

# Is a directory 
os.path.isdir('/etc/passwd')

# Is a symbolic link 
os.path.islink('/usr/local/bin/python3')

# Get the file linked to 
os.path.realpath('/usr/local/bin/python3')

# Get metadata 
os.path.getsize('/etc/passwd')
os.path.getmtime('/etc/passwd')

import time 
time.ctime(os.path.getmtime('/etc/passwd'))

# worry about permissions 

'Tue Oct  3 08:29:33 2017'

# 5.13.Getting a Directory Listing

In [53]:
import os
names = os.listdir('/Users/mbp-chyi/Downloads/Books-BK-PythonCookbook3rd/Chapter5-FilesAndIO')
print(names)
import os.path 

# Get all regular files
pat = '/Users/mbp-chyi/Downloads/Books-BK-PythonCookbook3rd/Chapter5-FilesAndIO'
names = [names for name in os.listdir(pat) if os.path.isfile(os.path.join(pat, name))]
print(names)

# Get all dirs
dirnames = [name for name in os.listdir(pat) if os.path.isdir(os.path.join(pat, name))]

# startswith, endswith 
pyfiles = [name for name in os.listdir(pat) if name.endswith('.py')]

# for filename matching 
import glob 
pyfiles= glob.glob('{}/*.py'.format(pat))

from fnmatch import fnmatch
pyfiles = [name for name in os.listdir(pat) if fnmatch(name, '*.py')]

# additional metadata file sizes, modification dates

# Example of getting a directory listing 
import os 
import os.path 
import glob 

pyfiles = glob.glob('*.py')

# Get file sizes and modification dates
name_sz_date = [(name, os.path.getsize(name), os.path.getmtime(name))
               for name in pyfiles]

for name, size, mtime in name_sz_date:
    print(name, size, mtime)
    
# Alternative: Get file metadata 
file_metadata = [(name, os.stat(name)) for name in pyfiles]
for name, meta in file_metadata:
    print(name, meta.st_size, meta.st_mtime)

['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin']
[['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin'], ['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin'], ['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin'], ['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin'], ['somefile.bin', 'text.gz', 'write.txt', 'somefile', 'text.txt', 'test.txt', '.ipynb_checkpoints', 'Chapter5. Files and IO.ipynb', 'data', 'data.bin', 'sample.bin'], ['so

# 5.14. ByPassing Filename Encoding

In [59]:
import sys 

# By default, all filenames are encoded and decoded according to the text encoding returned by sys.getfilesystemcoding()
print(sys.getfilesystemencoding())

# Write a file using a unicode filename 
with open('jalape\xf1o.txt', 'w') as f:
    f.write('Spicy!')

# Directory listing (decoded)
import os 
os.listdir('.')

# Directory listing (raw)
os.listdir(b'.') # Note: byte string 

# Open file with raw filename 

utf-8


[b'somefile.bin',
 b'text.gz',
 b'write.txt',
 b'somefile',
 b'jalape\xc3\xb1o.txt',
 b'text.txt',
 b'test.txt',
 b'.ipynb_checkpoints',
 b'Chapter5. Files and IO.ipynb',
 b'data',
 b'data.bin',
 b'sample.bin']

# 5.15. Printing Bad Filenames 

In [4]:
def bad_filename(filename):
    temp = filename.encode(sys.getfilesystemencoding(), errors='surrogateescape')
    return temp.decode('latin-1')

import os 
files = os.listdir('.')
for name in files:
    try:
        print(name)
    except UnicodeEncodeError:
        print(bad_filename(name))

somefile.bin
text.gz
write.txt
somefile
jalapeño.txt
text.txt
test.txt
.ipynb_checkpoints
Chapter5. Files and IO.ipynb
data
data.bin
sample.bin


# 5.16. Adding or Changing the Encoding of an Already Open File

In [12]:
import urllib.request
import io

# 
u = urllib.request.urlopen('http://www.python.org')

# io.TextIOWrapper is a text-handling layer that encodes and decodes Unicode
# io.BufferedWriter is a buffered I/O layer that handles binary data
# io.FileIO is a raw file representing the low-level file descriptor in the operating system
f = io.TextIOWrapper(u, encoding='utf-8')
text = f.read() 
print(text)

import sys 
print(sys.stdout.encoding)

f = open('sample.txt', 'w')
print(f.buffer)
print(f.buffer.raw)

f = io.TextIOWrapper(f.buffer, encoding='latin-1')
print(f)

f = open('sample.txt', 'w')
print(f)
b = f.detach() # detach() method disconnects the topmost layer of a file and return the next lower layer
print(b)
f = io.TextIOWrapper(b, encoding='latin-1')
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='ascii', errors='xmlcharrefreplace')


<!doctype html>
<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->
<!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]-->
<!--[if IE 8]>      <html class="no-js ie8 lt-ie9">                 <![endif]-->
<!--[if gt IE 8]><!--><html class="no-js" lang="en" dir="ltr">  <!--<![endif]-->

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">

    <link rel="prefetch" href="//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js">

    <meta name="application-name" content="Python.org">
    <meta name="msapplication-tooltip" content="The official home of the Python Programming Language">
    <meta name="apple-mobile-web-app-title" content="Python.org">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="HandheldFriendly" conte

UnsupportedOperation: detach

# 5.17. Writing Bytes to a Text File

In [16]:
import sys
sys.stdout.write(b'Hello\n')
# sys.stdout is always opened in text mode 

Hello


# 5.18. Wrapping an Existing File Descriptor As a File Object

In [18]:
# Open a low-level file descriptor 
import os
fd = os.open('somefile.txt', os.O_WRONLY | os.O_CREAT)

# Turn into a proper file 
f = open(fd, 'wt')
f.write('hello world\n')
f.close()

from socket import socket, AF_INET, SOCK_STREAM 

def echo_client(client_sock, addr):
    print('Got connection from', addr)
    
    # Make text-mode file wrappers for socket reading/writing
    client_in = open(client_sock.fileno(), 'rt', encoding='latin-1', closefd=False)
    client_out = open(client_sock.fileno(), 'wt', encoding='latin-1', closefd=False)
    
    # Echo lines back to the client using file I/O 
    for line in client_in:
        client_out.write(line)
        client_out.flush()
    client_sock.close()

def echo_server(address):
    sock = socket(AF_INET, SOCK_STREAM)
    sock.bind(address)
    sock.listen(1)
    while True:
        client, addr = sock.accept()
        echo_client(client, addr)

import sys 

# Create a binary-mode file for stdout 
bstdout = open(sys.stdout.fileno(), 'wb', closefd=False)
bstdout.write(b'Hello World\n')
bstdout.flush() 


UnsupportedOperation: fileno

# 5.19. Making Temporary Files and Directories 

In [26]:
from tempfile import TemporaryFile
from tempfile import NamedTemporaryFile 
from tempfile import TemporaryDirectory 


with TemporaryFile('w+t') as f:
    # Read/write to the file 
    f.write('Hello World\n')
    f.write('Testing\n')
    
    # Seek back to beginning and read the data 
    f.seek(0)
    data = f.read() 

with NamedTemporaryFile('w+t') as f:
    print('filename is:', f.name)
    
with TemporaryDirectory() as dirname:
    print('dirname is:', dirname)
    # Use the directory 
# Directory and all contents destryed 

# Temporary file is destroyed

f = TemporaryFile('w+t') # w+t for text; w+b for binary 
# Use the temporary file 
print(f)
f.close()
# File is destroyed

# mkstemp() and mkdtemp() to create temporary files and directories
import tempfile
tempfile.mkstemp() 
tempfile.mkdtemp()

# /var/tmp similar 

# find out the actual location 
tempfile.gettempdir()

# All of the temporary-file-related functions allow you to override this directory as well as the naming conventions
f = NamedTemporaryFile(prefix='mytemp', suffix='.txt', dir='/tmp')
f.name

filename is: /var/folders/g7/lwx8_5tn269dr8gyq3vrsghr0000gn/T/tmp0i43b7wz
dirname is: /var/folders/g7/lwx8_5tn269dr8gyq3vrsghr0000gn/T/tmp3ag6hwbj
<_io.TextIOWrapper name=62 mode='w+t' encoding='UTF-8'>


'/tmp/mytempez0dmswy.txt'

# 5.20. Communicating with Serial Ports

In [1]:
# You want to read and write data over a serial port, typically to interact with some kind of hardware device
import serial
ser = serial.Serial('/dev/tty.usbmodemName', # Device name varies
                   baudrate=9600,
                   bytesize=8,
                   parity='N',
                   stopbits=1)



# 5.21. Serializing Python Objects

In [8]:
# need to serialize a Python object into a byte stream so that you can do things such as save it to a file, store it in a database, or transmit it over a network connection
import pickle 
# dump an object to a file 

data = 'Apple Inc'
f = open('somefile','wb')
pickle.dump(data, f)

# pickle.dumps(data) dump an object to a string, Use pickle.dumps
s = pickle.dumps(data)
print(s)

# pickle.load() pickle.loads() 
# Restore from a file 
f = open('somefile', 'rb')
data = pickle.load(f)
print(data)

# Restore from a string 
data = pickle.loads(s)
print(data)

# Pickle is a Python-specific self-describing data encoding
import pickle 
f = open('somedata', 'wb')
pickle.dump([1,2,3,4], f)
pickle.dump('hello', f)
pickle.dump({'Apple', 'Pear', 'Banana'}, f)
f.close()
f = open('somedata', 'rb')
pickle.load(f)
pickle.load(f)
pickle.load(f)

import math 
import pickle 
pickle.dumps(math.cos)

b'\x80\x03X\t\x00\x00\x00Apple Incq\x00.'
Apple Inc
Apple Inc


b'\x80\x03cmath\ncos\nq\x00.'