In [6]:
##################### Files #####################

# Open a file before reading it.
# The encoding is passed explicitly so the bytes on disk are decoded the same way on every platform.
# The returned stream object stays open until close() is called on it.

a_file = open('../../algo_from_scratch/TSNE/README.md', encoding='utf-8')

In [8]:
# Bytes are bytes; characters are an abstraction. A string is a sequence of Unicode characters.
# A file on disk is not a sequence of Unicode characters; a file on disk is a sequence of bytes.

# When reading in text mode, Python decodes the bytes according to a specific character encoding algorithm and
# returns a sequence of Unicode characters (otherwise known as a string).

# If no encoding is passed to open(), Python falls back to a default that is platform-dependent.
# On this machine the default is CP-1252, a common encoding on computers running Microsoft Windows.
# Because the default is platform-dependent, code relying on it may work for me but won't work for someone else,
# so pass encoding=... explicitly.

##### To get default character encoding on your system:
import locale
locale.getpreferredencoding()

'cp1252'

In [10]:
# Open the file again and show three attributes of the stream object, one per line:
# the mode it was opened in, the path that was passed to open(), and the text encoding.
a_file = open('../../algo_from_scratch/TSNE/README.md', encoding='utf-8')
print(a_file.mode, a_file.name, a_file.encoding, sep='\n')

r
../../algo_from_scratch/TSNE/README.md
utf-8


In [11]:
# read() with no argument returns everything from the current position to the end of the file as one string.
a_file.read()

'I have implemented TSNE Algorithm from scratch in this folder.\n\nTSNE is a non-linear dimensionality reduction algorithm. \nIt is a powerful algorithm for visualization when you are working with non-linear data.\n\nI referred the following articles/papers for in depth understanding of the algorithm:\n1) https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf\n2) https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a\n\n'

In [12]:
# The previous read() left the position at the end of the file, so reading again
# just returns an empty string; Python doesn't raise any exception.
a_file.read()

''

In [13]:
####### How to re-read a file? #######

# The seek() method moves to a specific byte position in a file.
# seek(0) goes back to the very beginning; the value displayed below the cell is the new position.
a_file.seek(0)

0

In [14]:
# The position is back at 0, so read() returns the whole file again.
a_file.read()

'I have implemented TSNE Algorithm from scratch in this folder.\n\nTSNE is a non-linear dimensionality reduction algorithm. \nIt is a powerful algorithm for visualization when you are working with non-linear data.\n\nI referred the following articles/papers for in depth understanding of the algorithm:\n1) https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf\n2) https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a\n\n'

In [15]:
# Jump to position 20 and read from there to the end of the file.
# Only the value of the last expression in a cell is displayed, so the result of seek() is not shown.
a_file.seek(20)
a_file.read()

'SNE Algorithm from scratch in this folder.\n\nTSNE is a non-linear dimensionality reduction algorithm. \nIt is a powerful algorithm for visualization when you are working with non-linear data.\n\nI referred the following articles/papers for in depth understanding of the algorithm:\n1) https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf\n2) https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a\n\n'

In [17]:
# read() accepts an optional character count, so you can read 1 character at a time if you want.
a_file.seek(20)
for n_chars in (1, 1, 3):
    # Each read continues from where the previous one stopped.
    print(a_file.read(n_chars))

S
N
E A


In [18]:
# tell() reports the current position in the file.
current_position = a_file.tell()
print("Now you are at the following byte: {}".format(current_position))

Now you are at the following byte: 25


In [20]:
# seek() and tell() always count bytes, but because this file was opened in text mode, read() counts characters.

# English characters in the file only require one byte each.
# Chinese characters require multiple bytes each in UTF-8.

# a_file.seek(30)
# a_file.read(1)
# This might fail if a Chinese character begins at byte 29 and takes 2 or 3 bytes to store:
# seek(30) would then land in the middle of that character, and read(1) could not decode it.

In [21]:
# Open files consume system resources, and depending on the file mode, other programs may not be able to access them.
# So close the file as soon as you are done with it.
a_file.close()

In [22]:
# Problem: if the server crashes due to a bug while a file is still open, close() is never reached.
# Solution 1: Use a try... finally block
# Solution 2: Use a with statement (shown below)
with open('../../algo_from_scratch/TSNE/README.md', encoding='utf-8') as a_file:
    a_file.seek(17)
    a_character = a_file.read(1)
    print(a_character)

# At the end of the with block, a_file.close() will be called automatically!
# Python will close that file even if you "exit" the block via an unhandled exception.
# This prevents the file from being kept open.

# with statement: just a generic framework for creating runtime contexts and telling objects
#                 that they're entering and exiting a runtime context.

d


In [27]:
# Print the file line by line, each line prefixed with its right-aligned, 1-based line number.
line_number = 0  # remains 0 if the file turns out to have no lines at all
with open('../../algo_from_scratch/TSNE/README.md', encoding='utf-8') as a_file:
    # Iterating over a text file yields one line per iteration; enumerate() does the counting.
    for line_number, a_line in enumerate(a_file, start=1):
        # rstrip() removes the trailing whitespace, including the carriage return characters.
        print('{:>4}  {}'.format(line_number, a_line.rstrip()))

   1  I have implemented TSNE Algorithm from scratch in this folder.
   2  
   3  TSNE is a non-linear dimensionality reduction algorithm.
   4  It is a powerful algorithm for visualization when you are working with non-linear data.
   5  
   6  I referred the following articles/papers for in depth understanding of the algorithm:
   7  1) https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf
   8  2) https://towardsdatascience.com/t-sne-clearly-explained-d84c537f53a
   9  


In [29]:
# To open a file for writing, use the open() function and specify the write mode. 
# There are two file modes for writing:
# 1) “Write” mode will overwrite the file. Pass mode='w'
# 2) “Append” mode will add data to the end of the file. Pass mode='a'

# Either mode will create the file automatically if it doesn’t already exist

In [30]:
# Append a note to the README, but only once.
# Blindly appending on every run would add another copy of the note each time the notebook is
# re-executed, and would also change what the earlier read() cells display.
readme_path = '../../algo_from_scratch/TSNE/README.md'
# The text is kept exactly as previous runs wrote it, so copies that already exist are recognised.
appended_note = " I have added this content programmetically from this file: python_basic_dev/chapter10/chapter10.ipynb"

try:
    with open(readme_path, encoding='utf-8') as a_file:
        note_present = appended_note in a_file.read()
except FileNotFoundError:
    # Append mode creates a missing file, so there is simply nothing to check yet.
    note_present = False

if not note_present:
    # mode='a' adds the data to the end of the file instead of overwriting it.
    with open(readme_path, mode='a', encoding='utf-8') as a_file:
        a_file.write(appended_note)

In [31]:
##################### Binary Files #######################

# an_image = open('examples/beauregard.jpg', mode='rb')
# A file opened in binary mode has no encoding attribute: you read and write raw bytes, so there is no conversion to or from characters.
# The binary modes are: rb (read), wb (write), ab (append)

In [32]:
# The io module provides the StringIO class, which lets you treat a string held in memory as a file.

import io

s = "PapayaWhip is the new black."
# Wrap the string in a file-like object, then read it back in full.
a_file = io.StringIO(initial_value=s)
a_file.read()

'PapayaWhip is the new black.'

In [33]:
# tell() reports the current position in the in-memory stream. Its result is not displayed here,
# because a notebook cell only shows the value of its last expression.
a_file.tell()
# Go back to the start, then read only the first 10 characters.
a_file.seek(0)
a_file.read(10)

'PapayaWhip'

In [None]:
################### Handling Compressed Files ####################

import gzip

# The archive is written as bytes, so the text is encoded to UTF-8 first.
log_bytes = 'A nine mile walk is no joke, especially in the rain.'.encode('utf-8')

# Always open gzip files in binary mode!
with gzip.open('out.log.gz', mode='wb') as z_file:
    z_file.write(log_bytes)

# There are libraries for GNU tar, PKZIP file structures too!

In [34]:
# Standard Input, Output, and Error

import sys

# When print() is called, whatever you are printing is sent to stdout.
# When a program crashes and prints a traceback, it goes to stderr.

# print() ends every call with a newline ...
for i in range(3):
    print('PapayaWhip')

# ... whereas sys.stdout.write() emits exactly the string it is given, so the copies run together.
for i in range(3):
    sys.stdout.write("PapayaWhip")

PapayaWhip
PapayaWhip
PapayaWhip
PapayaWhipPapayaWhipPapayaWhip

In [35]:
# sys.stdout and sys.stderr are stream objects that only support writing; read() doesn't work on them!

