# Preparation

## Mount Google Drive

In [None]:
# First, authorize Colab to access the data stored on Google Drive.
from google.colab import drive
drive.mount('/content/drive')
# Google will prompt you to grant access to your data.
# This cell needs to be run in every session before reading a file on Google Drive.

Mounted at /content/drive


## Change Working directory

In [None]:
# Second, change the current working directory to the folder where your scripts are saved.
# In this case, it's the scripts folder (scripts_complete).
# The relative paths used below (../data_in, ../data_out) are resolved from this folder.
# Again, this cell needs to be run in every session.
# If you are running Python locally, this preparation is not needed.
import os
os.chdir('/content/drive/MyDrive/Python boot camp_2/scripts_complete')

# Text

## Reading from a text file

In [None]:
# open() returns a file handle (a file object), not the content of the file.
handle = open('../data_in/io_txt.txt','r')
print(handle)
print(type(handle))

<_io.TextIOWrapper name='../data_in/io_txt.txt' mode='r' encoding='UTF-8'>
<class '_io.TextIOWrapper'>


In [None]:
# File attributes are plain values, not methods,
# so access them without parentheses.
print(handle.name)
print(handle.encoding)
print(handle.mode)
print(handle.closed)

../data_in/io_txt.txt
cp1252
r
False


In [None]:
print(handle.readable())
print(handle.writable())
# The file was opened in mode 'r' (read), so it is readable but not writable.

True
False


In [None]:
# Iterating over a file handle yields one line at a time,
# so counting the iterations gives the number of lines.
count = sum(1 for _ in handle)
print(count)

20


In [None]:
# The previous cell read through the whole file,
# so the pointer now sits at the end of the file.
# There is nothing more to be read.

count = sum(1 for _ in handle)
print(count)

0


In [None]:
# use tell method to find out the current position of the pointer

handle.tell()

745

In [None]:
# To read the file again, move the pointer back to the start
# of the file by calling seek(0) on the handle.

handle.seek(0)
count = sum(1 for _ in handle)
print(count)

# close the handle once it is no longer needed
handle.close()

20


In [None]:
handle2 = open('../data_in/io_txt.txt')
len(handle2.read())

# read() without an argument reads the whole file as a single string;
# len() then gives the number of characters in that string.

726

In [None]:
len(handle2.read())

# Again, now the pointer is at the end of the file,
# nothing more to be read.

0

In [None]:
handle2.seek(0)
len(handle2.read())

726

In [None]:
handle2.seek(0)
handle2.read(10)
# read the first 10 characters.

'Two roads '

In [None]:
handle2.read(8)
# continue to read the next 8 characters.

'diverged'

In [None]:
handle2.read(-9)
# Any negative size means: read from the current position to the end of the file.

' in a yellow wood,\nAnd sorry I could not travel both\nAnd be one traveler, long I stood\nAnd looked down one as far as I could\nTo where it bent in the undergrowth;\nThen took the other, as just as fair,\nAnd having perhaps the better claim,\nBecause it was grassy and wanted wear;\nThough as for that the passing there\nHad worn them really about the same,\nAnd both that morning equally lay\nIn leaves no step had trodden black.\nOh, I kept the first for another day!\nYet knowing how way leads on to way,\nI doubted if I should ever come back.\nI shall be telling this with a sigh\nSomewhere ages and ages hence:\nTwo roads diverged in a wood, and I-\nI took the one less traveled by,\nAnd that has made all the difference.'

In [None]:
handle2.seek(0)
handle2.readline()
# read the first line as a string.

'Two roads diverged in a yellow wood,\n'

In [None]:
handle2.readline()
# continue to read the next line as a string.

'And sorry I could not travel both\n'

In [None]:
handle2.readlines()
# continue to read the rest of the lines as a list of strings.

['And be one traveler, long I stood\n',
 'And looked down one as far as I could\n',
 'To where it bent in the undergrowth;\n',
 'Then took the other, as just as fair,\n',
 'And having perhaps the better claim,\n',
 'Because it was grassy and wanted wear;\n',
 'Though as for that the passing there\n',
 'Had worn them really about the same,\n',
 'And both that morning equally lay\n',
 'In leaves no step had trodden black.\n',
 'Oh, I kept the first for another day!\n',
 'Yet knowing how way leads on to way,\n',
 'I doubted if I should ever come back.\n',
 'I shall be telling this with a sigh\n',
 'Somewhere ages and ages hence:\n',
 'Two roads diverged in a wood, and I-\n',
 'I took the one less traveled by,\n',
 'And that has made all the difference.']

In [None]:
# Rewind to the start, then print every line that contains an uppercase 'I'.
handle2.seek(0)

for raw_line in handle2:
    stripped = raw_line.rstrip()  # drop the trailing newline
    if 'I' not in stripped:
        continue
    print(stripped)
handle2.close()

And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
In leaves no step had trodden black.
Oh, I kept the first for another day!
I doubted if I should ever come back.
I shall be telling this with a sigh
Two roads diverged in a wood, and I-
I took the one less traveled by,


In [None]:
handle3 = open('../data_in/io_txt.txt')
handle3.readline()
handle3.close()

# When you run this, nothing is displayed.
# readline() does return the first line, but the result is neither printed
# nor assigned to a variable, so it is simply discarded.
# (A notebook cell only displays the value of its last expression,
# and here that is close(), which returns None.)

In [None]:
# In order to use the content of a file after the file is closed,
# assign it to a variable first.
with open('../data_in/io_txt.txt') as handle4:
    # the with block closes the file automatically, even if reading fails
    first_line = handle4.readline()
print(first_line)

Two roads diverged in a yellow wood,



## Writing to a text file

In [None]:
# write() does not add line breaks by itself,
# so each string carries its own newline at the end.
line1 = 'HKU Business School was established in 2001.\n'
line2 = 'She is now home to seven great master programs including MSc(BA).\n'

# mode 'w' opens the file for writing
with open('../data_out/hkubs.txt', 'w', newline = '') as fout:
    for text in (line1, line2):
        fout.write(text)

In [None]:
with open('../data_out/hkubs.txt', 'a', newline = '') as fout:
    # mode 'a' appends to the end of the existing content instead of overwriting it
    line3 = 'We have campuses in HK, SZ, BJ, Ho Chi Minh City, and Tel Aviv.\n'
    fout.write(line3)

In [None]:
with open('../data_out/hkubs.txt', 'a', newline = '') as fout:
    # mode 'a' means to append to the existing content.
    # Everything between the triple quotes is written literally,
    # including the line breaks and the leading spaces of each line.
    lines = """\nMy favorite course
    is without a doubt
    MSBA7001.
    """
    fout.write(lines)

In [None]:
# to open multiple files at the same time
# (open() needs at least a file path; the two paths below are the
#  sample input files used elsewhere in this notebook)

handle1 = open('../data_in/io_txt.txt', 'r')
handle2 = open('../data_in/io_csv.csv', 'r', encoding = 'utf-8')
# do something with both handles
handle1.close()
handle2.close()

In [None]:
# to open multiple files at the same time
# with open("file_1.txt") as f1, open("file_2.txt") as f2:

# `zip` and `enumerate` functions

In [None]:
# zip() pairs up the items of several iterables position by position
# and returns a zip object, which can be converted
# to a list, a tuple, or a dictionary.

keys = ['fall', 'spring', 'winter']
values = [20, 15, 9]
for convert in (list, tuple, dict):
    # a fresh zip object is built for every conversion
    print(convert(zip(keys, values)))

[('fall', 20), ('spring', 15), ('winter', 9)]
(('fall', 20), ('spring', 15), ('winter', 9))
{'fall': 20, 'spring': 15, 'winter': 9}


In [None]:
names = ['A', 'B', 'C']
values = [11, 23, 46]
ages = [45, 67, 82]
# zip() accepts any number of iterables; each step yields one tuple
for record in zip(names, values, ages):
    print(*record)

A 11 45
B 23 67
C 46 82


In [None]:
# enumerate() takes one iterable and an optional starting index,
# attaches a running counter to each item and returns an enumerate object,
# which can be converted to a list, a tuple, or a dictionary.
# Essentially, enumerate adds indices to values.

# index starting from 0, the default setting
obj1 = enumerate(names)
# index starting from 10
obj2 = enumerate(names, start=10)
# index starting from 100
obj3 = enumerate(names, start=100)

print(list(obj1))
print(tuple(obj2))
print(dict(obj3))

[(0, 'A'), (1, 'B'), (2, 'C')]
((10, 'A'), (11, 'B'), (12, 'C'))
{100: 'A', 101: 'B', 102: 'C'}


In [None]:
# number the values from 1 and report whether each one is odd
for position, number in enumerate(values, start=1):
    is_odd = number % 2 != 0
    print(position, is_odd)

1 True
2 True
3 False


# CSV

## Reading from a csv file

In [None]:
import csv

In [None]:
# newline = '' lets the csv module handle line endings itself,
# as the csv documentation recommends for files passed to csv.reader.
with open('../data_in/io_csv.csv', 'r', newline = '', encoding = 'utf-8') as infile:
    reader = csv.reader(infile)
    # each row comes back as a list of strings, one string per field
    for row in reader:
        print(row)

['name', 'age']
['Peter', '22']
['Jasmine', '21']
['Sam', '24']
['Chao', '20']


In [None]:
# Without a reader object, every line is still read as one plain string
# (it is not split into fields).
# Each string keeps its trailing newline and print() adds another one,
# which is why the output shows a blank line after every row.
with open('../data_in/io_csv.csv', 'r', encoding = 'utf-8') as infile:
    for row in infile:
        print(row)

name,age

Peter,22

Jasmine,21

Sam,24

Chao,20



In [None]:
# newline = '' lets the csv module handle line endings itself,
# as the csv documentation recommends for files passed to csv.reader.
with open('../data_in/io_csv.csv', 'r', newline = '', encoding = 'utf-8') as infile:
    reader = csv.reader(infile)
    for row in reader:
        # row[0] is the first field of the row, row[1] the second
        print(f'{row[0]}, {row[1]}')

name, age
Peter, 22
Jasmine, 21
Sam, 24
Chao, 20


In [None]:
# or, simply turn the content into a list of lists
# (newline = '' lets the csv module handle line endings itself,
#  as the csv documentation recommends for files passed to csv.reader)
with open('../data_in/io_csv.csv', 'r', newline = '', encoding = 'utf-8') as infile:
    reader = csv.reader(infile)
    # list() consumes the reader while the file is still open
    data = list(reader)

In [None]:
data

[['name', 'age'],
 ['Peter', '22'],
 ['Jasmine', '21'],
 ['Sam', '24'],
 ['Chao', '20']]

## Writing to a csv file

In [None]:
# Prepare the data to write as a list of tuples (a list of lists works too):
# zip() combines the three columns into one tuple per row.
major = ['BA', 'A&F', 'ECON']
size = [50, 150, 200]
gender_ratio = [0.5, 0.7, 0.3]
csvdata = [*zip(major, size, gender_ratio)]

print(csvdata)

[('BA', 50, 0.5), ('A&F', 150, 0.7), ('ECON', 200, 0.3)]


In [None]:
header = ['major', 'size', 'gender_ratio']

with open('../data_out/programs.csv', 'w', newline = '', encoding = 'utf-8') as outfile:
    writer_ = csv.writer(outfile)

    # writerow() writes a single row; here it is the header
    writer_.writerow(header)

    # writerows() writes all the remaining rows in one call
    writer_.writerows(csvdata)

In [None]:
# Append one more row to the csv file; a row can be any sequence, here a tuple.
newrow = ('HR', 30, 0.8)
with open('../data_out/programs.csv', 'a', newline = '', encoding = 'utf-8') as outfile:
    writer_ = csv.writer(outfile)
    writer_.writerow(newrow)

# About encoding

From Wikipedia:
UTF-8 (Unicode Transformation Format – 8-bit) is a character encoding for Unicode, the Universal Coded Character Set.
It is by far the most common encoding for the World Wide Web, accounting for over 97% of all web pages, and up to 100% for some languages, as of 2021.

In [None]:
# If no encoding is specified, open() falls back to the platform's default
# encoding (platform dependent, e.g. cp1252 on Windows).
# When that default differs from the encoding the file was saved in,
# characters other than plain English letters may be read or written incorrectly.

with open('../data_in/encoding_fr.txt','r') as handle:
    # the with block closes the file once reading is done
    print(handle.read())

La sÃ©rie d'essais "Quoi de neuf dans Python" reprend les plus importants changements entres les versions majeures de Python. Elles sont Ã  lire pour quiconque souhaitant Ãªtre Ã  jour suite Ã  une nouvelle sortie.


In [None]:
# fix that by specifying the file's encoding

with open('../data_in/encoding_fr.txt','r', encoding = 'utf-8') as handle:
    # the with block closes the file once reading is done
    print(handle.read())

La série d'essais "Quoi de neuf dans Python" reprend les plus importants changements entres les versions majeures de Python. Elles sont à lire pour quiconque souhaitant être à jour suite à une nouvelle sortie.


In [None]:
# For text in some languages (here: Chinese), reading with the wrong default
# encoding does not just garble the text: it can fail with a UnicodeDecodeError,
# because some bytes in the file have no meaning in that encoding.

with open('../data_in/encoding_cn.txt','r') as handle:
    # the with block closes the file even when read() raises
    print(handle.read())

UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 2: character maps to <undefined>

In [None]:
# again, fix it by specifying the encoding
with open('../data_in/encoding_cn.txt','r', encoding = 'utf-8') as handle:
    # the with block closes the file once reading is done
    print(handle.read())

像 Perl 語言一樣, Python 源代碼同樣遵循 GPL(GNU General Public License) 協議


In [None]:
# similarly, specify the encoding when writing

text = ">>>>像 Perl 語言一樣, Python 源代碼同樣遵循 GPL(GNU General Public License) 協議<<<<"
# NOTE(review): this appends to a file under data_in, so every run makes the
# input file longer — confirm this is intended (the other cells write to data_out).
with open('../data_in/encoding_cn.txt','a', encoding = 'utf-8') as handle:
    # the with block closes (and flushes) the file even if write() fails
    handle.write(text)

# JSON

## Reading from a json file

In [None]:
import json

In [None]:
# json.load() parses the JSON document in the file into Python objects.
# The with block closes the file even if the content is not valid JSON.
with open('../data_in/io_json.json', 'r', encoding = 'utf-8') as handle:
    data = json.load(handle)
print(type(data))

<class 'dict'>


In [None]:
data

{'quiz': {'sport': {'q1': {'question': 'Which one is correct team name in NBA?',
    'options': ['New York Bulls',
     'Los Angeles Kings',
     'Golden State Warriros',
     'Huston Rocket'],
    'answer': 'Huston Rocket'}},
  'maths': {'q1': {'question': '5 + 7 = ?',
    'options': ['10', '11', '12', '13'],
    'answer': '12'},
   'q2': {'question': '12 - 8 = ?',
    'options': ['1', '2', '3', '4'],
    'answer': '4'}}}}

In [None]:
# Define a JSON string.
# Inside the JSON text, keys and string values use double quotes.
j_str = """{
    "Name": "Chao Ding",
    "Contact Number": 12345678,
    "Email": "chao.ding@hku.hk",
    "Hobbies":["Hiking", "Tennis", "Photography"]
    }"""

# json.loads() (load string) converts the JSON string to a Python dictionary.
p_dict = json.loads(j_str)
print(type(p_dict))

<class 'dict'>


In [None]:
p_dict

{'Name': 'Chao Ding',
 'Contact Number': 12345678,
 'Email': 'chao.ding@hku.hk',
 'Hobbies': ['Hiking', 'Tennis', 'Photography']}

## Writing to a json file

In [None]:
# A Python dictionary that maps each school to its list of programs.
p_dict2 = {
    'business school': ['BA', 'A&F', 'ECON'],
    'engineering school': ['CS', 'AI', 'SE'],
}

# json.dumps() (dump string) serializes the dictionary to a JSON string.
j_str2 = json.dumps(p_dict2)
print(type(j_str2))

<class 'str'>


In [None]:
j_str2

'{"business school": ["BA", "A&F", "ECON"], "engineering school": ["CS", "AI", "SE"]}'

In [None]:
# write a python dictionary to a json file
# indent = 3 pretty-prints the output with 3 spaces per nesting level
with open('../data_out/hku.json', 'w', encoding = 'utf-8') as handle:
    # the with block closes (and flushes) the file even if json.dump() fails
    json.dump(p_dict2, handle, indent = 3)

In [None]:
# Since the result of json.dumps() is an ordinary string,
# we can also write it to a file with the write method.
with open('../data_out/hku2.json', 'w', encoding = 'utf-8') as handle:
    handle.write(j_str2)