How to open a csv file

In [1]:
import csv
%precision 2
with open('./resources/datasets/mpg.csv') as csvfile:
    mpg = list(csv.DictReader(csvfile))
mpg[:3]

[OrderedDict([('', '1'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'auto(l5)'),
              ('drv', 'f'),
              ('cty', '18'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '2'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '1.8'),
              ('year', '1999'),
              ('cyl', '4'),
              ('trans', 'manual(m5)'),
              ('drv', 'f'),
              ('cty', '21'),
              ('hwy', '29'),
              ('fl', 'p'),
              ('class', 'compact')]),
 OrderedDict([('', '3'),
              ('manufacturer', 'audi'),
              ('model', 'a4'),
              ('displ', '2'),
              ('year', '2008'),
              ('cyl', '4'),
              ('trans', 'manual(m6)'),
              ('drv',

In [13]:
len(mpg)

234

In [14]:
mpg[0].keys()

odict_keys(['', 'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans', 'drv', 'cty', 'hwy', 'fl', 'class'])

In [15]:
sum(float(d['cty']) for d in mpg) / len(mpg)  # values are read as str, so convert to float before doing arithmetic

16.86

In [16]:
sum(float(d['hwy']) for d in mpg) / len(mpg)  # mean of the 'hwy' column over all rows

23.44

In [17]:
# A set keeps only the distinct 'cyl' values found across all rows.
cylinders = {row['cyl'] for row in mpg}
cylinders

{'4', '5', '6', '8'}

In [21]:
# Average city MPG for each cylinder count, stored as (cyl, mean_cty) tuples.
CtyMpgByCyl = []

for c in cylinders:
    # 'cty' is read from the CSV as str, so convert before averaging.
    # Every c comes from the rows themselves, so the list is never empty.
    cty_values = [float(d['cty']) for d in mpg if d['cyl'] == c]
    CtyMpgByCyl.append((c, sum(cty_values) / len(cty_values)))

# A lambda is a brief, anonymous way to write a function inline.
# The cylinder counts are strings, so sort on their integer value; a plain
# string sort would place a two-digit count such as '10' before '4'.
CtyMpgByCyl.sort(key=lambda x: int(x[0]))
CtyMpgByCyl

[('4', 21.01), ('5', 20.50), ('6', 16.22), ('8', 12.57)]

In [4]:
# Distinct values of the 'class' column across all rows.
vehicleclass = {row['class'] for row in mpg}
vehicleclass

{'2seater', 'compact', 'midsize', 'minivan', 'pickup', 'subcompact', 'suv'}

In [5]:
# Average highway MPG for each vehicle class, stored as (class, mean_hwy) tuples.
HwyMpgByClass = []

for vclass in vehicleclass:
    # Select the rows of this class first, then total their 'hwy' values.
    rows_in_class = [row for row in mpg if row['class'] == vclass]
    hwy_total = 0
    for row in rows_in_class:
        hwy_total += float(row['hwy'])  # 'hwy' is stored as str
    HwyMpgByClass.append((vclass, hwy_total / len(rows_in_class)))

# Order the classes from lowest to highest average highway MPG.
HwyMpgByClass.sort(key=lambda pair: pair[1])
HwyMpgByClass

[('pickup', 16.88),
 ('suv', 18.13),
 ('minivan', 22.36),
 ('2seater', 24.80),
 ('midsize', 27.29),
 ('subcompact', 28.14),
 ('compact', 28.30)]

Date and time

In [6]:
import datetime as dt
import time as tm

In [10]:
tm.time()
# Returns the number of seconds elapsed since 1970-01-01 00:00:00; useful as a timestamp, e.g. for measuring how long a program takes to run

1628259373.86

In [8]:
dtnow = dt.datetime.fromtimestamp(tm.time())  # build a datetime from the current epoch timestamp, i.e. the current time
dtnow

datetime.datetime(2021, 8, 6, 14, 14, 30, 455432)

In [11]:
dtnow.year, dtnow.month, dtnow.day, dtnow.hour, dtnow.minute, dtnow.second

(2021, 8, 6, 14, 14, 30)

In [12]:
# A duration of 100 days, for use in date arithmetic.
delta = dt.timedelta(days=100)
delta

datetime.timedelta(days=100)

In [13]:
today = dt.date.today()

In [14]:
today-delta  # the date 100 days before today

datetime.date(2021, 4, 28)

In [15]:
today > today - delta

True

Object: Define a class

In [19]:
class Person:
    """A person whose name and location are assigned through setter methods.

    No ``__init__`` is defined, so ``name`` and ``location`` exist on an
    instance only after the corresponding setter has been called.
    """

    # Class attribute: shared by all instances.
    department = 'School of Information'

    def set_name(self, new_name):
        """Store ``new_name`` on this instance as ``name``."""
        self.name = new_name

    def set_location(self, new_location):
        """Store ``new_location`` on this instance as ``location``."""
        self.location = new_location

In [20]:
person = Person()
person.set_name('Christopher Brooks')
person.set_location('Ann Arbor, MI, USA')

# Fill the template from the two instance attributes and the class-level department.
summary_template = '{} live in {} and works in the department {}'
print(summary_template.format(person.name, person.location, person.department))

Christopher Brooks live in Ann Arbor, MI, USA and works in the department School of Information


map()

In [21]:
store1 = [10.00, 11.00, 12.34, 2.34]
store2 = [9.00, 11.10, 12.34, 2.01]
# map() applies min() to the paired items of the two lists. It returns a lazy
# map object, so displaying it shows the object rather than the values;
# wrap it in list() to see the results.
cheapest = map(min, store1, store2)
cheapest

<map at 0x7fe4082f7ef0>

In [24]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the title and last name of ``person``, e.g. 'Dr. Brooks'."""
    # Split on whitespace once: the first piece is the title, the last piece is the last name.
    words = person.split()
    return ' '.join((words[0], words[-1]))

list(map(split_title_and_name, people))  # pass every item of people through the function, much like a for loop

['Dr. Brooks', 'Dr. Collins-Thompson', 'Dr. Vydiswaran', 'Dr. Romero']

How to use Lambda

In [25]:
my_function = lambda a, b, c : a + b  # a lambda body is limited to a single expression (default parameter values are allowed); note that c is accepted but never used

In [27]:
my_function(1, 21, 3)

22

In [4]:
people = ['Dr. Christopher Brooks', 'Dr. Kevyn Collins-Thompson', 'Dr. VG Vinod Vydiswaran', 'Dr. Daniel Romero']

def split_title_and_name(person):
    """Return the first and last whitespace-separated words of ``person``, joined by a space."""
    words = person.split()
    return words[0] + ' ' + words[-1]

# Check that the function above gives the same results as the equivalent lambda.
# Option 1: run each person through the lambda and print whether the result
# equals what split_title_and_name returns (True / False).
for person in people:
    print(split_title_and_name(person) == (lambda x: x.split()[0] + ' ' + x.split()[-1])(person))


# Option 2: use map instead of a for loop to run every person through each
# callable, then compare the two resulting lists.
names_from_function = list(map(split_title_and_name, people))
names_from_lambda = list(map(lambda person: person.split()[0] + ' ' + person.split()[-1], people))
names_from_function == names_from_lambda

True
True
True
True


True

List comprehensions

In [6]:
# for loop
# Build the list of even numbers below 100 with an explicit for loop.
my_list = []
for number in range(0, 100):
    if number % 2 == 0:  # keep only the numbers divisible by 2
        my_list.append(number)

my_list

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98]

In [8]:
# Build the same list of even numbers with a list comprehension.
# General form: [expression for item in iterable if condition]

my_list = [number for number in range(0, 100) if number % 2 == 0]
my_list

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98]

In [9]:
def times_tables():
    """Return the products i * j for all i and j in 0..9 as one flat list.

    The products are ordered with i as the outer index and j as the inner index.
    """
    products = []
    for left in range(10):
        # Append the ten products for this left-hand factor.
        products.extend(left * right for right in range(10))
    return products

# The function should match the equivalent nested list comprehension.
times_tables() == [i * j for i in range(10) for j in range(10)]

True

In [11]:
lowercase = 'abcdefghijklmnopqrstuvwxyz'
digits = '0123456789'

# Every id made of two lowercase letters followed by two digits (26 * 26 * 10 * 10 combinations).
correct_answer = [a+b+c+d for a in lowercase for b in lowercase for c in digits for d in digits]

correct_answer[:30] # Display first 30 ids

['aa00',
 'aa01',
 'aa02',
 'aa03',
 'aa04',
 'aa05',
 'aa06',
 'aa07',
 'aa08',
 'aa09',
 'aa10',
 'aa11',
 'aa12',
 'aa13',
 'aa14',
 'aa15',
 'aa16',
 'aa17',
 'aa18',
 'aa19',
 'aa20',
 'aa21',
 'aa22',
 'aa23',
 'aa24',
 'aa25',
 'aa26',
 'aa27',
 'aa28',
 'aa29']