# 1.2_Python More Advanced Basics

## *1.2.a Lambda Expressions*

In [1]:
# lambda can be used to define functions inline
(lambda x, y, coeff=1: coeff*(x ** 2 + y ** 2) ** 0.5) (3, 4)

5.0

In [2]:
a = [1, 2, 3]
f = lambda x: sum(x)
lst = [5., (2, 3), 2**-0.5, 'some text', a, f(a), {2, 5} ]
print(f'Full list: {lst}')

Full list: [5.0, (2, 3), 0.7071067811865476, 'some text', [1, 2, 3], 6, {2, 5}]


## *1.2.b In-function lambda usage*

In [3]:
# pass functions to other
def square_fn(x):
    return x**2

def do_twice(func, x):
    return func(func(x))

do_twice(square_fn, 3)
#9**2

81

In [4]:
# map creates a new sequence by applying a function to each element of a sequence
# use list to iterate the sequence and create a list of the elements

numbers = [1, 2, 3, 4, 5, 6]

squares = map(square_fn, numbers)
type(squares), squares, list(squares)

(map, <map at 0x26e92752800>, [1, 4, 9, 16, 25, 36])

In [5]:
list(map(square_fn, numbers))

[1, 4, 9, 16, 25, 36]

In [6]:
# Naturally, we can use a lambda expression to specify a function in-line
squares = map(lambda x: x**2, numbers)
print(list(squares))

# filter evaluates a function on each element and will add it to the new sequence only if the function evaluates to True
result = filter(lambda x: x % 2 == 0, numbers)  # only even numbers are allowed
print(type(result), result, list(result))

[1, 4, 9, 16, 25, 36]
<class 'filter'> <filter object at 0x0000026E92753D30> [2, 4, 6]


## *1.2.c Dictionaries (HashMaps/Key value stores)*

In [7]:
# dicts can be instatiated using {} or the dict() function
fruit = {
    'color': 'green',
    'taste': 'sweet',
    'size 3d': [1, 3, 2]
}

fruit2 = dict(color='green', taste='sweet', size3d=[1, 2, 3]) #that is another notation to write the dictonary

print(type(fruit), fruit)
print(type(fruit2), fruit2)

<class 'dict'> {'color': 'green', 'taste': 'sweet', 'size 3d': [1, 3, 2]}
<class 'dict'> {'color': 'green', 'taste': 'sweet', 'size3d': [1, 2, 3]}


In [8]:
fruit2

{'color': 'green', 'taste': 'sweet', 'size3d': [1, 2, 3]}

In [9]:
# dict support indexing notation to assign/ look-up values given a key
fruit['size 3d'] = [2, 3, 1]
print(fruit['color'])

# we can use the .get method to return a default value in case the key is not found
fruit.get('price', 23)

# we can use setdefault to also set a default value in case the key is not found
fruit.setdefault('price', 1) # default value is returned

green


1

In [10]:
# you can use variables to specify keys
my_key = ('cool', 'tuple')
{my_key: 'huh'}

{('cool', 'tuple'): 'huh'}

In [11]:
# iterating dicts will return the keys
list(fruit)

# only immutable (hashable) types may be used as keys
{(1, 2): 3}

{(1, 2): 3}

## *1.2.d Combining Dicts*

In [12]:
fruit

{'color': 'green', 'taste': 'sweet', 'size 3d': [2, 3, 1], 'price': 1}

In [13]:
# copy a dict
cool_fruit = dict(fruit)

In [14]:
cool_fruit

{'color': 'green', 'taste': 'sweet', 'size 3d': [2, 3, 1], 'price': 1}

In [15]:
# some new dict for combining
properties = {
    'price' : 4,
    'best before': 7,
    True : 'sour',
    1: 'two',
    ('wow', 420): 'cool',
}

properties[('wow', 420)]

'cool'

In [16]:
cool_fruit.update(properties)  # cool_fruit is changed in-place
cool_fruit

{'color': 'green',
 'taste': 'sweet',
 'size 3d': [2, 3, 1],
 'price': 4,
 'best before': 7,
 True: 'two',
 ('wow', 420): 'cool'}

## *1.2.e Dictionary Unpacking*

In [17]:
# similar to sequence unpacking with *, we can use dictionary unpacking using the ** operator
print({**fruit, 'price': 3})
print({**fruit, **properties})

{'color': 'green', 'taste': 'sweet', 'size 3d': [2, 3, 1], 'price': 3}
{'color': 'green', 'taste': 'sweet', 'size 3d': [2, 3, 1], 'price': 4, 'best before': 7, True: 'two', ('wow', 420): 'cool'}


In [18]:
nums = [2,3]
print(*nums)
print(nums)

2 3
[2, 3]


## *1.2.f Sets - unordered, unique elements*

- sets may be instantiated with the literal {} notation, or the set() type
- Note that {} (empty braces) will evaluate to an emtpy dict

In [19]:
{1, 2, 'apples'}, set()

({1, 2, 'apples'}, set())

In [20]:
#test
a = set([3,4])
a.add(1)
print(a)

d = {}
s = set()

{1, 3, 4}


In [21]:
# as sets are unordered (i.e. not a sequence), they do not support indexing
my_set = {'cool', 'alright'}
my_set[0]

TypeError: 'set' object is not subscriptable

In [22]:
# add elements using .add, or use an in-place union using .update (adding elements that already exist has no effect)
my_set = {'cool', 'alright'}
my_set.add('good')
print(my_set)
my_set.update({'cool', 'awesome'})
print(my_set)

{'good', 'cool', 'alright'}
{'alright', 'good', 'awesome', 'cool'}


In [23]:
# sets are mutable like lists (since you can add elements in-line), thus they are unhashable and cannot be used as keys
{{1,2,3}: 'numbers'}

TypeError: unhashable type: 'set'

In [24]:
# the set constructor attempts to iterate its input to generate a new set, they may be iterated themselves e.g. to create a sequence
print(set([3, 3, 1, 4]))
print(list({5, 4, 1}))

{1, 3, 4}
[1, 4, 5]


## *1.2.g Conditional expressions*

In [25]:
def classify(x: dict) -> str:
    if x['color'] == 'green':   
        if x['size'] == 'big':
            decision = 'watermelon'
        elif x['size'] == 'medium':
            decision = 'apple'                
        else:
            decision = 'other'           
    else:
        decision = 'other'
    
    return decision

In [26]:
# Use dict type to represent fruits arbitrary properties
fruit_1 = {'color': 'green', 'size': 'big'}
fruit_2 = {'color': 'green', 'size': 'medium'}
fruit_3 = {'color': 'red', 'size': 'small'}

classify(fruit_1), classify(fruit_2), classify(fruit_3)

('watermelon', 'apple', 'other')

## *1.2.h Ternary condition operator*

- the in-line ternary operator true_value if condition else false_value can be used inside expressions for some syntactic sugar, or when we are only allowed to specify a single expression

In [27]:
def compare(x: dict, y: dict) -> str: 
    """ 
        Compares two fruits either they are the same 
        
        Args:
            x (dict) : first fruit
            y (dict) : second fruit
            
        Returns:
            string : either 'same' or 'different'  
    """
    
    return "same" if x == y else "different" # short if-else form 

compare(fruit_1, fruit_2), compare(fruit_1, fruit_1)

('different', 'same')

## *1.2.i Range Type*

- the range type represents a range of integers, use the range(start, stop, step) built-in to instantiate ranges

In [28]:
# aöö with same output
print(range(5))
print(range(0, 5))
print(range(0, 5, 1))

range(0, 5)
range(0, 5)
range(0, 5)


In [29]:
for i in range(5):
    print(i)    

0
1
2
3
4


In [30]:
# ranges are immutable, like tuples, support indexing and slicing, but do not support item assignment
print({range(5): 'cool'})
print(range(2, 20, 3)[1])
print(range(20, 1, -2)[2:])

{range(0, 5): 'cool'}
5
range(16, 0, -2)


In [31]:
# you can use a sequential container type, like list, to iterate ranges to create a list of the range members
print(list(range(0, 10, 3)))
print(list(range(3, 1)))

[0, 3, 6, 9]
[]


In [32]:
print(range(2, 20, 3)[1])
print(list(range(10, 1, -2)[2:]))

5
[6, 4, 2]


## *1.2.j Iterators*

In [33]:
'''
iterable types can be iterated, the manual way is to use iter, which creates an iterator that tracks 
the state of the iteration, and next to get the next element in sequence, when the sequence is over, 
a StopIteration will be raised
'''

for i in iter([1,2,3]):
    print(i)

1
2
3


In [34]:
my_iter = iter([1, 2, 3])
print(next(my_iter))
print(next(my_iter))

1
2


In [35]:
# an easier way to iterate iterable types is to use the for loop
for i in range(2, 13, 4):
    print(i)

2
6
10


## *1.2.k Enumerator*

In [36]:
# enumerators wrap an iterable, and generate a tuple with the index and the true element for each element in the iterable

for n, tag in enumerate(['yes', 'no', 'maybe']):
    print(f'{n} -> {tag}')

0 -> yes
1 -> no
2 -> maybe


## *1.2.l Iteration through a list with different objects*

In [37]:
a = [1, 2, 3]
f = lambda x: sum(x)
# [float, tuple, expr, str, var, func call, set]
lst = [5., (2, 3), 2**-0.5, 'some text', a, f(a), {2, 5} ]
print(f'Full list: {lst}')


for i in range(2):
    for i, element in enumerate(lst):
        if type(element) == str:
            break  # iteration is interrupted
            #continue # the current cycle is skipped 

        print(f"\n{i} -> {element}")

Full list: [5.0, (2, 3), 0.7071067811865476, 'some text', [1, 2, 3], 6, {2, 5}]

0 -> 5.0

1 -> (2, 3)

2 -> 0.7071067811865476

0 -> 5.0

1 -> (2, 3)

2 -> 0.7071067811865476


In [38]:
import sys

mylist = range(0, 1_000_000) # generator object
sys.getsizeof(mylist), sys.getsizeof(list(mylist)) # explicit casting to int 

(48, 8000056)

## *1.2.m Programming exercise*

In [39]:
# 𝐄𝐱𝐚𝐦𝐩𝐥𝐞: Given a data which is a list of dictionaries.
data = [
  {'color': 'green', 'size': 'big'},
  {'color': 'yellow', 'shape': 'round', 'size': 'big'},
  {'color': 'red', 'size': 'medium'},
  {'color': 'green', 'size': 'big'},
  {'color': 'red', 'size': 'small', 'taste': 'sour'},
  {'color': 'green', 'size': 'small'}
]

type(data), type(data[0])

(list, dict)

In [40]:
# 𝐆𝐨𝐚𝐥: to make a prediction for multiple observations.
results = []

for x in data:    
    res = classify(x)
    print(f'Fruit: {x} \nClass: {res} \n') # \n is a so called "carriage return" sign it stands for a new line
    results.append(res) # equiv.to  results += [classify(x)]
"All:", results # tuple object is printed

Fruit: {'color': 'green', 'size': 'big'} 
Class: watermelon 

Fruit: {'color': 'yellow', 'shape': 'round', 'size': 'big'} 
Class: other 

Fruit: {'color': 'red', 'size': 'medium'} 
Class: other 

Fruit: {'color': 'green', 'size': 'big'} 
Class: watermelon 

Fruit: {'color': 'red', 'size': 'small', 'taste': 'sour'} 
Class: other 

Fruit: {'color': 'green', 'size': 'small'} 
Class: other 



('All:', ['watermelon', 'other', 'other', 'watermelon', 'other', 'other'])

In [41]:
# List comprehension are the pythonic way of the functional programming paradigms
results = [classify(x) for x in data]

In [42]:
# filter can be done by passing an if at the end of the expression
results = [classify(x) for x in data if x['color'] == 'green']
results

['watermelon', 'watermelon', 'other']

In [43]:
# this can be combined with any expression
[classify(x) if x['color'] == 'green' else 'other' for x in data]

['watermelon', 'other', 'other', 'watermelon', 'other', 'other']

## *1.2.n Counting watermelon" objects in the data*

In [44]:
result = [classify(x) for x in data]

obj = 'watermelon'

count = 0
for res in result:    
    if res == obj:
        count += 1

f'Total number of {obj}s is {count}'

'Total number of watermelons is 2'

In [45]:
# Pythonic way using list comprehensions and build-in 𝐬𝐮𝐦 function:
lst = [classify(x) for x in data]
cnt = lst.count('watermelon')

f'Total number of {obj}s is {cnt}'

'Total number of watermelons is 2'

## *1.2.o Dictionary comprehensions*

In [46]:
names = ['Anna', 'Bob', 'Carl']
ages = [21, 18, 34]

In [47]:
# zip creates a list of tuples elementwise
names_and_ages = zip(names, ages, (0, 1, 2), {1, 32, 4})

for el in names_and_ages:
    print(el)

# zipping
type(names_and_ages), list(names_and_ages)

('Anna', 21, 0, 32)
('Bob', 18, 1, 1)
('Carl', 34, 2, 4)


(zip, [])

In [48]:
# Comprehensions
# {'age': value}
# first loop key, value ('Anna', 21)

dct = {name: age for name, age in zip(names, ages)}
# dct = {age: name for name, age in zip(names, ages)}

print(type(dct), dct)

<class 'dict'> {'Anna': 21, 'Bob': 18, 'Carl': 34}


## *1.2.p Reading Data from a file*

Content of file _scores.txt_ that lists the performance of players at a certain game:

<br>

`80,55,16,26,37,62,49,13,28,56`

`43,45,47,63,43,65,10,52,30,18`

`63,71,69,24,54,29,79,83,38,56`

`46,42,39,14,47,40,72,43,57,47`

`61,49,65,31,79,62,9,90,65,44`

`10,28,16,6,61,72,78,55,54,48`

In [49]:
with open('./scores.txt', 'r') as f: # f is then only available within the code block
    
    data = []
    
    for line in f:    
        line_entries = line.strip().split(',')
        print(line_entries)
        lst = [float(x) for x in line_entries]
        #data.append(lst)
        data.extend(lst)
        # in contrast with
    
print(f'Data length: {len(data)}')
f'File content: {data}'

FileNotFoundError: [Errno 2] No such file or directory: './scores.txt'

## *1.2.q Training and Test data separation*

In [50]:
N = len(data)

ratio = 0.8
split = int(ratio * N) # 80 % of length

train_data = data[:split]
test_data  = data[split:]

print(f"Train len: {len(train_data)} \nTest len: {len(test_data)}")

Train len: 4 
Test len: 2


## *1.2.r Writing results back into a file with exception handling*

In [51]:
import os # imports package for file and dir handling


def write(data, outfile='outputs.txt', folder='./data'):
    
    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, outfile)
    
    try:
        # Make sure not to overwrite an existing file
        if os.path.exists(filepath):    
            raise Exception(f"File '{filepath}' already exists.")
        with open(filepath, 'w') as f:  # 'a'
            f.write(str(data))
            print(f'Sucessfuly written to {filepath}')
    except Exception as e:   
        #recreate_file(data, outfile)
        print(f"Exception occured: {e}")

In [52]:
write(train_data, outfile='train_scores.txt')
write(test_data, outfile='te_scores.txt')

Sucessfuly written to ./data\train_scores.txt
Sucessfuly written to ./data\te_scores.txt


## *1.2.s Classes*

In [53]:
# Classes are useful for modeling anything that has an internal state, for example, machine learning models.
# The model below classifies whether a score is above/below the average.

class Classifier(object): # parent class 
    
    mu = 10 #class attribute
    
    def __init__(self, name='Score'): # constructor (special method)
        self.avg = 0
        self.name = name
        
    # special method to define an object representation of the class
    def __repr__(self):
        return f'{self.name} classifier with avg: {self.avg:0.3f}'
    
    # methods
    def train(self, data): 
        self.avg = sum(data) / len(data)
        return self
        
    def predict(self, data):
        return ['above' if x > self.avg else 'below' for x in data]

## *1.2.t Creation of a new classifier object*

In [54]:
c = Classifier(name='Custom')
print(c.avg, c.name, c.mu)

# __repr__ function is called
print(c)              

d = Classifier(name='Temp')
print(d)  

c is d

0 Custom 10
Custom classifier with avg: 0.000
Temp classifier with avg: 0.000


False

In [55]:
#test

#the following is the same, because python is always looking for the string method
c = Classifier()
print(c)
print(c.__repr__)
next(iter([1,2,3]))

Score classifier with avg: 0.000
<bound method Classifier.__repr__ of Score classifier with avg: 0.000>


1

In [56]:
# Get training and test datasets from scores.txt file
from utils import get_train_test_data

train_data, test_data = get_train_test_data(path='./scores.txt', train_size=0.8)

ModuleNotFoundError: No module named 'utils'

In [57]:
# Train the classifier and inspect what the classifier has learned:
c.train(train_data)
print(c)

TypeError: unsupported operand type(s) for +: 'int' and 'dict'

## *1.2.u Application of the test data to the model*

In [59]:
print(f"Test data len: {len(test_data)}")

test_preds = c.predict(test_data)

print(f"Avg: {c.avg:0.3f}")
print("Test: ", test_data)
print("Pred: ", test_preds)

Test data len: 2


TypeError: '>' not supported between instances of 'dict' and 'int'

In [60]:
n_samples_to_print = 4

res = zip(test_data[:n_samples_to_print], test_preds[:n_samples_to_print]) # zip combines two objects into a tuple elementwise

list(res) # cast to list for printing

NameError: name 'test_preds' is not defined