In [7]:
#### Chapter 3: Built-in Data Structures, Functions, and Files ####

### Tuples 

In [9]:
# A tuple is a fixed-length, immutable sequence of objects:

In [10]:
# Can be formed with no parentheses:
tup = 4, 5, 6
tup

(4, 5, 6)

In [11]:
# Or with parentheses:
alt_tup = (4, 5, 6)
alt_tup

(4, 5, 6)

In [12]:
# You can convert any sequence or iterator to a tuple:
tuple([1, 2, 3, 4])

(1, 2, 3, 4)

In [13]:
# Elements can be accessed with square brackets:
tup[0]

4

In [14]:
# While the objects stored in a tuple may be mutable themselves, once a 
# tuple is created it's not possible to modify which object is stored
# in each slot:
tup = ('foo', [1, 2], True)

In [15]:
# Append an element to nested list:
tup[1].append(3)

In [16]:
tup

('foo', [1, 2, 3], True)

In [18]:
# You can concatenate tuples using the + operator:
(4, None, 'foo') + (6, 0) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [19]:
# Multiplying a tuple by an integer has the effect of concatenating
# together that many copies of the tuple:
('foo', 'bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

In [20]:
# If you try to assign to a tuple-like expression of variables, Python
# will attempt to unpack the values:
tup = (4, 5, 6)
a, b, c = tup

In [21]:
print(a, b, c)

4 5 6


In [22]:
# Even sequences with nested tuples can be unpacked:
tup = 4, 5, (6, 7)
a, b, (c, d) = tup

In [23]:
print(c, d)

6 7


In [24]:
# A common use of variable unpacking is iterating over sequences of tuples/lists:
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [26]:
# You can also just pluck the first few elements from a sequence using *rest:
values = 1, 2, 3, 4, 5
a, b, *rest = values

In [28]:
print(a, b, rest)

1 2 [3, 4, 5]


In [29]:
# The 'rest' bit is sometimes something you want to discard; there is nothing special
# about the 'rest' name. By convention, programmers will use '_' to indicate values
# that are to be discarded:
a, b, *_ = values

In [30]:
# Since the size and contents of a tuple cannot be modified, it is very light on
# instance methods. A useful one is count, which counts the number of occurrences
# of a value:
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

4

### Lists

In [32]:
# In contrast to tuples, lists are variable length and their contents can be modified
# in-place. You can define them with square brackets:
a = [2, 3, 7, None]

In [33]:
# Lists and tuples are semantically similar and can be used interchangeably in many functions

In [34]:
# The list function is frequently used in data processing as a way to materialize an 
# iterator or generator expression:
gen = range(10)

In [35]:
gen

range(0, 10)

In [36]:
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [37]:
# Elements can be appended to the end of the list with the append method:
b = ['foo', 'peekaboo', 'baz']
b.append('dwarf')

In [38]:
b

['foo', 'peekaboo', 'baz', 'dwarf']

In [39]:
# Using insert you can insert an element at a specific location in the list:
b.insert(1, 'red')

In [40]:
b

['foo', 'red', 'peekaboo', 'baz', 'dwarf']

In [41]:
# Warning: 'insert' is computationally expensive compared with 'append', because
# references to subsequent elements have to be shifted internally to make room
# for the new element.

# If you need to insert elements at both the beginning and end of a sequence, 
# try exploring collections.deque, a double-ended queue

In [42]:
# The inverse operation to insert is pop, which removes and returns an element
# at a particular index:
b.pop(2)

'peekaboo'

In [44]:
# By default, 'pop' will remove the last element:
b.pop()

'dwarf'

In [45]:
b

['foo', 'red', 'baz']

In [46]:
# Elements can be removed by value with 'remove', which locates the first such 
# value and removes it from the list:
b.append('foo')
b

['foo', 'red', 'baz', 'foo']

In [47]:
b.remove('foo')

In [48]:
b

['red', 'baz', 'foo']

In [49]:
# You can check if a list contains a value using the 'in' keyword:
'dwarf' in b

False

In [50]:
# The keyword 'not' can be used for negation:
'dwarf' not in b

True

In [51]:
# Warning: checking whether a list contains a value is a lot slower than doing so
# with a dictionary or set, as Python makes a linear scan across the values of
# the list, whereas it can check the others (based on hash tables) in constant time

In [52]:
# Similar to tuples, adding two lists together with + concatenates them:
[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [53]:
# If you have a list already defined, you can append multiple elements to it using
# the 'extend' method:
x = [4, None, 'foo']
x.extend([7, 8, (2, 3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

In [55]:
# Note that list concatenation by addition is a comparatively expensive operation since
# a new list must be created and the objects copied over. Using 'extend' to append elements
# to an existing list, especially if you are building up a large list, is usually preferred:
#
# # slower
# everything = []
# for chunk in list_of_lists:
#     everything = everything + chunk
# 
# # faster
# everything = []
# for chunk in list_of_lists:
#     everything.extend(chunk)

In [56]:
# You can sort a list in-place (without creating a new object) by calling its sort method:
a = [7, 2, 5, 1, 3]
a.sort()
a

[1, 2, 3, 5, 7]

In [57]:
# 'sort' has a few options that will occasionally come in handy. One is the ability to pass
# a secondary sort key (a function that produces a value to use to sort the objects):
b = ['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

['He', 'saw', 'six', 'small', 'foxes']

In [58]:
# The built-in bisect module implements binary search and insertion into a sorted list. 
# 'bisect.bisect' finds the location where an element should be inserted to keep it 
# sorted, while 'bisect.insort' actually inserts the element into that location:
import bisect
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect(c, 2)

4

In [59]:
bisect.bisect(c, 5)

6

In [60]:
bisect.insort(c, 6)

In [61]:
c

[1, 2, 2, 2, 3, 4, 6, 7]

In [62]:
# You can select sections of most sequence types by using slice notation, which in its
# basic form consists of start:stop passed to the indexing operator:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]

[2, 3, 7, 5]

In [63]:
# Slices can also be assigned to with a sequence:
seq[3:4] = [6, 3]

In [64]:
seq

[7, 2, 3, 6, 3, 5, 6, 0, 1]

In [65]:
# While the element at the 'start' index is included, the 'stop' index is not included, so
# that the number of elements in the result is stop - start

In [66]:
# Either the start or stop index can be omitted, in which case they default to the start
# of the sequence and the end of the sequence, respectively:
print(seq[:5])
print(seq[3:])
print(seq[:])

[7, 2, 3, 6, 3]
[6, 3, 5, 6, 0, 1]
[7, 2, 3, 6, 3, 5, 6, 0, 1]


In [67]:
# Negative indices slice the sequence relative to the end:
print(seq[-4:])
print(seq[-6:-2])

[5, 6, 0, 1]
[6, 3, 5, 6]


In [68]:
# A step can also be used after a second colon:
seq[::2]

[7, 3, 3, 6, 1]

In [69]:
# A clever use of this is to pass -1, which has the effect of reversing a list/tuple:
seq[::-1]

[1, 0, 6, 5, 3, 6, 3, 2, 7]

### Sequence Functions

In [70]:
# It's common when iterating over a sequence to want to keep track of the index of the
# current item. Python has a built-in function enumerate() which returns a sequence of
# (i, value) tuples:
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, v in enumerate(some_list):
    mapping[v] = i
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

In [78]:
# The sorted() function returns a new sorted list from the elements of any sequence:
sorted([7, 1, 2, 6, 0, 3, 2], key=lambda x: -x)

[7, 6, 3, 2, 2, 1, 0]

In [79]:
# The 'zip' function pairs up the elements of a number of lists, tuples, or other
# sequences to create an iterator of tuples (use list() to materialize it):
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)

In [81]:
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [82]:
# A very common use of zip is simultaneously iterating over multiple sequences:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print(f'{i}: {a}, {b}')

0: foo, one
1: bar, two
2: baz, three


In [83]:
# Given a zipped sequence, zip can be applied to unzip the sequence:
pitchers = [('Nolan', 'Ryan'), ('Randy', 'Johnson'), ('Curt', 'Schilling')]
first_names, last_names = zip(*pitchers)

In [84]:
first_names

('Nolan', 'Randy', 'Curt')

In [85]:
last_names

('Ryan', 'Johnson', 'Schilling')

In [87]:
# 'reversed' iterates over the elements of a sequence in reverse order:
list(reversed([1, 2, 3, 4, 5]))

[5, 4, 3, 2, 1]

In [88]:
# Keep in mind that reversed returns a lazy iterator, so it does not create the reversed
# sequence until materialized (with list() or a for loop)

### Dictionaries

In [91]:
# Dictionaries are likely the most important built-in Python data structure. A more common
# name for it is 'hash map' or 'associative array'. It is a flexibly sized collection of
# key-value pairs, where key and value are Python objects. 
dict1 = {'a': 'some value', 'b': [1, 2, 3, 4]}

In [92]:
dict1

{'a': 'some value', 'b': [1, 2, 3, 4]}

In [93]:
# You can access, insert, or set elements using the same syntax for lists/tuples:
dict1[7] = 'an integer'

In [94]:
dict1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [96]:
dict1['b']

[1, 2, 3, 4]

In [97]:
# You can check if a dict contains a key using the same syntax used for checking
# whether a list/tuple contains a value:
'b' in dict1

True

In [109]:
# You can delete values either using the del keyword or the pop method (which simultaneously
# returns the value and deletes the key):
dict1[5] = 'some value'
dict1['dummy'] = 'another value'
dict1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 5: 'some value',
 'dummy': 'another value'}

In [110]:
del dict1[5]
dict1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dummy': 'another value'}

In [111]:
# Unlike list.pop(), dict.pop() has no default target; you must pass the key to remove:
ret = dict1.pop('dummy')
dict1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [112]:
ret

'another value'

In [113]:
# You can access the keys and values with iterator methods:
list(dict1.keys())

['a', 'b', 7]

In [115]:
# Dicts preserve insertion order (guaranteed since Python 3.7), and these methods output
# the keys and values in that same order:
list(dict1.values())

['some value', [1, 2, 3, 4], 'an integer']

In [116]:
# You can merge one dict into another using the update method:
dict1.update({'b': 'foo', 'c': 12})

In [118]:
# The update method changes dicts in-place, so any existing keys in the data passed
# to update() will have their old values discarded
dict1

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

In [119]:
# It's common to end up with two sequences that you want to pair up element-wise in
# a dict:
#
# mapping = {}
# for key, value in zip(key_list, value_list):
#     mapping[key] = value

In [121]:
# Since a dict is essentially a collection of 2-tuples, the dict function accepts a list
# of 2-tuples:
# 
mapping = dict(zip(range(5), reversed(range(5))))

In [122]:
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

In [None]:
# It's also common to have logic like:
# 
# if key in some_dict:
#     value = some_dict[key]
# else:
#     value = default_value

In [123]:
# Thus, the dict methods get and pop can take a default value to be returned, so that
# the above if-else block can be written simply as:
# 
# value = some_dict.get(key, default_value)

In [124]:
# Sometimes the default value is another collection, like a list:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)

In [125]:
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [128]:
# The setdefault() method returns the value stored under the specified key. If the
# key does not exist, it first inserts the key with the given default value and
# then returns that default:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)

In [129]:
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [130]:
# The built-in collections module has a useful class, defaultdict, which makes this
# even easier. To create one, you pass a type or function for generating the default
# value for each slot in the dict:
from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)

In [131]:
by_letter

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

### Sets

In [133]:
# A set is an unordered collection of unique elements. You can think of them as dicts,
# but with keys only (no values). A set can be created in two ways:
a = set([2, 2, 2, 1, 3, 3])
b = {1, 2, 3}

In [134]:
# Sets support mathematical set operations like union, intersection, difference, and 
# symmetric difference:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [135]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [137]:
a.intersection(b)

{3, 4, 5}

In [139]:
# Like dicts, set elements must be immutable (hashable). You can also check if a set is
# a subset (or superset) of another set:
{1, 2, 3}.issubset(a)

True

In [140]:
{1, 2, 3, 4, 5, 6}.issuperset(a)

True

In [141]:
# Sets are equal iff their contents are equal:
{1, 2, 3} == {3, 2, 1}

True

### Comprehensions

In [143]:
# List comprehensions allow you to concisely form a new list by filtering the elements of a
# collection, transforming the elements passing the filter, in one expression:
# 
# result = [expression for value in collection if condition]
# 
# # equivalent for loop:
# result = []
# for value in collection:
#     if condition:
#         result.append(expression)

In [144]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [146]:
# Set and dictionary comprehensions are natural extensions, producing sets and dicts in an
# idiomatically similar way instead of lists:
# 
# dict_comp = {key_expr: value_expr for value in collection if condition}
#
# set_comp = {expr for value in collection if condition}

In [147]:
# Suppose we have a list of lists containing some English and Spanish names:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

In [148]:
# Now suppose we wanted to get a single list of all names with two or more
# e's in them. We could do this with a for loop:
names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >= 2]
    names_of_interest.extend(enough_es)

In [149]:
names_of_interest

['Steven']

In [150]:
# However, you can actually wrap this whole operation up in a single nested list
# comprehension. The for parts of the comprehension are arranged according to the
# order of nesting, and any filter condition is put at the end as before:
result = [name for names in all_data for name in names if name.count('e') >= 2]

In [151]:
result

['Steven']

### Functions

In [1]:
# Functions are the primary and most important method of code organization and reuse
# in Python. If you anticipate needing to repeat the same or very similar code more
# than once, it may be worth writing a function

In [2]:
# To declare a function in Python:
def my_function(x, y, z=1.5):
    """Scale the sum of ``x`` and ``y`` by ``z``.

    When ``z`` is greater than 1 the sum is multiplied by ``z``; otherwise
    ``z`` is divided by the sum.
    """
    # Guard clause: handle the "z is not greater than 1" case first.
    if not z > 1:
        return z / (x + y)
    return z * (x + y)

In [3]:
# Note that there is no issue with having multiple return statements. If Python reaches
# the end of a function without encountering a return statement, None is returned.

In [4]:
# Each function can have positional arguments and keyword arguments. Keyword arguments
# are used to specify default values or optional arguments. The main restriction here
# is that keyword arguments must follow positional arguments, but you can specify the 
# keyword arguments in any order.

In [5]:
# Functions can access variables in two different scopes: global and local. An alternative
# and more descriptive name describing a variable scope is a namespace. Any variables that 
# are assigned within a function by default are assigned to the local namespace. The local
# namespace is created when the function is called and immediately populated by the function's
# arguments. After the function is finished, the local namespace is destroyed. 

In [20]:
# Assigning variables outside of the function's scope is possible, but they must be declared
# as global:
def bind_a_variable():
    """Rebind the module-level name ``a`` to a fresh, empty list."""
    # Without this declaration the assignment below would create a local.
    global a
    a = list()
bind_a_variable()

In [21]:
a

[]

In [22]:
# You can return multiple values from a function:
def f():
    """Return three values at once; they travel back as a single tuple."""
    first, second, third = 5, 6, 7
    return first, second, third
a, b, c = f()

In [23]:
# What's happening here is that the function is actually returning one object, a tuple. When
# calling the function, you are unpacking the values into separate variables. 

In [24]:
# Since Python functions are objects, many constructs can be easily expressed that are difficult
# to do in other languages. Consider the following list:
states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda', 'south carolina##', 'West virginia?']

In [25]:
# Suppose we wish to clean these list items. One way to do this is to use built-in string
# methods along with the re standard library module for regular expressions. 

In [26]:
import re

def clean_strings(strings):
    """Return a new list with each string tidied up.

    Each value has its surrounding whitespace stripped, every '!', '#' and
    '?' character removed, and is then converted to title case.
    """
    return [re.sub('[!#?]', '', raw.strip()).title() for raw in strings]

In [27]:
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [None]:
# An alternative approach is to make a list of the operations you want to apply 
# to a particular set of strings:
def remove_punctuation(value):
    """Return ``value`` with every '!', '#' and '?' character deleted."""
    unwanted = re.compile('[!#?]')
    return unwanted.sub('', value)

# Make a list of the functions (treat as objects)
clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    """Run every value through a pipeline of cleaning functions.

    Parameters
    ----------
    strings : iterable
        The values to clean.
    ops : iterable of callables
        One-argument functions applied to each value in order; the output
        of one function is the input of the next.

    Returns
    -------
    list
        The transformed values, in the same order as ``strings``.
    """
    # Materialize once so a one-shot iterator of functions is still applied
    # to every value rather than only to the first one.
    ops = list(ops)
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    # The original built `result` but never returned it, so callers got None.
    return result

In [28]:
# A more functional pattern like this enables you to easily modify how the strings
# are transformed at a very high level. 

In [30]:
# Python also has support for so-called anonymous or lambda functions, which are a
# way of writing functions consisting of a single expression, whose value is returned.

# Using a regular function definition
def short_function(x):
    """Return ``x`` multiplied by two."""
    doubled = x * 2
    return doubled

# Equivalent action with a lambda function
equiv_anon = lambda x: x * 2

In [31]:
# Lambda functions are especially useful in data analysis because there are many 
# cases where data transformations will take functions as arguments. It's often
# less typing to pass a lambda function as opposed to writing out a full function

In [32]:
def apply_to_list(some_list, f):
    """Call ``f`` on each element of ``some_list`` and collect the results in a new list."""
    transformed = []
    for item in some_list:
        transformed.append(f(item))
    return transformed

In [33]:
ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

[8, 0, 2, 10, 12]

In [34]:
# As another example, suppose you wanted to sort a collection of strings by the
# number of unique letters:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
strings.sort(key=lambda x: len(set(list(x))))
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

In [35]:
# Currying is computer science jargon that means deriving new functions from 
# existing ones by partial argument applications:

# Define a function that takes two inputs
def add_numbers(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total

# Curry up a new function that calls the previous function with partial inputs
add_five = lambda y: add_numbers(5, y)

In [37]:
add_five(7)

12

In [38]:
# The built-in functools module can simplify this process using the partial function:
from functools import partial
add_five = partial(add_numbers, 5)

In [39]:
add_five(7)

12

### Generators

In [40]:
# Having a consistent way to iterate over sequences is accomplished by means of the 
# iterator protocol, a generic way to make objects iterable.

In [41]:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for key in some_dict:
    print(key)

a
b
c


In [42]:
# When you write the above, the Python interpreter first attempts to create an iterator
# out of some_dict:
dict_iterator = iter(some_dict)
dict_iterator

<dict_keyiterator at 0x25e9c4e0680>

In [43]:
# An iterator is any object that will yield objects to the interpreter when used in a
# context like a loop. Most methods expecting a list or list-like object will also 
# accept any iterable object:
list(dict_iterator)

['a', 'b', 'c']

In [45]:
# A generator is a concise way to construct a new iterable object. Whereas normal 
# functions execute and return a single result at a time, generators return a 
# sequence of multiple results lazily, pausing after each one until the next one
# is requested. To create a generator, use the 'yield' keyword instead of 'return'

def squares(n=10):
    """Lazily yield the squares 1**2, 2**2, ..., n**2.

    The announcement is printed only when the first value is requested,
    because a generator body does not run until iteration starts.
    """
    print(f'Generating squares from 1 to {n ** 2}')
    yield from (root ** 2 for root in range(1, n + 1))

In [50]:
gen = squares()

In [51]:
# It is not until you request elements from the generator that it begins executing
# the code:
for x in gen:
    print(x, end=' ', flush=False)

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

In [52]:
# Another even more concise way to make a generator is by using a generator expression.
# This is a generator analogue to list, dict, and set comprehensions:
gen = (x ** 2 for x in range(100))

In [53]:
gen

<generator object <genexpr> at 0x0000025E9C4D1970>

In [54]:
# This is completely equivalent to the following:
def _make_gen():
    """Yield the squares of 0 through 99, one value at a time."""
    for n in range(100):
        square = n ** 2
        yield square
gen = _make_gen()

In [55]:
gen

<generator object _make_gen at 0x0000025E9C4D1040>

In [56]:
# Generator expressions can be used instead of list comprehensions as function
# arguments in many cases:
sum(x ** 2 for x in range(100))

328350

In [57]:
dict((i, i**2) for i in range(5))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [59]:
# The standard library itertools module has a collection of generators for many
# common data algorithms. For example, groupby takes any sequence and a function,
# grouping consecutive elements in the sequence by return value of the function:
import itertools

# Key function for groupby: group words by their first character.
first_letter = lambda x: x[0]

names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

# groupby only merges *consecutive* items with equal keys, so 'Albert' forms
# its own 'A' group. The loop target is called `group` rather than `names` so
# the input list is not rebound to a consumed group iterator, which would
# make this cell fail when run a second time.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group))

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


### Exception Handling

In [60]:
# Handling errors or exceptions gracefully is an important part of building robust
# programs. In data analysis, many functions only work on certain kinds of input.
# Suppose we wanted a version of float() that fails gracefully, returning the 
# input argument:
def attempt_float(x):
    """Convert ``x`` to a float, returning ``x`` unchanged if conversion fails.

    Any ordinary error raised by ``float(x)`` is suppressed.
    """
    try:
        return float(x)
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate instead of being silently swallowed.
    except Exception:
        return x

In [61]:
# The code in the except part of the block will only be executed if float(x)
# raises an exception:
attempt_float('1.2345')

1.2345

In [62]:
attempt_float('not a number')

'not a number'

In [72]:
# You might want to only suppress ValueError, since a TypeError (the input was not
# a string or numeric value) might indicate a legitimate bug in your program:
def attempt_float(x):
    """Convert ``x`` to a float; on ValueError hand back ``x`` untouched.

    Other exceptions (for example TypeError) are not suppressed.
    """
    try:
        converted = float(x)
    except ValueError:
        return x
    return converted

In [73]:
# You can catch multiple exception types by writing a tuple of exception types 
# instead (parentheses are required):
def attempt_float(x):
    """Convert ``x`` to a float; on TypeError or ValueError return ``x`` as is."""
    try:
        result = float(x)
    except (TypeError, ValueError):
        result = x
    return result

In [74]:
# In some cases, you may not want to suppress an exception, but you want some 
# code to be executed regardless of whether the code in the try block succeeds:
f = open('some_file.txt', 'w')
try:
    f.write('Hello World!')
# Here the file handle f will always get closed
finally:
    f.close()

In [75]:
# Similarly, you can have code that executes only if the try block succeeds:
f = open('another_file.txt', 'w')
try:
    f.write('Hello World')
except:
    print('Failed')
else:
    print('Succeeded')
finally:
    f.close()

Succeeded


### Files and the Operating System

In [84]:
# To open a file for reading or writing, use the built-in open() function
# with either a relative or absolute path:

path = r'H:\Personal\Python_for_Data_Analysis\segismundo.txt'

# Modes: 'r' for read (default), 'w' for write, and 'a' for append
f = open(path)

In [85]:
# We can treat the file handle f like a list and iterate over the lines:
for line in f:
    pass

In [86]:
# The lines come out of the file with the EOL markers intact, so you'll
# often see code to get an EOL-free list of lines:
lines = [line.rstrip() for line in open(path)]

In [87]:
lines

['SueÃ±a el rico en su riqueza',
 'que mÃ¡s cuidados le ofrece',
 '',
 'sueÃ±a el pobre que padece',
 'su miseria y su pobreza',
 '',
 'sueÃ±a el que a medrar empieza',
 'sueÃ±a el que afana y pretende',
 'sueÃ±a el que agravia y ofende',
 '',
 'y en el mundo, en conclusiÃ³n',
 'todos sueÃ±an lo que son',
 'aunque ninguno lo entiende']

In [88]:
# When you use open to create file objects, it's important to explicitly close
# the file when you're finished with it. Closing the file releases its resources 
# back to the operating system:
f.close()

In [89]:
# One way to make it easier to clean up open files is to use the with statement,
# which will automatically close the file f when exiting the with block:
with open(path, encoding='utf8') as file_object:
    lines = [line.rstrip() for line in file_object]

In [90]:
lines

['Sueña el rico en su riqueza',
 'que más cuidados le ofrece',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza',
 '',
 'sueña el que a medrar empieza',
 'sueña el que afana y pretende',
 'sueña el que agravia y ofende',
 '',
 'y en el mundo, en conclusión',
 'todos sueñan lo que son',
 'aunque ninguno lo entiende']

In [91]:
# If a file is opened in write mode ('w'), a new file is created, overwriting any
# existing one in its place. There is also the 'x' file mode, which creates a writable
# file but fails if the file path already exists.

In [92]:
# For readable files, the most common methods are read, seek, and tell. 
# read returns a certain number of characters from the file. What constitutes
# a character is determined by the file's encoding:
file_object = open(path, encoding='utf8')
file_object.read(10)

'SueÃ±a el '

In [93]:
# The read method advances the file handle's position by the number of bytes
# read. tell gives you the current position:
file_object.tell()

10

In [94]:
# seek changes the file position to the indicated byte in the file:
file_object.seek(3)

3

In [95]:
file_object.read(1)

'Ã'

In [96]:
file_object.close()

In [97]:
# The default behavior for Python files is 'text mode', which means that you
# intend to work with Python strings (i.e., Unicode). This contrasts with 
# 'binary mode', which you can obtain by appending 'b' onto the file mode. 

In [101]:
with open(path, encoding='utf8') as file_object:
    chars = file_object.read(10)

In [102]:
chars

'Sueña el r'

In [103]:
# UTF-8 is a variable-length Unicode encoding, so when you request some number
# of characters from the file, Python reads enough bytes (which could be as few
# as 10 or as many as 40 bytes) from the file to decode that many characters. 

In [105]:
# If you open the file in 'rb' mode instead, read requests an exact number of bytes:
with open(path, 'rb') as file_object:
    data = file_object.read(10)

In [106]:
data

b'Sue\xc3\xb1a el '

In [107]:
# Depending on the text encoding, you may be able to decode the bytes to a str 
# object, but only if each of the encoded Unicode characters is fully formed:
data.decode('utf8')

'Sueña el '

In [109]:
# Beware using seek when opening files in any mode other than binary. If the file
# position falls in the middle of the bytes defining a Unicode character, then
# subsequent reads will result in an error:
file_object = open(path, encoding='utf8')
file_object.read(5)
file_object.seek(4)
file_object.read(1)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1 in position 0: invalid start byte