# Chapter 3: Built-In Data Structures, Functions, and Files


In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls "/content/drive/Othercomputers/My MacBook Pro/python_for_data_analysis/examples"

array_ex.txt	ex3.txt  example.json		     spx.csv	    tseries.csv
csv_mindex.csv	ex4.csv  fdic_failed_bank_list.html  stinkbug.png   volume.csv
ex1.csv		ex5.csv  ipython_bug.py		     stock_px.csv   yahoo_price.pkl
ex1.xlsx	ex6.csv  macrodata.csv		     test_file.csv  yahoo_volume.pkl
ex2.csv		ex7.csv  segismundo.txt		     tips.csv


## 3.1 Data Structures and Sequences

### Tuple

In [None]:
# a tuple may hold objects of different types, including mutable ones (the list)
tup = ('foo', [1, 2], True)

In [None]:
# once a tuple is created, it's not possible to modify which object is stored in each slot
# The assignment below is expected to fail. The TypeError is caught and printed
# so that "Run All" can continue past this demonstration cell.
try:
    tup[2] = False
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: 'tuple' object does not support item assignment

In [None]:
# But if an element in a tuple is mutable, we can modify the element in place:
# the tuple still holds the same list object; only that list's contents change
tup[1].append(3)
print(tup)

In [None]:
# unpack the three slots of a tuple into separate variables in one assignment
tup = 4, 5, 6
(a, b, c) = tup
print(a)

4


### List

In [None]:
# a small list of strings that the following cells modify in place
b_list = ['foo', 'bar', 'baz']

In [None]:
# append adds a single element to the end of the list (in place)
b_list.append('peekaboo')
print(b_list)

['foo', 'bar', 'baz', 'peekaboo']


In [None]:
# insert places the element before the given index; later elements shift right
b_list.insert(1, 'apple')
print(b_list)

['foo', 'apple', 'bar', 'baz', 'peekaboo']


In [None]:
# pop() with no argument removes and returns the last element
b_element = b_list.pop()
print(b_element)

# pop(i) removes and returns the element at index i
b_element = b_list.pop(2)
print(b_element)

peekaboo
bar


In [None]:
# concatenating and combining lists
# concatenating lists with + is comparatively expensive,
# because a new list is created and the elements are copied into it
x = [1, 2, 3]
x = x + [4, 5, 6]
print(x)

[1, 2, 3, 4, 5, 6]


In [None]:
# extend appends the elements to the existing list in place,
# so it is less expensive than building a new list with +
x.extend([7, 8, 9])
print(x)

[1, 2, 3, 4, 5, 6, 7, 8, 9]


In [None]:
# the number after the second colon denotes the step of the slice
print(x[::2])

[1, 3, 5, 7, 9]


In [None]:
# a step of -1 walks the list backwards; both slices give the reversed list
print(x[::-1])
print(x[-1::-1])

[9, 8, 7, 6, 5, 4, 3, 2, 1]
[9, 8, 7, 6, 5, 4, 3, 2, 1]


### Dictionary

In [None]:
# keys of different types (here str and int) can live in the same dict
d1 = {'a': 'some value', 'b': [1, 2, 3, 4]}
d1[7] = 'an integer'
print(d1)

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}


In [None]:
# `in` tests whether a key (not a value) is present in the dict
'b' in d1

True

In [None]:
# assigning to a key that does not exist yet adds a new entry
d1["dummy"] = "another value"
print(d1)

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer', 'dummy': 'another value'}


In [None]:
# pop removes the key and returns the value that was stored under it
ret = d1.pop("dummy")
print(ret)
print(d1)

another value
{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}


In [None]:
# del removes the key as well, but does not return the value
del d1['a']
print(d1)

{'b': [1, 2, 3, 4], 7: 'an integer'}


In [None]:
# keys() and values() return views; wrap them in list() to materialize them
print(list(d1.keys()))
print(list(d1.values()))

['b', 7]
[[1, 2, 3, 4], 'an integer']


In [None]:
# items() yields (key, value) pairs, which the loop unpacks directly
print(list(d1.items()))
for key, value in d1.items():
  # one print call, key and value still on separate lines
  print(key, value, sep="\n")

[('b', [1, 2, 3, 4]), (7, 'an integer')]
b
[1, 2, 3, 4]
7
an integer


In [None]:
# pair 0..4 with 4..0; zip is lazy, so printing shows the zip object, not the pairs
ascending = range(5)
tuples = zip(ascending, reversed(ascending))
print(tuples)

<zip object at 0x7e11083ff180>


In [None]:
# dict() accepts an iterable of (key, value) pairs
d2 = dict(tuples)
print(d2)

# a zip object is an iterator and can only be consumed once,
# so this second pass over it yields nothing
print(list(tuples))

# build a fresh zip object to create the dict again
d2 = dict(zip(range(0,5), reversed(range(0,5))))
print(d2)

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}
[]
{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}


In [None]:
# the values of a dict can be any Python object,
# but the keys of a dict must be hashable (in practice: immutable built-ins
# such as str, int, or tuples that contain only hashable items)
# to check if an object is hashable, we can use the hash function
print(hash('string'))
print(hash(100))
print(hash((1, 2, (2, 3))))

# a list is mutable and therefore unhashable: hash() raises TypeError for it.
# The error is caught and printed so the rest of the notebook still runs.
unhashable = [1, 2, 3]
try:
    print(hash(unhashable))
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

115755772960839466
100
-9209053662355515447


TypeError: unhashable type: 'list'

### Set

In [None]:
# building a set from a list drops the duplicate elements
set([1,2,2,3,3,4,4,4,4,4])

{1, 2, 3, 4}

In [None]:
# union returns a new set and leaves both operands untouched,
# so the result has to be assigned back to keep it
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
a = a.union(b)
print(a)


{1, 2, 3, 4, 5, 6, 7, 8}


In [None]:
# update adds the elements of b to a in place (no reassignment needed)
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
a.update(b)
print(a)

{1, 2, 3, 4, 5, 6, 7, 8}


In [None]:
# set elements must be hashable, so a list cannot be stored in a set directly;
# converting the list to a tuple first gives a hashable equivalent
a = [1, 2, 3]
b = set()
b.add(tuple(a))
print(b)

{(1, 2, 3)}


In [None]:
# add inserts one more hashable element (again a tuple built from a list)
c = [4,5,6]
b.add(tuple(c))
print(b)

{(1, 2, 3), (4, 5, 6)}


In [None]:
# issubset is True when every element of the left set is also in a_set
a_set = {1, 2, 3, 4, 5}
{1, 2, 3}.issubset(a_set)

True

In [None]:
# sets are equal when they contain the same elements; order is irrelevant
{1,2,3} == {3,2,1}

True

### Built-In Sequence Functions

#### enumerate

In [None]:
list_a = ['foo', 'bar', 'baz']
# enumerate yields (position, element) pairs; flip each pair so that
# every element maps to its position in the list
mapping = dict((item, position) for position, item in enumerate(list_a))
print(mapping)

{'foo': 0, 'bar': 1, 'baz': 2}


In [None]:
# iterating over a dict yields its keys, so v is a key of mapping here
for i, v in enumerate(mapping):
  print(i)
  print(v)

0
foo
1
bar
2
baz


In [None]:
a = "horse race"
# unpacking a string into a list literal gives one element per character
b = [*a]
print(b)
# sort() reorders the list in place (and returns None)
b.sort()
print(b)


['h', 'o', 'r', 's', 'e', ' ', 'r', 'a', 'c', 'e']
[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']


#### zip

In [None]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
pairs = list(zipped)
print(pairs)

# the zip object is an iterator: the list() call above consumed it,
# so building a dict from it now gives an empty dict
leftover = dict(zipped)
print(leftover)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]
{}


In [None]:
# zip accepts any number of sequences and stops as soon as the
# shortest one runs out, so only two triples are produced here
seq3 = [True, False]
triples = list(zip(seq1, seq2, seq3))
print(triples)

[('foo', 'one', True), ('bar', 'two', False)]


In [None]:
# enumerate can wrap zip to number the pairs; only the pairs are printed here
# (index is unpacked but not used)
for index, values in enumerate(zip(seq1, seq2)):
  print(values)

('foo', 'one')
('bar', 'two')
('baz', 'three')


#### reversed


In [None]:
seq4 = list(range(10, 0, -1))
print(seq4)

# reversed returns a lazy iterator, so it does not create
# the reversed sequence until materialized (e.g. with list)
reversed_seq4 = reversed(seq4)
print(reversed_seq4)
print(list(reversed_seq4))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
<list_reverseiterator object at 0x7b69366324a0>
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


### List, Set, and Dictionary Comprehensions

In [None]:
strings = ["a", "as", "bat", "car", "dove", "python"]
# list comprehension: keep only words longer than two characters, title-cased
seq = [
    word.title()
    for word in strings
    if len(word) > 2
]
print(seq)

['Bat', 'Car', 'Dove', 'Python']


In [None]:
# set comprehension: curly braces without key: value pairs build a set,
# so repeated lengths collapse into one entry
unique_lengths = {len(word) for word in strings}
print(unique_lengths)

{1, 2, 3, 4, 6}


In [None]:
# dict comprehension: map every string to its position in the list
loc_mapping = {
    word: position
    for position, word in enumerate(strings)
}
print(loc_mapping)

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}


### Nested list comprehensions

In [None]:
# goal: one flat list with every name that contains two or more a's
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

# solution 1: filter each sub-list, then add its matches to the flat result
results = []
for names in all_data:
    results += [name for name in names if name.count('a') >= 2]

print(results)

['Maria', 'Natalia']


In [None]:
# solution 2: one nested comprehension; the for clauses are written in the
# same order as the equivalent nested for loops (outer first, inner second)
results = [
    name
    for names in all_data
    for name in names
    if name.count('a') >= 2
]
print(results)

['Maria', 'Natalia']


## 3.2 Functions

### Lambda Functions

In [None]:
# order the words by how many distinct characters each one contains;
# the lambda is the sort key, evaluated once per element
strings = sorted(
    ["foo", "card", "bar", "aaaa"],
    key=lambda word: len(set(word)),
)
print(strings)

['aaaa', 'foo', 'bar', 'card']


In [None]:
import itertools

# permutations: every ordered arrangement of 3 distinct elements of strings.
# The loop variables have real names instead of `_`, which IPython uses
# for the most recent cell output.
for perm in itertools.permutations(strings, 3):
    print(perm)

print("####")

# combinations: every unordered selection of 2 distinct elements of strings
for combo in itertools.combinations(strings, 2):
    print(combo)

('aaaa', 'foo', 'bar')
('aaaa', 'foo', 'card')
('aaaa', 'bar', 'foo')
('aaaa', 'bar', 'card')
('aaaa', 'card', 'foo')
('aaaa', 'card', 'bar')
('foo', 'aaaa', 'bar')
('foo', 'aaaa', 'card')
('foo', 'bar', 'aaaa')
('foo', 'bar', 'card')
('foo', 'card', 'aaaa')
('foo', 'card', 'bar')
('bar', 'aaaa', 'foo')
('bar', 'aaaa', 'card')
('bar', 'foo', 'aaaa')
('bar', 'foo', 'card')
('bar', 'card', 'aaaa')
('bar', 'card', 'foo')
('card', 'aaaa', 'foo')
('card', 'aaaa', 'bar')
('card', 'foo', 'aaaa')
('card', 'foo', 'bar')
('card', 'bar', 'aaaa')
('card', 'bar', 'foo')
####
('aaaa', 'foo')
('aaaa', 'bar')
('aaaa', 'card')
('foo', 'bar')
('foo', 'card')
('bar', 'card')


### Errors and Exception Handling

In [None]:
def attempt_float(x):
    """Try to convert ``x`` to a float, falling back to ``x`` itself.

    Parameters
    ----------
    x : object
        Value to convert, e.g. a numeric string.

    Returns
    -------
    float or object
        ``float(x)`` when the conversion succeeds; otherwise ``x`` unchanged,
        after printing a message saying it could not be converted.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are handled here. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        print(f"{x} cannot be converted to a float number.")
        return x


In [None]:
a = "1.234"
b = "Tuesday"

# try one input that float() accepts and one that it rejects
for candidate in (a, b):
    print(attempt_float(candidate))

1.234
Tuesday cannot be converted to a float number.
Tuesday


## 3.3 Files and the Operating System

In [None]:
# same Drive mount call as in the first cell of the notebook
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!ls "/content/drive/Othercomputers/My MacBook Pro/python_for_data_analysis/examples"

array_ex.txt	ex3.txt  example.json		     spx.csv	    tseries.csv
csv_mindex.csv	ex4.csv  fdic_failed_bank_list.html  stinkbug.png   volume.csv
ex1.csv		ex5.csv  ipython_bug.py		     stock_px.csv   yahoo_price.pkl
ex1.xlsx	ex6.csv  macrodata.csv		     test_file.csv  yahoo_volume.pkl
ex2.csv		ex7.csv  segismundo.txt		     tips.csv


In [None]:
# open the sample text file for reading, decoding it as UTF-8;
# the handle stays open after this cell and is iterated in the next one
path = "/content/drive/Othercomputers/My MacBook Pro/python_for_data_analysis/examples/segismundo.txt"
f = open(path, encoding="utf-8")

In [None]:
# Iterating a text file yields one line at a time, each still ending in its
# newline; print() adds another, hence the blank lines in the output.
# `with f:` closes the handle once the loop finishes (it was left open before),
# so re-run the cell that opens the file before running this one again.
with f:
    for line in f:
        print(line)

Sueña el rico en su riqueza,

que más cuidados le ofrece;



sueña el pobre que padece

su miseria y su pobreza;



sueña el que a medrar empieza,

sueña el que afana y pretende,

sueña el que agravia y ofende,



y en el mundo, en conclusión,

todos sueñan lo que son,

aunque ninguno lo entiende.



