# PCEP-30-02 3.2 – Collect and process data using tuples

In [None]:
# Tuples are usually written inside parentheses () -- it is the commas that actually make the tuple -- and can hold elements of different types:

# Empty tuple
empty_tuple = ()

# Tuple with elements
my_tuple = (1, 2, 3, 4, 5)

# Tuple with mixed data types
mixed_tuple = (10, "Python", 3.14, True)

# Tuple with a single element (must include a comma)
single_element_tuple = (5,)  # Without the comma, it would just be an integer

### Tuples: Indexing & Slicing

In [None]:
# Tuple indexing works the same way as in lists

my_tuple = (10, 20, 30, 40, 50, 60)

# Access
my_tuple[-1]

In [None]:
# Tuple slicing works the same way as in lists

print(my_tuple[1:4])   # (20, 30, 40)
print(my_tuple[:3])    # (10, 20, 30)
print(my_tuple[::2])   # (10, 30, 50)

In [None]:
# Tuples are Immutable

my_tuple[0] = 100         # <-- raises TypeError

In [None]:
t = 1, 2, 3
type(t)

In [None]:
t = (1)
u = (1,)
print(type(t))
print(type(u))

### Tuple: Operations

In [None]:
t = (1, 2, 3)
t += (4,)
print(t)


In [None]:
# Which operations are valid??

t = (1, 2, [1, 2, 3])
t.append(4)
t[0] = 3
t = t + (3,)
t = t[1].append(3)

In [None]:
3 + (1, 2, 3)                           # <-- TypeError: 3 needs to be a tuple: (3,)

In [92]:
(3,) + (1, 2, 3)                        # <-- works now

(3, 1, 2, 3)

In [None]:
d = {([1, 2], "X"): "Invalid"}  # TypeError: unhashable type: 'list'. Having non-hashable (mutable) elements in the tuple
                                #            makes the whole tuple unhashable, so it cannot be used as a dictionary key

#### NOTE

In [None]:
t = (1, 2, [3, 4])

u = t[2] + [5]                   # <-- we can index into a mutable element inside the tuple, perform addition and get back a new list (t itself is not changed)
u

In [None]:
t = (1, 2, [3, 4])

t[2] += [5]                      # <-- Using the '+=' operator will throw a TypeError


In [None]:
t                                # <-- even though t[2] += [5] threw a TypeError, the list gets modified before the error

In [None]:
t = (1, 2, [3, 4])

t[2] = t[2] + [5]                # <-- if we try to explicitly perform the addition (without '+=' operator) a TypeError is raised again

In [None]:
t                                # <-- but this time, the list does not get modified prior to the error

In [None]:
# Reasons

# - '+=' gets processed in two steps:

# - 1. '+=' modifies the mutable element in the tuple in place before the error ('+=' == .extend([5]))
# - 2. python then tries to assign the modified list to the tuple, at the tuple level, which causes the TypeError due to tuples being immutable


# - 'item assignment' (t[2] = t[2] + [5]) gets processed in two steps too:

# - 1. the right-hand side (t[2] + [5]) is evaluated first, and creates a brand new list
# - 2. python then tries to assign the new list to the tuple but fails

In [None]:
# The extend method modifies a list in place and is equivalent to '+='

lst = [1, 2, 3]
print(id(lst))                                    # <-- id: 140249923650624
lst.extend([4, 5])
print(id(lst))                                    # <-- id: 140249923650624                       
print(lst)                                        # <-- [1, 2, 3, 4, 5]

In [None]:
# The + operator creates a new list entirely (unlike '+=' / .extend(), which modify the list in place)

lst1 = [1, 2, 3]
lst2 = [4, 5]
lst1_before = lst1                                 # <-- keep a second name on the original list so we can compare afterwards

print(id(lst1))                                    # <-- id of the original list
print(id(lst2))
lst1 = lst1 + lst2                                 # <-- '+' builds a brand new list and rebinds the name lst1 to it
print(id(lst1))                                    # <-- a different id: lst1 now names the new list
print(id(lst2))                                    # <-- same id as before: lst2 was only read
print(lst1)                                        # <-- [1, 2, 3, 4, 5]
print(lst1_before)                                 # <-- [1, 2, 3]: the original list was never modified

### Tuple: Methods

In [None]:
# Tuples have only two methods, count() and index(). Being immutable, they have none of the mutating list methods (append, sort, ...)
t = 1, 2, 3, 4, 5, 6, 7, 3, 3, 3
print(t.count(3))                             # <-- 4; both methods return a value that can be assigned to a variable if needed
print(t.index(3))                             # <-- 2; returns the index of the first occurrence

### Tuple: Unpacking

In [None]:
t = (10, 20, 30)
a, b, c = t
print(a, b, c)  # 10 20 30

In [None]:
def unpack(a, b, c):
    """Add the three arguments together, left to right, and return the result."""
    partial = a + b
    return partial + c

t = 1, 2, 3
unpack(*t)

In [None]:
# The number of variables must match for tuple packing & unpacking or else a ValueError occurs

a, b = (1, 2, 3)                             # <-- ValueError: too many values to unpack

In [1]:
# We can fix this by using 'extended unpacking'

a, *b = (1, 2, 3)

print(a)
print(b)

1
[2, 3]


In [2]:
a, b, *c = (1, 2, 3)

print(a)
print(b)
print(c)

1
2
[3]


### Tuples Inside Lists

In [None]:
data = [(1, "Alice"), (2, "Bob"), (3, "Charlie")]
print(data[1])  # (2, "Bob")
print(data[1][1])  # "Bob"


### Lists Inside Tuples

In [None]:
tuple_with_list = (1, 2, [3, 4, 5])
tuple_with_list[2].append(6)                                  # <-- Allowed
print(tuple_with_list)                                        # <--  (1, 2, [3, 4, 5, 6])

In [3]:
users = [
    (101, "Alice", "alice@example.com"),
    (102, "Bob", "bob@example.com"),
    (103, "Charlie", "charlie@example.com")
]

# Accessing a tuple
print(users[1])  # Output: (102, "Bob", "bob@example.com")

# Accessing an element inside the tuple
print(users[1][1])  # Output: "Bob"


(102, 'Bob', 'bob@example.com')
Bob


In [4]:
# Adding a new user (a new tuple)
users.append((104, "David", "david@example.com"))
print(users)

# Removing a tuple from the list
users.remove((102, "Bob", "bob@example.com"))
print(users)


[(101, 'Alice', 'alice@example.com'), (102, 'Bob', 'bob@example.com'), (103, 'Charlie', 'charlie@example.com'), (104, 'David', 'david@example.com')]
[(101, 'Alice', 'alice@example.com'), (103, 'Charlie', 'charlie@example.com'), (104, 'David', 'david@example.com')]


### Sorting a List of Tuples: .sort()

In [None]:
# Python sorts lexicographically with both .sort() and sorted()
# Strings: compared character by character (by Unicode code point); if one is a prefix of the other, the shorter one is smaller
# Lists: compared element-wise; if one is a prefix of the other, the shorter one is smaller
# Tuples: compared element-wise; if one is a prefix of the other, the shorter one is smaller


In [None]:
# - .sort() is a stable sorting algorithm that uses 'Tim Sort' underneath
# - This means that elements which compare equal are left in the same relative order they had before sorting

# - Ex: stable: only elements that need to be moved are moved
# - [(1, "apple"), (2, "banana"), (1, "cherry"), (2, "date")] <-- input
# - [(1, "apple"), (1, "cherry"), (2, "banana"), (2, "date")] <-- output

# - Ex: unstable: some elements that do not need to be moved might move
# - [(1, "apple"), (1, "cherry"), (2, "banana"), (2, "date")] <-- input
# - [(1, "cherry"), (1, "apple"), (2, "banana"), (2, "date")] <-- output


In [None]:
# Since Python compares lexicographically, and the first elements of the tuples are integers, .sort() works just fine
data = [(3, ["Charlie"]), (1, ["Alice"]), (2, ["Bob"])]
data.sort()                                                   # <-- .sort() only returns None, if assigned to variable will get None
data

In [5]:
# However, what if we take the integers out of the equation?? Sorting still works since Python can directly compare the types
# within the lists (strings)
data = [(1, ["Charlie"]), (1, ["Alice"]), (1, ["Bob"])]
data.sort()
data

[(1, ['Alice']), (1, ['Bob']), (1, ['Charlie'])]

In [None]:
# What about when Python cannot directly compare the types inside the lists?? A TypeError will occur.
# This is because strings and integers cannot be ordered with '<', '<=', '>' or '>=' ('==' and '!=' still work, but sorting needs '<')
data = [(1, ["Charlie"]), (1, [1, 2]), (1, ["Bob"])]
data.sort()
data

In [None]:
# The solution to this problem is using a sorting key to extract a comparable type
data = [(1, ["Charlie"]), (1, [1, 2]), (1, ["Bob"])]

data.sort(key=lambda x: x[0])                               # <-- remember we must use the keyword argument or will get type error (key=)
print(data)                                                 # <-- is sorted by the first key only

In [None]:
data = [(1, [("Charlie")]), (1, [("Alice")]), (1, [("Bob")])]   # <-- remember, singleton tuples  must have a ,
data.sort()  
data

In [None]:
data = [(1, [("Charlie",)]), (1, [("Alice",)]), (1, [("Bob",)])]   # <-- remember, singleton tuples  must have a ,
data.sort()                                                        # <-- these sort just fine too
data

In [None]:
# Will this throw a TypeError? Why or why not?
data = [(2, [[None], "X"]), (1, [["Alice"], 42]), (3, [["Bob"], 0])]
data.sort()
print(data)

# A TypeError does not occur because Python can decide the order from the first elements alone, before ever getting to the incomparable types
# This is because the three outer tuples have distinct integers (2, 1, 3) as their first elements, so the second elements are never compared

In [None]:
data = [(1, "Alice"), (1, "Bob"), (1, ["Charlie"])]
data.sort()                                          # <-- TypeError: "Bob" and ["Charlie"] cannot be compared

In [None]:
data = [(3, ["Charlie"]), (1, ["Alice"]), (2, ["Bob"])]
data = data.sort()                                            # <-- pitfall: .sort() sorts in place and returns None, so 'data' is rebound to None
print(data)                                                   # <-- None (the sorted list is no longer reachable through 'data')

### Tuple Sorting: sorted()

In [6]:
t = (3, 1, 2)

sorted_t = sorted(t)  # <-- Returns a new list: [1, 2, 3], and not a sorted tuple!!
print(sorted_t)

# We can turn it back into a tuple tho
t = tuple(sorted(t))
print(t)

[1, 2, 3]
(1, 2, 3)


In [None]:
# When sorting tuples the data types have to be comparable

t = (1, "apple", 3, "banana")
sorted(t)                         # <-- TypeError: '<' not supported between instances of 'str' and 'int'


In [89]:
t = ((1, [2, 3]), (1, [4, 5]), (1, [0, 1]))

sorted(t)                         # <-- Works, because lists inside tuples are comparable


[(1, [0, 1]), (1, [2, 3]), (1, [4, 5])]

In [103]:
t = ((1, [2, 3]), (1, [None, 5]), (1, ["X", 1]))

sorted_t = sorted(t, key=lambda x: (x[0], str(x[1])))  
print(sorted_t)


[(1, ['X', 1]), (1, [2, 3]), (1, [None, 5])]


In [127]:
# What is the output??
t = ((3, "banana"), (1, "apple"), (2, "banana"), (1, "cherry"))
print(sorted(t, key=lambda x: (x[1], x[0])))                     # <-- 'x' is each inner tuple of t in turn, e.g. (3, "banana")
                                                                 # <-- the 'key' says to sort by the string in each inner tuple first & then by the integer
                                                                 # <-- in other words, first sort by strings, and if there is a tie, sort by the int

[(1, 'apple'), (2, 'banana'), (3, 'banana'), (1, 'cherry')]


In [None]:
# How to write this out on an exam (on paper)

# 1. Figure out the sorting keys: x[1], x[0] (str, int), then reverse order of tuples if necessary
#          ("banana", 3)
#          ("apple", 1)
#          ("banana", 2)
#          ("cherry", 1)

# 2. Sort by the primary key: x[1] (str), breaking ties with x[0] (int)
#         ("apple", 1)
#         ("banana", 2)
#         ("banana", 3)
#         ("cherry", 1)

# 3. Reverse the order:
#         (1, "apple")
#         (2, "banana")
#         (3, "banana")
#         (1, "cherry")

# 4. Write left-to-right going from top to bottom:
#  [(1, "apple"), (2, "banana"), (3, "banana"), (1, "cherry")]

In [97]:
print((1, "apple") < (3, "banana"))                                     # <-- 1 < 3 : True : stop comparing
print((1, "apple") < (2, "banana"))                                     # <-- 1 < 2 : True : stop comparing
print((1, "apple") < (1, "cherry"))                                     # <-- 1 == 1 : tie, so compare the strings: "apple" < "cherry" because "a" < "c" (not because of length) : True
print((3, "banana") > (2, "banana"))                                    # <-- 3 > 2 : True : stop comparing
print((3, "banana") > (1, "cherry"))                                    # <-- 3 > 1 : True : stop comparing
print((1, "apple") < (1, "cherry") < (2, "banana") < (3, "banana"))     # <-- True: this is the default tuple ordering, with no sorting key involved

True
True
True
True
True
True


In [128]:
# How Python sorts internally with both .sort() & sorted()
t = ((3, "banana"), (1, "apple"), (2, "banana"), (1, "cherry"))
print(sorted(t, key=lambda x: (x[1], x[0]))) 

print((1, "apple")[1] < (3, "banana")[1])
print((2, "banana")[0] < (3, "banana")[0])
print((3, "banana")[1] < (1, "cherry")[1])
print((1, "apple")[1] < (2, "banana")[1])
print((1, "apple")[1] < (1, "cherry")[1])
print((2, "banana")[1] < (1, "cherry")[1])

True
True
True
True
True
True


### Python Memory Management & Tuples

In [None]:
# Reference counting is the core of Python's memory management
# It keeps track of how many references exist for an object
# When a reference count = 0, Python automatically deletes the object
# This is why one doesn't have to manually free memory like in 'C'

In [129]:
import sys

x = [1, 2, 3]  # Create a list
print(sys.getrefcount(x))                  # <-- 2 (one reference in `x`, one temporary reference for getrefcount)

y = x  # Another reference
print(sys.getrefcount(x))                  # <-- 3 (Now `y` and `x` both reference the list)

del y  # Remove reference
print(sys.getrefcount(x))                  # <-- 2 (Back to one variable holding it)


2
3
2


#### Circular References

In [148]:
# Garbage collection is needed when reference counting fails. Python's garbage collector detects circular references
# and cleans them up eventually. 

import sys

class A:
    """Minimal object whose only attribute points back at the object itself."""

    def __init__(self):
        # The instance stores itself as an attribute, forming a cycle: a -> a.ref -> a
        self.ref = self

a = A()


print(sys.getrefcount(a)) 

del a                                   # <-- Reference count never reaches 0 because `self.ref` points to itself!


3


### Tuple -vs- List Memory Usage

In [176]:
import sys
import collections

tpl = (1, 2, 3)
lst = [1, 2, 3]
seht = set({1, 2, 3})
frozen_seht = frozenset({1, 2, 3})
dic = {1: 1, 2: 2, 3: 3}
d_dic = collections.defaultdict(int, {})


# sys.getsizeof reports the size of the container object itself, not of the objects it refers to
print(sys.getsizeof(tpl))                        # <-- Smaller (fixed size, immutable)
print(sys.getsizeof(lst))                        # <-- Larger (includes list overhead)
print(sys.getsizeof(seht))
print(sys.getsizeof(frozen_seht))
print(sys.getsizeof(dic))
print(sys.getsizeof(d_dic))

64
88
216
216
232
72


In [177]:
d_dic[1]

0

#### Tuple Caching (interning)

In [150]:
# Equal tuple literals are sometimes stored as one shared object, while each list literal always builds a separate list
# Here: 'a is b' is False, but it can be True depending on the Python version and on how the code is run
# NOTE: this sharing is an implementation detail, not a language guarantee -- compare tuples with '==', not 'is'

a = (1, 2, 3)
b = (1, 2, 3)
print(a is b)  # <-- True sometimes (same object reused by the interpreter), but here: False

x = [1, 2, 3]
y = [1, 2, 3]
print(x is y)  # <-- False (Lists are separate objects)


False
False


In [153]:
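# Putting both tuple literals inside one function makes it much more likely that they are the same object:
# the function body is compiled as a single unit, so CPython can reuse one constant tuple for equal literals
# (an implementation detail, not a guarantee -- never rely on 'is' to compare tuples, use '==')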

def test():
    """Print whether two equal tuple literals written in the same function are one object."""
    first = (1, 2, 3)
    second = (1, 2, 3)
    same_object = first is second  # <-- More likely to be True in a function
    print(same_object)

test()


True


In [151]:
a = [1, 2, 3]
b = a
del a
print(b)  

c = (4, 5, 6)
d = c
del c
print(d)


[1, 2, 3]
(4, 5, 6)


In [152]:
# There are two reference counts to 'z' in this
import sys
z = (10, 20, 30)
print(sys.getrefcount(z))


2


In [179]:
import sys

x = [1, 2, 3]
y = x
z = y

print(sys.getrefcount(x))  # (A) ?

del y
print(sys.getrefcount(x))  # (B) ?

del z
print(sys.getrefcount(x))  # (C) ?


4
3
2
