### Variables are memory references
---

The **Python Memory Manager** stores objects on, and retrieves them from, the **heap**
```python
v = 10
```
If the object ```10``` is stored on the **heap**, then ```v``` is a **reference** to the memory slot that holds it

In [1]:
v = 10
print(hex(id(v)))

0x7ff8df77a2b0


In [2]:
greeting = "Hello"
print(greeting)

Hello


In [3]:
print(hex(id(greeting)))

0x2b6657c4d70


### Reference count
---

In [4]:
import sys
import ctypes

In [5]:
a = [1, 2, 3]
print(hex(id(a)))

0x2b6657bd7c8


In [6]:
sys.getrefcount(a)

2

In [7]:
def ref_count(address: int):
  """Return the integer stored as a C ``long`` at ``address``.

  The notebook calls this with ``id(obj)`` and treats the value read back as
  the reference count of that object. Only the integer address is passed in,
  not the object itself.

  NOTE(review): this reads raw memory; if the object that lived at
  ``address`` has been freed, the value is whatever is stored there now.
  """
  counter = ctypes.c_long.from_address(address)
  return counter.value

In [8]:
ref_count(id(a))

1

In [9]:
b = a
ref_count(id(a))

2

In [10]:
c = a
ref_count(id(a))

3

In [11]:
c = 3
b = 4
ref_count(id(a))

1

In [12]:
# NOTE: once `a` is rebound to None, id(a) is the address of None itself, so
# the count displayed below belongs to the shared None object - not to the
# list that `a` used to reference.
a = None
ref_count(id(a))

26994

In [13]:
for i in range(10):
  print(ref_count(id(a)))

26983
26985
26987
26989
26981
26983
26985
26987
26989
26991


### Garbage collection
---

Garbage collection can be controlled programmatically using the ```gc``` module

Mainly there to clean up **circular references**

In [14]:
# Circular reference

import gc

In [15]:
def object_by_id(object_id):
  """Report whether the garbage collector tracks an object with this id.

  Args:
    object_id: Integer identity (as returned by ``id``) to search for.

  Returns:
    "Objects exist" if any object returned by ``gc.get_objects()`` has that
    id, otherwise "Not found".
  """
  is_tracked = any(id(candidate) == object_id for candidate in gc.get_objects())
  return "Objects exist" if is_tracked else "Not found"

In [16]:
class A:
  """One half of a deliberate reference cycle: each ``A`` creates a ``B`` and
  hands itself to it."""

  def __init__(self):
    """Create the partner ``B`` (passing this instance) and print both addresses."""
    # `self` is the A instance under construction; B receives it as its argument.
    partner = B(self)
    self.b = partner
    print(f"A: self={hex(id(self))}, b={hex(id(self.b))}")

In [17]:
class B:
  """Keeps a reference to whatever object is passed to its constructor."""

  def __init__(self, a):
    """Store ``a`` on the instance and print this object's and ``a``'s addresses."""
    back_reference = a
    self.a = back_reference
    print(f"B: self={hex(id(self))}, a={hex(id(self.a))}")

In [18]:
gc.disable()

In [19]:
my_var = A()

B: self=0x2b6656d4408, a=0x2b6656d4988
A: self=0x2b6656d4988, b=0x2b6656d4408


In [20]:
print(hex(id(my_var)))
print(hex(id(my_var.b)))
print(hex(id(my_var.b.a)))

0x2b6656d4988
0x2b6656d4408
0x2b6656d4988


In [21]:
a_id = id(my_var)
b_id = id(my_var.b)

In [22]:
print(ref_count(a_id))
print(ref_count(b_id))
print(object_by_id(a_id))
print(object_by_id(b_id))

2
1
Objects exist
Objects exist


In [23]:
my_var = None
print(ref_count(a_id))
print(ref_count(b_id))
print(object_by_id(a_id))
print(object_by_id(b_id))

1
1
Objects exist
Objects exist


In [24]:
gc.collect()
print(ref_count(a_id))
print(ref_count(b_id))
print(object_by_id(a_id))
print(object_by_id(b_id))

0
0
Not found
Not found


### Variable re-assignment
---

In [1]:
# Immutable
a = 5
print(hex(id(a)))

0x7ff8df77a210


In [2]:
a += 10
print(hex(id(a)))

0x7ff8df77a350


In [3]:
a = 5
b = 5
print(hex(id(a)))
print(hex(id(b)))

0x7ff8df77a210
0x7ff8df77a210


### Object Mutability
---

**Mutable** - An object whose internal state *can* be changed, e.g. ```list()```, ```dict()```, sets, User-Defined Classes

**Immutable** - An object whose internal state *can't* be changed, e.g. ```int()```, ```str()```, tuples, frozen sets, User-Defined Classes...

Note: Tuples are *immutable*, but the elements can be *mutable*

In [4]:
# Mutable
my_list = [1, 2, 3]
my_list2 = [1, 2, 3]
print(hex(id(my_list)))
print(hex(id(my_list2)))

0x1fc21102c48
0x1fc21102988


In [5]:
my_list.append(4)
print(hex(id(my_list)))
print(hex(id(my_list2)))

0x1fc21102c48
0x1fc21102988


In [6]:
my_list2 = my_list
print(hex(id(my_list)))
print(hex(id(my_list2)))

0x1fc21102c48
0x1fc21102c48


In [7]:
my_list = my_list + [4]
print(hex(id(my_list)))
print(hex(id(my_list2)))

0x1fc21102948
0x1fc21102c48


In [9]:
print(my_list)
print(my_list2)

[1, 2, 3, 4, 4]
[1, 2, 3, 4]


In [10]:
my_list.append(4)
print(hex(id(my_list)))
print(hex(id(my_list2)))

0x1fc21102948
0x1fc21102c48


In [11]:
print(my_list)
print(my_list2)

[1, 2, 3, 4, 4, 4]
[1, 2, 3, 4]


In [12]:
# Mutable and Immutable
t = ([1,2], [3,4])
print(hex(id(t)))
print(hex(id(t[0])))
print(hex(id(t[1])))

0x1fc21007348
0x1fc21012c08
0x1fc21012d88


In [13]:
t[0].append(5)
print(hex(id(t)))
print(hex(id(t[0])))
print(hex(id(t[1])))

0x1fc21007348
0x1fc21012c08
0x1fc21012d88


In [34]:
# Immutable objects are "safe" from side-effects

def process_string(s):
  """Return ``s`` with ' world' appended, printing the address of ``s``
  before and after the concatenation.

  The caller's string is never modified: the local name ``s`` is simply
  rebound to the result of the concatenation.
  """
  print(f"s = {hex(id(s))}")
  s = s + ' world'
  # Second address line, followed by one blank line.
  print(f"s = {hex(id(s))}", end="\n\n")
  return s

my_string = 'Hello'
print(f"my_string = {my_string}")
print()
process_string(my_string)
print(f"my_string = {my_string}")
print()

my_string = Hello

s = 0x1fc21131030
s = 0x1fc22eef630

my_string = Hello


In [37]:
# Mutable objects are "not safe" from side-effects

def modify_list(a):
  """Demonstrate a side effect on a mutable argument.

  'Hmmm' is appended to the caller's list in place; afterwards the local
  name ``a`` is rebound to a new list with 3 added, and that new list is
  returned. The address of ``a`` is printed at each stage.
  """
  print(f"a = {hex(id(a))}")

  # In-place mutation: visible to the caller.
  a.append('Hmmm')
  print(f"a = {hex(id(a))}")

  # Concatenation builds a separate list; the caller's list is left alone.
  a = a + [3]
  print(f"a = {hex(id(a))}", end="\n\n")
  return a

my_list = [5]
print(f"my_list = {my_list}")
print()
mod_list = modify_list(my_list)
print(f"my_list = {my_list}")
print()

print(f"mod_list = {mod_list}")
print()

my_list = [5]

a = 0x1fc22edfa88
a = 0x1fc22edfa88
a = 0x1fc22e4b048

my_list = [5, 'Hmmm']

mod_list = [5, 'Hmmm', 3]



### Shared reference
---

In [47]:
# Immutable

a = 10
b = a
print(hex(id(a)))
print(hex(id(b)))

0x7ff8df77a2b0
0x7ff8df77a2b0


In [48]:
# The Python Memory Manager sets up a shared reference automatically
# This is safe as the objects are immutable
a = 5
b = 5
print(hex(id(a)))
print(hex(id(b)))

0x7ff8df77a210
0x7ff8df77a210


In [52]:
# However, it does not always happen
a = 500
b = 500
print(hex(id(a)))
print(hex(id(b)))

0x1fc22ef44b0
0x1fc22ef4770


In [49]:
# Mutable

a = [1, 2, 3]
b = a
print(hex(id(a)))
print(hex(id(b)))

0x1fc22ef6808
0x1fc22ef6808


In [50]:
b.append(100)
print(a)
print(b)
print(hex(id(a)))
print(hex(id(b)))

[1, 2, 3, 100]
[1, 2, 3, 100]
0x1fc22ef6808
0x1fc22ef6808


In [51]:
# The Python Memory Manager does not set up shared references for mutable objects
a = [1, 2, 3]
b = [1, 2, 3]
print(hex(id(a)))
print(hex(id(b)))

0x1fc22ef8448
0x1fc22ef84c8


### Variable equality
---

Two fundamental ways:

> Memory Address

```python
var_1 is var_2
``` 
```python
var_1 is not var_2
```

> Object State (data)

```python
var_1 == var_2
``` 
```python
var_1 != var_2
```

In [77]:
# Immutable
a = 10
b = a

In [78]:
a is b

True

In [79]:
a == b

True

In [80]:
# Immutable
a = 10
b = 10

In [81]:
a is b  # But don't count on it

True

In [82]:
a == b

True

In [83]:
# Immutable
c = 10
d = 10.0

In [71]:
c is d

False

In [72]:
c == d

True

In [74]:
# Mutable
e = [1, 2, 3]
f = [1, 2, 3]

In [75]:
e is f

False

In [76]:
e == f

True

In [85]:
# Python Memory Manager will always use a shared reference for None
a = None
b = None
c = None
print(hex(id(a)))
print(hex(id(b)))
print(hex(id(c)))

0x7ff8df705ce0
0x7ff8df705ce0
0x7ff8df705ce0


In [86]:
a is None

True

In [87]:
a == None

True

### Everything is an object (instance of classes)
---

A **function** is an instance of the class ```function```

A **type** is an instance of the class ```type```...

This means they all have a memory address!

Consequences:
> An object can be **assigned** to a variable, including a function

> An object can be **passed** to a function, including a function

> An object can be **returned** from a function, including a function


In [101]:
a = 10
print(type(a))

<class 'int'>


In [102]:
b = int(10)
print(type(b))

<class 'int'>


In [103]:
help(int)

Help on class int in module builtins:

class int(object)
 |  int([x]) -> integer
 |  int(x, base=10) -> integer
 |  
 |  Convert a number or string to an integer, or return 0 if no arguments
 |  are given.  If x is a number, return x.__int__().  For floating point
 |  numbers, this truncates towards zero.
 |  
 |  If x is not a number or if base is given, then x must be a string,
 |  bytes, or bytearray instance representing an integer literal in the
 |  given base.  The literal can be preceded by '+' or '-' and be surrounded
 |  by whitespace.  The base defaults to 10.  Valid bases are 0 and 2-36.
 |  Base 0 means to interpret the base from the string as an integer literal.
 |  >>> int('0b100', base=0)
 |  4
 |  
 |  Methods defined here:
 |  
 |  __abs__(self, /)
 |      abs(self)
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __bool__(self, /)
 |      self != 0
 |  
 |  __ceil__(...)
 |      Ceiling of

In [104]:
c = int('101', base=2)
c

5

In [105]:
def square(a):
  """Return ``a`` raised to the power of two."""
  return pow(a, 2)

def cube(a):
  """Return ``a`` raised to the power of three."""
  return pow(a, 3)

In [106]:
def select_function(fun_id):
  """Return the function selected by ``fun_id``.

  Returns:
    ``square`` when ``fun_id == 2``, ``cube`` when ``fun_id == 3``, and
    ``None`` for any other value.
  """
  if fun_id == 2:
    return square
  if fun_id == 3:
    return cube
  return None

In [107]:
f = select_function(2)

In [108]:
f(4)

16

In [109]:
select_function(3)(4)

64

### Python Optimizations: Interning
---

> ```CPython``` - Python implementation written in C

> ```Jython``` - Written in Java

> ```PyPy``` - written in RPython...

**Interning** - reusing objects on demand (shared referencing)

At startup, Python (CPython) pre-loads (caches) a global list of integers in the range [-5, 256]

Any time an integer is referenced in that range, Python will use the cached version of that object, a *singleton*.

In [121]:
# Singleton
a = 5
b = int('5')
print(hex(id(a)))
print(hex(id(b)))
a is b

0x7ff8df77a210
0x7ff8df77a210


True

In [119]:
# Not a singleton
a = 257
b = 257
print(hex(id(a)))
print(hex(id(b)))
a is b

0x1fc22f1bc50
0x1fc22f1bc30


False

### Python Optimizations: String interning
---

```python
a is b  # Just check the memory slot
```

is faster than

```python
a == b   # Have to check every letter
```
Overall, you don't need to worry about string interning

In [125]:
a = 'hello'
b = 'hello'
print(hex(id(a)))
print(hex(id(b)))
a is b

0x1fc22f155f0
0x1fc22f155f0


True

In [126]:
# Strings that don't look like identifiers (e.g. containing a space) don't get interned automatically
a = 'hello world'
b = 'hello world'
print(hex(id(a)))
print(hex(id(b)))
a is b

0x1fc22edfab0
0x1fc22edfcf0


False

In [127]:
# Identifiers get interned automatically
a = '_this_is_a_long_string_that_could_be_a_identifier'
b = '_this_is_a_long_string_that_could_be_a_identifier'
print(hex(id(a)))
print(hex(id(b)))
a is b

0x1fc22f02d50
0x1fc22f02d50


True

In [129]:
# Interned manually - when you are doing lots of string comparisons
import sys

a = sys.intern('hello world')
b = sys.intern('hello world')
print(hex(id(a)))
print(hex(id(b)))
a is b

0x1fc22f12830
0x1fc22f12830


True

In [169]:
def compare_using_equals(n):
  """Compare two equal long strings with ``==``, ``n`` times.

  Benchmark helper: the outcome of each comparison is discarded and nothing
  is returned.
  """
  first = 'a long string that is not interned' * 200
  second = 'a long string that is not interned' * 200
  for _ in range(n):
    if first == second:
      pass

def compare_using_interning(n):
  """Compare two interned long strings with ``is``, ``n`` times.

  Both strings go through ``sys.intern`` before the loop. Benchmark helper:
  the outcome of each check is discarded and nothing is returned.
  """
  first = sys.intern('a long string that is not interned' * 200)
  second = sys.intern('a long string that is not interned' * 200)
  for _ in range(n):
    if first is second:
      pass

In [171]:
import time

start = time.perf_counter()
compare_using_equals(10000000)
end = time.perf_counter()
print(f"Equality (==): {round(end-start,5)} seconds")

start = time.perf_counter()
compare_using_interning(10000000)
end = time.perf_counter()
print(f"Address (is):  {round(end-start,5)} seconds")

Equality (==): 2.42844 seconds
Address (is):  0.23545 seconds


### Python Optimizations: Peephole
---

In [162]:
# Constant expressions
def my_func():
  # This function is not called in this cell; the cell only inspects
  # my_func.__code__.co_consts to see which constant expressions were
  # pre-computed. The notes below describe the recorded output.

  # Stored as the single constant 1440.
  a = 24 * 60
  # Stored as the single constant 'abcabcabc'.
  b = 'abc' * 3
  # Stored as one pre-computed string constant.
  c = 'abc' * 30
  # Not pre-computed: 'abc' and 300000 remain separate constants.
  d = 'abc' * 300000
  # Stored as one pre-computed string constant.
  e = 'The quick brown fox ' * 5
  # Not pre-computed: 'a', 'b' and 5 remain separate constants.
  f = ['a', 'b'] * 5
  # Stored as one pre-computed tuple constant.
  g = ('a', 'b') * 5
  
my_func.__code__.co_consts  

(None,
 1440,
 'abcabcabc',
 'abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc',
 'abc',
 300000,
 'The quick brown fox The quick brown fox The quick brown fox The quick brown fox The quick brown fox ',
 'a',
 'b',
 5,
 ('a', 'b', 'a', 'b', 'a', 'b', 'a', 'b', 'a', 'b'))

In [165]:
# Membership test
def my_func2():
  # Only inspected through __code__.co_consts in this cell, never called.
  # In the recorded output the list literal [1, 2, 3] is stored as the
  # tuple constant (1, 2, 3).
  if e in [1, 2, 3]:
    pass

my_func2.__code__.co_consts

(None, (1, 2, 3))

In [166]:
def my_func3():
  # Only inspected through __code__.co_consts in this cell, never called.
  # In the recorded output the set literal {1, 2, 3} is stored as the
  # constant frozenset({1, 2, 3}).
  if e in {1, 2, 3}:
    pass

my_func3.__code__.co_consts

(None, frozenset({1, 2, 3}))

In [183]:
# Benchmark for membership test

import string

char_list = list(string.ascii_letters)
char_tuple = tuple(string.ascii_letters)
char_set = set(string.ascii_letters)

In [184]:
def membership_test(n, container):
  """Run the ``'z' in container`` membership check ``n`` times.

  Benchmark helper: the result of every check is discarded and nothing is
  returned.
  """
  for _ in range(n):
    if 'z' in container:
      pass

In [186]:
start = time.perf_counter()
membership_test(10000000, char_list)
end = time.perf_counter()
print(f"List:  {round(end-start,5)} seconds")
 
start = time.perf_counter()
membership_test(10000000, char_tuple)
end = time.perf_counter()
print(f"Tuple: {round(end-start,5)} seconds")

start = time.perf_counter()
membership_test(10000000, char_set)
end = time.perf_counter()
print(f"Set:   {round(end-start,5)} seconds")

List:  2.99004 seconds
Tuple: 2.90516 seconds
Set:   0.29798 seconds
