# Dynamic Array Implementation

In [54]:
from typing import Any, Optional
from collections.abc import Iterable
import ctypes
from math import log2, ceil
from numpy import iterable


class DynamicArray(object):
    def __init__(self):
        super().__init__()
        self.capacity = 1
        self.n_elements = 0
        self.array = self.make_array(self.capacity)

    def _check_index_type(self, idx: Any) -> None:
        if type(idx) is not int:
            raise IndexError("Array indices should be of integer type")

    def _check_index_in_limits(self, idx: int) -> None:
        if idx >= self.n_elements or idx < 0:
            raise IndexError(
                f"Trying to access element {idx} of an array with {self.n_elements} elements"
            )

    def _check_array_size(self, size: int) -> None:
        if (type(size) is not int) or size <= 0:
            raise ValueError("Array sizes should be positive integers")

    def make_array(self, size: int):
        self._check_array_size(size)

        return (size * ctypes.py_object)()

    def __getitem__(self, idx: int):
        self._check_index_type(idx)
        self._check_index_in_limits(idx)

        return self.array[idx]

    def __setitem__(self, idx: int, value: Any):
        self._check_index_type(idx)
        self._check_index_in_limits(idx)

        self.array[idx] = value

    def resize(self, new_capacity: int):
        self._check_array_size(new_capacity)

        if new_capacity < self.capacity:
            raise ValueError("Trying to shrink the array!")

        new_array = self.make_array(new_capacity)

        for i in range(self.n_elements):
            new_array[i] = self.array[i]

        self.array = new_array
        self.capacity = new_capacity

    def append(self, value: Any):

        if self.capacity <= self.n_elements + 1:
            self.resize(self.capacity * 2)

        self.array[self.n_elements] = value
        self.n_elements += 1

    def extend(self, values: Iterable):
        if not isinstance(values, Iterable):
            raise ValueError("Provide an iterable")

        new_element_total = self.n_elements + len(values)
        if new_element_total >= self.capacity:

            # The new size is the smallest power of 2 that can hold all the elements of the new array
            new_size = 2 ** ceil(log2(new_element_total))
            self.resize(new_size)

        for i in range(len(values)):
            self.array[self.n_elements + i] = values[i]

        self.n_elements = new_element_total

    def pop(self, idx: Optional[int] = None):

        if idx == None:
            idx = self.n_elements - 1

        self._check_index_type(idx)
        self._check_index_in_limits(idx)

        return_value = None

        if idx == self.n_elements - 1:
            self.n_elements -= 1
            return_value = self.array[self.n_elements]
        else:
            return_value = self.array[idx]
            self.n_elements -= 1

            for i in range(idx, self.n_elements - 1):
                self.array[i] = self.array[i + 1]

        # Shrink the array if n_elements < array_capacity / 4
        if self.n_elements < self.capacity / 4:
            new_capacity = self.capacity // 2
            new_array = self.make_array(new_capacity)

            for i in range(self.n_elements):
                new_array[i] = self.array[i]

            self.array = new_array
            self.capacity = new_capacity

        return return_value

    def insert(self, idx: int, value: Any):
        if type(idx) is not int:
            raise IndexError("Array indices should be integers")
        if idx < 0 or idx > self.n_elements:
            raise IndexError(
                f"Cannot insert at idx {idx} in an array of size {self.n_elements}"
            )

        if self.capacity <= self.n_elements + 1:
            self.resize(self.capacity * 2)

        self.n_elements += 1

        for i in range(self.n_elements - 1, idx, -1):

            self.array[i] = self.array[i - 1]

        self.array[idx] = value

    def __repr__(self) -> str:
        string_repr = "["

        for i in range(self.n_elements):
            string_repr += repr(self.array[i])
            if i != self.n_elements - 1:
                string_repr += ","

        string_repr += "]"
        return string_repr


In [62]:
x = DynamicArray()

# Create a new array and add some data
x.append(1234)
x.extend([9, 8, 7, 6, 12])
x.insert(1, 55555)

# Print the array
print(x)

# Remove some points from the array
print("Popping", x.pop(3))
print(x)

# Assign & Get operations
x[1] = "casa"
print("Value:", x[1])


[1234,55555,9,8,7,6,12]
Popping 8
[1234,55555,9,7,6,6]
Value: casa


### What is the time complexity of appending one element to a Dynamic Array?

**IF WE DOUBLE THE SIZE OF THE ARRAY EVERY TIME WE RUN OUT OF MEMORY:**

If we start with an array that only holds one element and append n elements.

$T(n) = n + \sum_{k=1}^{\lfloor log_2{n} \rfloor}(2^k) < n + 2n = 3n ~ O(n)$

When we perform n insertions, it takes roughly 3n times, so on average the time complexity is $O(1)$ (ammortized time complexity)

**Keep in mind that other algorithms, such as adding constant extra capacity to the array may have different time complexities.**

Adding a constant number of free slots every time yields a $O(n)$ ammortized time complexity for the append operation.

In this section, we explore Python’s various “sequence” classes, namely the builtin list, tuple, and str classes. There is significant commonality between these classes, most notably: each supports indexing to access an individual element of a sequence, using a syntax such as sequence[k], and each uses a low-level concept known
as an array to represent the sequence. 

## Low Level Arrays

The primary memory of a computer is composed of bits of information, and those bits are typically grouped into larger units that depend upon the precise system architecture. Such a typical unit is a byte, which is equivalent to 8 bits.

A computer system will have a huge number of bytes of memory, and to keep track of what information is stored in what byte, the computer uses an abstraction known as a memory address. In effect, each byte of memory is associated with a unique number that serves as its address

A group of related variables can be stored one after another in a contiguous
portion of the computer’s memory. We will denote such a representation as an
array.

<center>
<figure align = ="center">
<img  src=images/memory.png style="width:30%">
<figcaption align = "center"> Example of a character array. The numbers at the top indicate the physical memory address, while the numbers at the bottom are the array indices. </figcaption>
</figure>
</center>

The example above is an array of eight characters. Each of the locations within that array is called a cell, which is identifiead with an integer index in the array. To figure out the memory adress of a given cell, we can use the following computation: $address=start+cellsize\cdot index$. **Each cell of the array must use the same number of bytes**

However if you have used Python for a while, you may have noticed that we can store arrays with elements of various sizes (even with different data types):

```python
my_list = [1234,'geology', "today is a great day!", 3.12312]
```

This is because Python does not use a low level array to represent the information in the list. It uses a dynamic referential array!

## Referential Arrays

Python represents a list or tuple instance using an internal storage
mechanism of an array of object references. At the lowest level, what is stored
is a consecutive sequence of memory addresses at which the elements of the sequence reside

<center>
<figure align = ="center">
<img  src=images/referential_array_1.png style="width:30%">
<figcaption align = "center"> Each cell of a referential array holds a pointer to each of the elements of the list, this allows for arrays that combine different data types of different sizes (even non constant sizes) </figcaption>
</figure>
</center>


The fact that lists and tuples are referential structures is significant to the semantics of these classes. A single list instance may include multiple references
to the same object as elements of the list, and it is possible for a single object to
be an element of two or more lists, as those lists simply store references back to
that object. As an example, when you compute a slice of a list, the result is a new
list instance, but that new list has references to the same elements that are in the
original list.

<center>
<figure align = ="center">
<img  src=images/referential_array_2.png style="width:30%">
<figcaption align = "center"> Two arrays can reference the samee elements </figcaption>
</figure>
</center>

The elements of two arrays can point to the exact same elements in memory. This can be a problem when working with **mutable objects** (integers, bools, tuples, frozensets, floats and strings are inmutable objects in Python), since a change to the 0th element of the temp array will also affect the 3rd element of the primes array


## Compact Arrays in Python

They are implemented as dynamic arrays in the `array` module

In [47]:
import array

# We must specify the data type that we want to store.
primes = array.array("i", [2, 3, 5, 7, 11, 13, 17, 19])


## Python Lists

### Python uses dynamic referential arrays to store data in the List class

The CPython implementation of lists uses an array of pointers to store the elements of the list. : [Ìmplementation of PyListObject](https://github.com/python/cpython/blob/5c22476c01622f11b7745ee693f8b296a9d6a761/Include/listobject.h#L22)

```
typedef struct {
    PyObject_VAR_HEAD
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    PyObject **ob_item;

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    Py_ssize_t allocated;
} PyListObject;
```

The code below shows how the size of the list changes when appending new elements, proving that it is indeed a dynamic array. Yet, a careful examination of the intermediate array capacities suggests that Python is not using a pure geometric progression, nor is it using an arithmetic progression

In [11]:
import sys

data = []

for i in range(25):
    n_elements = len(data)
    size_in_bytes = sys.getsizeof(data)

    print(f"Number of elements: {n_elements:<4} - Size in bytes {size_in_bytes}")
    data.append(i)


Number of elements: 0    - Size in bytes 56
Number of elements: 1    - Size in bytes 88
Number of elements: 2    - Size in bytes 88
Number of elements: 3    - Size in bytes 88
Number of elements: 4    - Size in bytes 88
Number of elements: 5    - Size in bytes 120
Number of elements: 6    - Size in bytes 120
Number of elements: 7    - Size in bytes 120
Number of elements: 8    - Size in bytes 120
Number of elements: 9    - Size in bytes 184
Number of elements: 10   - Size in bytes 184
Number of elements: 11   - Size in bytes 184
Number of elements: 12   - Size in bytes 184
Number of elements: 13   - Size in bytes 184
Number of elements: 14   - Size in bytes 184
Number of elements: 15   - Size in bytes 184
Number of elements: 16   - Size in bytes 184
Number of elements: 17   - Size in bytes 248
Number of elements: 18   - Size in bytes 248
Number of elements: 19   - Size in bytes 248
Number of elements: 20   - Size in bytes 248
Number of elements: 21   - Size in bytes 248
Number of eleme

### Basic operations
- Get value at position k: `a[k]`. Time complexity ~ $O(1)$
- Assign value at position k: `a[k]=value`. Time complexity ~ $O(1)$
- Get the number of elements in the list: `len(a)`. Time complexity ~ $O(1)$

### Slicing and the slice function

**Slicing notation**. CAUTION! Remember that the **time complexity of slicing is $O(k)$** where k is the size of the slice. Don't ruin a $O(log(n))$ algorithm because of slicing!
```python
    a[start:stop]  # items start through stop-1
    a[start:]      # items start through the rest of the array
    a[:stop]       # items from the beginning through stop-1
    a[:]           # a copy of the whole array
    a[start:stop:step] # start through not past stop, by step


    a[-1]    # last item in the array
    a[-2:]   # last two items in the array
    a[:-2]   # everything except the last two items

    a[::-1]    # all items in the array, reversed
    a[1::-1]   # the first two items, reversed
    a[:-3:-1]  # the last two items, reversed
    a[-3::-1]  # everything except the last two items, reversed
```


The **slice(start, stop, step)** function returns a slice object that is used to slice any sequence (string, tuple, list, range, or bytes). Equivalence: `a[slice(start, stop, step)] = a[start:stop:step]`
```python
    text = 'Python Programing'
    text2 = 'Data structures!'

    # get slice object to slice the strings
    sliced_text = slice(0, 6, 1)

    print(text[sliced_text]) # Output: Python
    print(text2[sliced_text]) # Output Data s

```

### Methods available for the list built in class

The following methods are available:
- **copy**: Create a shallow copy of the list. Time complexity ~ $O(n)$.
```python
    import copy

    # Lists inside a list
    groups = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

    # Let's take a shallow copy of groups
    new_groups = groups.copy()

    new_groups[0][0] = 100000

    # Since we have performed a shallow copy, the value of the original list is altered.
    print(new_groups[0]) # [100000, 2, 3]
    print(groups[0]) # [100000, 2, 3]

    # To get two truly independent variables, use deepcopy
    new_groups_2 = copy.deepcopy(groups)
    new_groups_2[0[0] = 999
    
    print(new_groups_2[0]) # [999, 2, 3]
    print(groups[0]) # [100000, 2, 3]

```
-  **append**: Add a **single** element to the end of the list. Time complexity (ammortized) ~ $O(1) $
    ```python
        vegetables = ['Tomato', 'Brocoli', 'Corn']
        vegetables.append(['Lettuce'])
        print(vegetables) # ['Tomato', 'Brocoli', 'Corn', 'Lettuce']
    ```
-  **extend**: Add **all the elements in an iterable** to the end of the list. Time complexity (ammortized) ~ $O(k)$, where k is the length of the sequence to be added.
    ```python
        color = ['Red', 'Pink', 'Orange']
        color2 = ['Blue', 'Yellow']
        color.extend(color2)
        print(color) # ['Red', 'Pink', 'Orange', 'Blue', 'Yellow']
    ```
-  **insert**: Insert an element in the list at the given index. Time complexity ~ $O(n)$
    ```python
        name = ['Rita', 'Pinky', 'Babita']
        name.insert(1, 'Mini')
        print(name) # ['Rita', 'Mini', 'Pinky', 'Babita']
    ```
-  **index**: Returns the position at the **first** occurrence of the specified value. Time complexity ~ $O(n)$ since in the worst case the whole array should be iterated.
    ```python
        flower = ['Flora', 'Hana', 'Rose', 'Hana', 'Hana']
        name = flower.index('Hana')
        print(name) # 1
    ```
-  **pop**: By default it removes the last element and returns its value. The time complexity (ammortized) is $O(1)$, since Python will need to shrink the array from time to time . If the position of the element to be removed is provided, the time complexity increases to $O(n)$.
    ```python
        # create a list of prime numbers
        prime_numbers = [2, 3, 5, 7]

        # remove the element at index 2
        removed_element = prime_numbers.pop(2)

        print(removed_element) # 5
    ```
-  **remove**: The remove(value) method removes the first matching element (which is passed as an argument) from the list. Time complexity ~ $O(n)$.
    ```python
        # create a list
        numbers = [2, 3, 5, 7, 9, 11, 9]

        # removes the first 9 from the list
        numbers.remove(9)
        print(numbers) # [2, 3, 5, 7, 11, 9]
    ```
-  **sort**: Sort the elements of the list. Time comlexity ~ $O(k \cdot n log(n))$ where k is the time complexity of the comparaison between the elements of the list. For strings k ~ $O(L)$ where L is the length of the longest string in the list, if the value of k is bounded, then we can drop the term as it is considered a constant.
- **clear**: Removes all the items from the list. Time complexity ~ $O(1)$ (for algorithms analysis) or $O(n)$ (real life). If memory management is taken into account (realistic), the code iterates over all elements of the list and decreases their reference count, but since this depends on the implementation of the operation, GC related stuff etc.
    ```python
        color = ['Red', 'Pink', 'Orange']
        print(color.clear()) # None
    ```
-  **count**: Count the number of instances of a particular element in the list
    ```python
        place = ['Delhi', 'Bangalore', 'kolkata', 'Delhi']
        value = place.count('Delhi')
        print(value) #2
    ```


### Reversing lists: reversed vs reverse [::-1]

- **reversed**: **Returns an iterator** ready to traverse the list in order. Time comlexity ~ $O(1)$, since it only returns an iterator and does nothing to the list. **Use in list comprehensions or in for loops**. The resulting iterator can be converted into a list using the list() function.
    ```python
        my_list = [123, 44, 99, 11, 99]
        my_new_list = [my_function(x) for x in reversed(my_list)]

        for element in my_list:
            print(2*element) # prints 198, 22, 198, ...
    ```
- **list.reverse()** Reverses the list, but does not return any value. Time complexity ~ $O(n)$.
    ```python
        my_list = [123, 44, 99, 11, 99]
        print(my_list.reverse()) # prints nothing
        print(my_list) # [99, 11, 99, 44, 123]
    ```
- **list[::-1]** Returns a **new list** containing the same elements but in the reverse order. **The original list is left unchanged**. Time complexity ~ $O(n)$
    ```python
        my_list = [123, 44, 99, 11, 99]
        my_new_list = my_list[::-1]
        print(my_new_list) # [99, 11, 99, 44, 123]
        print(my_list is my_new_list) # False
    ```

### Sort vs Sorted
- **sorted(iterable, key, reverse=False)**: The sorted() method sorts the given sequence as well as set and dictionary(which is not a sequence) either in ascending order or in descending order(does unicode comparison for string char by char) and always returns the a sorted list. This method doesn’t effect the original sequence. The key function is used to generate the values used for sortng.

    ```python
        L = ['aaaa', 'bbb', 'cc', 'd']
        
        # sorted without key parameter
        print(sorted(L)) # ['aaaa', 'bbb', 'cc', 'd']
        print()
        
        # sorted with key parameter
        print(sorted(L, key = len)) # ['d', 'cc', 'bbb', 'aaaa']
    ```
- **List_name.sort(key, reverse=False))**: The sort() function is very similar to sorted() but unlike sorted **it returns nothing and makes changes to the original sequence**. Moreover, sort() is a method of list class and can only be used with lists.
    ```python
        numbers = [1, 3, 4, 2] 
        
        # Sort in descending order
        numbers.sort(reverse = True) 
        print(numbers) # [4, 3, 2, 1]
    ```