# Set ADT

- An ADT used to store items

    -  An item is called a member (or an element) of a set

    <br>


- Items in a set are **unordered**

    - But access to them should be fast!
    - And **NO duplicates** are allowed!

    <br>

- Items can be added or removed

- Two sets can be compared, merged, subtracted, etc.


<br>

<img src="images/set.png" width="600px"> <img src="images/venn.png" width="500px">

<br><hr>


## Base Abstract Class: Set class

In [2]:
from abc import ABC, abstractmethod
from typing import TypeVar, Generic


T = TypeVar('T')


class Set(ABC, Generic[T]):
    """Abstract interface of the Set ADT.

    A set stores members with no ordering and no duplicates. Concrete
    subclasses provide the storage and must implement every abstract
    method declared here.
    """

    def __init__(self) -> None:
        """Initialise the set as empty by delegating to ``clear``."""
        self.clear()

    @abstractmethod
    def __len__(self) -> int:
        """Return the number of members currently in the set."""

    @abstractmethod
    def is_empty(self) -> bool:
        """Return True when the set has no members."""

    @abstractmethod
    def is_full(self) -> bool:
        """Return True when no further member can be added."""

    @abstractmethod
    def clear(self) -> None:
        """Discard every member, leaving the set empty."""

    @abstractmethod
    def __contains__(self, item: T) -> bool:
        """Return True when ``item`` is a member (backs the ``in`` operator)."""

    @abstractmethod
    def add(self, item: T) -> None:
        """Insert ``item`` into the set."""

    @abstractmethod
    def remove(self, item: T) -> None:
        """Take ``item`` out of the set."""


# Deliberate failure: Set still has unimplemented abstract methods, so
# instantiating it directly raises TypeError instead of creating an object.
test = Set()

TypeError: Can't instantiate abstract class Set with abstract methods __contains__, __len__, add, clear, is_empty, is_full, remove

<br><hr>

## Inheriting Base Abstract Class: ArraySet class

In [3]:
from __future__ import annotations
from referential_array import ArrayR


class ArraySet(Set[T]):
    """Set ADT stored in a fixed-capacity ``ArrayR``.

    The live members are ``self.array[0]`` .. ``self.array[self.size - 1]``,
    in no particular order. Slots at index ``self.size`` and beyond may hold
    stale values and must never be treated as members.

    In the complexity notes below, ``n = len(self)``, ``m = len(other)`` and
    ``comp`` is the cost of one ``==`` comparison between two items.
    """

    MIN_CAPACITY = 1

    def __init__(self, capacity: int) -> None:
        """Create an empty set able to hold ``capacity`` items.

        Capacities below ``MIN_CAPACITY`` are raised to ``MIN_CAPACITY``.
        Allocating the backing array costs O(capacity).
        """
        self.size = 0
        self.array = ArrayR(max(ArraySet.MIN_CAPACITY, capacity))
        super().__init__()

    def __len__(self) -> int:
        """Return the number of members. O(1)."""
        return self.size

    def clear(self) -> None:
        """Empty the set. O(1).

        Only the size is reset; old values stay in the array but become
        unreachable because every traversal is bounded by ``self.size``.
        """
        self.size = 0

    def is_empty(self) -> bool:
        """Return True when the set has no members. O(1)."""
        return len(self) == 0

    def is_full(self) -> bool:
        """Return True when every slot of the backing array is in use. O(1)."""
        return len(self) == len(self.array)

    def __contains__(self, item: T) -> bool:
        """Return True when ``item`` equals one of the live members.

        Linear search over the live slots only: O(n * comp).
        """
        return any(item == self.array[i] for i in range(self.size))

    def _append(self, item: T) -> None:
        """Store ``item`` in the next free slot WITHOUT a duplicate check.

        Callers must already know that ``item`` is not a member and that the
        array has room. O(1).
        """
        self.array[self.size] = item
        self.size += 1

    def add(self, item: T) -> None:
        """Insert ``item`` unless it is already a member.

        The membership test dominates: O(n * comp).

        :raises Exception: if ``item`` is new and the set is full.
        """
        if item not in self:
            if self.is_full():
                raise Exception("Set is full")
            self._append(item)

    def remove(self, item: T) -> None:
        """Remove ``item`` from the set. O(n * comp).

        The last live member is moved into the freed slot and the size is
        decremented, so no shifting is required (order is irrelevant in a
        set).

        :raises KeyError: if ``item`` is not a member.
        """
        for i in range(self.size):
            if item == self.array[i]:
                self.array[i] = self.array[self.size - 1]
                self.size -= 1
                break
        else:
            # The loop finished without a break, so the item was never found.
            raise KeyError(item)

    def union(self, other: ArraySet[T]) -> ArraySet[T]:
        """Return a new set holding every member of ``self`` or ``other``.

        Cost: allocation O(len(self.array) + len(other.array)), copying
        ``self`` O(n), then ``m`` calls to ``add`` on a set of at most
        ``n + m`` members, i.e. O(m * (n + m) * comp) overall.
        """
        # Sum of capacities is always large enough for the merged result.
        res = ArraySet(len(self.array) + len(other.array))

        # Members of self are already distinct, so copy them directly.
        for i in range(len(self)):
            res._append(self.array[i])

        # add() filters out members of other that self already contributed.
        for i in range(len(other)):
            res.add(other.array[i])

        return res

    def intersection(self, other: ArraySet[T]) -> ArraySet[T]:
        """Return a new set holding the members present in both sets.

        Members appear in the order they are stored in ``other``.
        Cost: ``m`` membership tests against ``self``: O(m * n * comp).
        """
        # The result can never be larger than the smaller operand.
        res = ArraySet(min(len(self.array), len(other.array)))

        # Test against self itself (bounded by self.size) rather than a raw
        # backing array, so stale or overwritten slots are never consulted.
        for i in range(len(other)):
            item = other.array[i]
            if item in self:
                res._append(item)

        return res

    def difference(self, other: ArraySet[T]) -> ArraySet[T]:
        """Return a new set holding the members of ``self`` not in ``other``.

        Members appear in the order they are stored in ``self``.
        Cost: ``n`` membership tests against ``other``: O(n * m * comp).
        """
        # Worst case (disjoint sets) keeps every member of self, so the
        # result needs self's capacity regardless of how small other is.
        res = ArraySet(len(self.array))

        # Build the result directly instead of filling and then removing;
        # removing while indexing would skip the swapped-in member.
        for i in range(len(self)):
            item = self.array[i]
            if item not in other:
                res._append(item)

        return res


"""
Big-O of the implemented functions
(n = len(self), m = len(other), comp = cost of one == comparison):

• __init__:     O(capacity)
• __len__:      O(1)
• clear:        O(1)
• is_empty:     O(1)
• is_full:      O(1)
• __contains__: O(n * comp)
• add:          O(n * comp)
• remove:       O(n * comp)
• union:        O(m * (n+m) * comp)   (+ allocation of both capacities)
• intersection: O(m * n * comp)       (+ allocation of the smaller capacity)
• difference:   O(n * m * comp)       (+ allocation of self's capacity)

"""

# Smoke check: the concrete subclass can be instantiated.
test = ArraySet(5)

<br><hr>

## Using the completed ArraySet (Set ADT)

In [4]:
def display(arrSet, name):
    """Print the members of ``arrSet`` as a list, labelled with ``name``."""
    # Only the first len(arrSet) slots of the backing array are read.
    members = [arrSet.array[idx] for idx in range(len(arrSet))]
    print('\n' + name + ":\t" + str(members))

    

# Two overlapping sets: x = {1, 2, 3} and y = {2, 3, 4}.
x = ArraySet(3)
x.add(1)
x.add(2)
x.add(3)

y = ArraySet(3)
y.add(2)
y.add(3)
y.add(4)

# Each operation returns a new ArraySet; x and y are left untouched.
union_x_y = x.union(y)
intersection_x_y = x.intersection(y)
difference_x_y = x.difference(y)

display(x, "x")
display(y, "y")
display(union_x_y, "x ∪ y")
display(intersection_x_y, "x ∩ y")
# The backslash is escaped: a bare "\ " is an invalid escape sequence.
display(difference_x_y, "x \\ y")


x:	[1, 2, 3]

y:	[2, 3, 4]

x ∪ y:	[1, 2, 3, 4]

x ∩ y:	[2, 3]

x \ y:	[1]


<br><hr>

## Inheriting Base Abstract Class: BitVectorSet class

<img src="images/bit1.png" width="650px"> <img src="images/bit2.png" width="600px">

In [None]:
# PYTHON BITWISE OPERATORS:


a = 0b0101  # 5 in decimal
b = 0b0011  # 3 in decimal
# XOR keeps exactly the bit positions where a and b differ: 0b0110 (6 in decimal)
result = a ^ b
result

In [None]:
class BitVectorSet(Set[T]):
    """Set of positive integers stored as the bits of a single Python int.

    Item ``k`` is a member exactly when bit ``k - 1`` of ``self.elms`` is 1,
    e.g. the set {1, 3, 6} is stored as ``0b100101`` (37).
    """

    def __init__(self, dummy_capacity: int = 1) -> None:
        """Create an empty set.

        ``dummy_capacity`` is accepted but never used by this class.
        """
        self.elms = 0
        super().__init__()

    @staticmethod
    def _validate(item) -> None:
        """Raise TypeError unless ``item`` is an int greater than zero."""
        if not isinstance(item, int) or item <= 0:
            raise TypeError('Set elements should be positive integers')

    def clear(self) -> None:
        """Empty the set by zeroing every bit."""
        self.elms = 0

    def is_empty(self) -> bool:
        """Return True when no bit is set."""
        return self.elms == 0

    def is_full(self) -> bool:
        """Always False: arbitrary-sized ints cannot become full."""
        return False

    def __contains__(self, item: int) -> bool:
        """Return True when the bit for ``item`` is 1.

        :raises TypeError: if ``item`` is not a positive integer.
        """
        self._validate(item)
        return bool((self.elms >> (item - 1)) & 1)

    def add(self, item: int) -> None:
        """Set the bit for ``item``; adding an existing member is a no-op.

        :raises TypeError: if ``item`` is not a positive integer.
        """
        self._validate(item)
        self.elms |= 1 << (item - 1)

    def remove(self, item: int) -> None:
        """Clear the bit for ``item``.

        :raises TypeError: if ``item`` is not a positive integer.
        :raises KeyError: if ``item`` is not a member.
        """
        self._validate(item)
        if item not in self:
            raise KeyError(item)
        # The bit is known to be 1 here, so XOR flips it to 0.
        self.elms ^= 1 << (item - 1)

    def __len__(self) -> int:
        """Return the number of members, i.e. the number of 1 bits.

        The cost is NOT proportional to the number of members: every bit up
        to the highest set one has to be examined, so the work grows with
        ``self.elms.bit_length()`` (the largest member). For example a set
        holding only the item 5 is stored as ``0b10000``: its size is 1, but
        5 bit positions are inspected to find that out.
        """
        # elms is never negative, so bin() yields '0b' followed by the bits.
        return bin(self.elms).count('1')

    def union(self, other: BitVectorSet[int]) -> BitVectorSet[int]:
        """Return a new set with the members of ``self`` or ``other`` (bitwise OR)."""
        res = BitVectorSet()
        res.elms = self.elms | other.elms
        return res

    def __str__(self):
        """Return the members in increasing order, formatted like ``{1, 3, 6}``."""
        members = [
            str(item)
            for item in range(1, self.elms.bit_length() + 1)
            if item in self
        ]
        return '{' + ', '.join(members) + '}'

    def intersection(self, other: BitVectorSet[int]) -> BitVectorSet[int]:
        """Return a new set with the members present in both ``self`` *and*
        ``other`` (bitwise AND).
        """
        res = BitVectorSet()
        res.elms = self.elms & other.elms
        return res

    def difference(self, other: BitVectorSet[int]) -> BitVectorSet[int]:
        """Return a new set with the members of ``self`` that *are not* in
        ``other`` (AND with the complement of ``other``).
        """
        res = BitVectorSet()
        res.elms = self.elms & ~other.elms
        return res
    

