1. Quick Example

In [None]:
s = set([2,3,5])
print(3 in s)          # prints True
print(4 in s)          # prints False
for x in range(7):
    if (x not in s):
        print(x)       # prints 0 1 4 6

## Creating Sets

2.1 Creating an empty set

In [None]:
s = set()
print(s)     # prints set()

In [None]:
# hash() maps a hashable value to an integer; sets use this integer to decide
# where an element is stored.
# NOTE: for strings the value typically differs from one interpreter run to the
# next (hash randomization), so do not expect a fixed number here.
hash("hello")

2.2 Creating a set from a list

In [None]:
s = set(["cat", "cow", "dog"])
print(s)     # prints e.g. {'dog', 'cow', 'cat'} -- sets are unordered, so your order may differ

2.3 Create a set from any iterable object

In [None]:
s = set("wahoo")
print(s)     # surprised?

2.4 Create a statically-allocated set

In [None]:
s = { 2, 3, 5 }
print(s)    # prints {2, 3, 5}

Caution: { } is not an empty set!

In [None]:
s = { }
print(type(s) == set)  # False!
print(type(s))         # This is a dict (we'll learn about those soon)

## Properties of Sets

3.1 Sets are Unordered

In [None]:
s = set([2,4,8])
print(s)          # prints {8, 2, 4} in standard Python 
for element in s: # prints 8, 2, 4
    print(element)

3.2 Elements are Unique

In [1]:
s = set([2,2,2])
print(s)          # what will this print?
print(len(s))     # what will this print?

{2}
1


3.3 Elements Must Be Immutable

In [None]:
a = ["lists", "are", "mutable"]
s = set([a])       # TypeError: unhashable type: 'list'
print(s)

Another example:

In [None]:
# A set is itself mutable, and therefore unhashable, so it cannot be stored
# as an element of another set.
s1 = set(["sets", "are", "mutable", "too"])
s2 = set([s1])     # TypeError: unhashable type: 'set'
print(s2)          # not reached: the line above raises

## Sets are Very Efficient
For membership tests, sets are much more efficient than lists. Checking whether an element appears in a set takes (on average) constant time, while the time needed to find an element in a list grows with the size of the list. This is accomplished with an approach called hashing.

A hash function takes a value as input and returns an integer. The function should return the same integer each time it's called on a given value, and should generally return different integers for different values, though that does not always need to be the case. We actually don't need to build the hash function ourselves; Python has one already, a built-in function called hash.

The computer stores items in a set by creating a list of some length n, then choosing indexes in the list where each element in the set can be stored. These indexes are calculated as hash(element) % n. Then, when we need to check whether an item exists in a set, we don't need to check every possible index; we only need to check the one computed by our formula!

A practical example of how sets are faster than lists is shown below:

In [2]:
# 0. Preliminaries
import time
n = 10000

# 1. Create a list [2,4,6,...,n] then check for membership
# among [0,1,2,...,n] in that list.

# don't count the list creation in the timing
a = list(range(2,n+1,2))

# Timing uses time.perf_counter(): a monotonic, high-resolution clock meant
# for measuring short intervals. A coarse wall clock could report 0 seconds
# for the very fast set pass, which would make the ratio below divide by zero.
print("Using a list... ", end="")
start = time.perf_counter()
count = 0
for x in range(n+1):
    if x in a:           # linear scan of the list on every test
        count += 1
end = time.perf_counter()
elapsed1 = end - start
print("count=", count," and time = %0.4f seconds" % elapsed1)

# 2. Repeat, using a set
# (building the set from the list IS included in the set's time)
print("Using a set.... ", end="")
start = time.perf_counter()
s = set(a)
count = 0
for x in range(n+1):
    if x in s:           # hash lookup instead of a scan
        count += 1
end = time.perf_counter()
elapsed2 = end - start
print("count=", count," and time = %0.4f seconds" % elapsed2)
print("With n=%d, sets ran about %0.1f times faster than lists!" %
      (n, elapsed1/elapsed2))
print("Try a larger n to see an even greater savings!")

Using a list... count= 5000  and time = 0.1936 seconds
Using a set.... count= 5000  and time = 0.0013 seconds
With n=10000, sets ran about 143.5 times faster than lists!
Try a larger n to see an even greater savings!


## Set Operations

Set operations are provided via operators, functions, and methods in Python as follows:

1. Operations on a set

len(s): cardinality (size) of set s

In [None]:
s = { 2, 3, 2, 4, 3 }
print(len(s)) # what will this print?

s.copy(): new set with a shallow copy of s

In [None]:
s = { 1, 2, 3 }
t = s.copy()   # t is a separate set holding the same elements
s.add(4)       # modifies s only
print(s)       # {1, 2, 3, 4}
print(t)       # {1, 2, 3} -- the copy is unaffected by the add

s.clear(): remove all elements from set s

In [None]:
s = { 1, 2, 3 }
s.clear()
print(s, len(s))

2. Operations on a set and an element


x in s: test x for membership in s

In [None]:
s = { 1, 2, 3 }
print(0 in s)
print(1 in s)

x not in s: test x for non-membership in s

In [None]:
s = { 1, 2, 3 }
print(0 not in s)
print(1 not in s)

s.add(x): 	add element x to set s

In [None]:
s = { 1, 2, 3 }
print(s, 4 in s)
s.add(4)
print(s, 4 in s)

s.remove(x): remove x from set s; raises KeyError if not present

In [None]:
s = { 1, 2, 3 }
print(s, 3 in s)   # {1, 2, 3} True
s.remove(3)
print(s, 3 in s)   # {1, 2} False
s.remove(3) # raises KeyError: 3 is no longer in s

s.discard(x): 	removes x from set s if present

In [None]:
s = { 1, 2, 3 }
print(s, 3 in s)
s.discard(3)
print(s, 3 in s)
s.discard(3) # does not crash!
print(s, 3 in s)

3. Operations on two sets (or a set and an iterable)

s.issubset(t): s <= t

test whether every element in s is in t

In [None]:
# The <= operator and the issubset() method give the same answer.
print({1,2} <= {1},     {1,2}.issubset({1}))       # False False
print({1,2} <= {1,2},   {1,2}.issubset({1,2}))     # True True (a set is a subset of itself)
print({1,2} <= {1,2,3}, {1,2}.issubset({1,2,3}))   # True True

s.issuperset(t): s >= t

test whether every element in t is in s

In [None]:
# The >= operator and the issuperset() method give the same answer.
print({1,2} >= {1},     {1,2}.issuperset({1}))       # True True
print({1,2} >= {1,2},   {1,2}.issuperset({1,2}))     # True True (a set is a superset of itself)
print({1,2} >= {1,2,3}, {1,2}.issuperset({1,2,3}))   # False False

s.union(t): s | t

new set with elements from both s and t

In [None]:
print({1,2} | {1},     {1,2}.union({1}))      # {1, 2} {1, 2}
print({1,2} | {1,3},   {1,2}.union({1,3}))    # {1, 2, 3} {1, 2, 3}
s = {1,2}
t = s | {1,3}    # builds a new set; s itself is not modified
print(s, t)      # {1, 2} {1, 2, 3}

s.intersection(t): s & t

new set with elements common to s and t

In [None]:
print({1,2} & {1},     {1,2}.intersection({1}))
print({1,2} & {1,3},   {1,2}.intersection({1,3}))
s = {1,2}
t = s & {1,3}
print(s, t) # what will this print?

s.difference(t): s - t

new set with elements in s but not in t

In [None]:
print({1,2} - {1},     {1,2}.difference({1}))      # {2} {2}
print({1,2} - {1,3},   {1,2}.difference({1,3}))    # {2} {2} (3 is not in the left set, so it has no effect)
s = {1,2}
t = s - {1,3}    # builds a new set; s itself is not modified
print(s, t)      # {1, 2} {2}

s.update(t): s |= t

modify s adding all elements found in t

In [None]:
s = {1,2}
t = {1,3}
u = {2,3}
s.update(u)      # in place: s becomes {1, 2, 3}
t |= u           # operator form of update: t becomes {1, 2, 3}
print(s, t, u)   # {1, 2, 3} {1, 2, 3} {2, 3} -- u is unchanged

## Some Worked Examples Using Sets

In [None]:
## Example 1 Permutation

def isPermutation(L):
    # True exactly when L is some rearrangement of 0, 1, ..., len(L)-1.
    # Comparing as sets works because a list of length n can only cover
    # all n target values if none of its entries is repeated.
    expected = set(range(len(L)))
    actual = set(L)
    return (expected == actual)

def testIsPermutation():
    # Run isPermutation() over a table of (list, expected answer) pairs,
    # reporting progress on stdout; an AssertionError marks a failure.
    print("Testing isPermutation()...", end="")
    cases = [
        ([0,2,1,4,3], True),
        ([1,3,0,4,2], True),
        ([1,3,5,4,2], False),   # 5 is out of range and 0 is missing
        ([1,4,0,4,2], False),   # 4 appears twice and 3 is missing
    ]
    for (candidate, expected) in cases:
        assert(isPermutation(candidate) == expected)
    print("Passed!")

testIsPermutation()

In [None]:
## Example 2 repeats

def repeats(L):
    # Return, in sorted order, every value that occurs more than once in L.
    # Each repeated value is reported a single time no matter how often
    # it occurs.
    firstTime = set()     # values met at least once so far
    duplicates = set()    # values met at least twice so far
    for item in L:
        # A value already met goes to duplicates; a new one is recorded.
        bucket = duplicates if (item in firstTime) else firstTime
        bucket.add(item)
    return sorted(duplicates)

def testRepeats():
    # Run repeats() over a table of (list, expected result) pairs,
    # reporting progress on stdout; an AssertionError marks a failure.
    print("Testing repeats()...", end="")
    cases = [
        ([1,2,3,2,1], [1,2]),
        ([1,2,3,2,2,4], [2]),                       # 2 occurs three times, listed once
        (list(range(100)), [ ]),                    # no value repeats
        (list(range(100))*5, list(range(100))),     # every value repeats
    ]
    for (data, expected) in cases:
        assert(repeats(data) == expected)
    print("Passed!")

testRepeats()