## Sets

Set properties: unordered, iterable, mutable, can contain multiple data types

Made of unique elements (strings, numbers, or tuples)

Like dictionaries, but with keys only (no values)

Sets only stores unique values

Sets are unordered in nature 


In [89]:
# create an empty set

empty_set = set()

print(empty_set)
print(type(empty_set))

set()
<class 'set'>


In [90]:
# create a set directly

languages = {'python', 'r', 'java'}

print(languages)

{'java', 'python', 'r'}


In [91]:
# create a set from a list

snakes = set(['cobra', 'viper', 'python'])

print(snakes)

{'python', 'viper', 'cobra'}


In [92]:
s = {10, 20, 20, 30, 30, 30}

print(s)
# automatically set won't consider duplicate elements

{10, 20, 30}


In [93]:
s = set([10, 20, 20, 30, 30, 30])

print(s)
# automatically set won't consider duplicate elements

{10, 20, 30}


### Examine a set

In [94]:
print(languages)
print('\n')
print(snakes)

{'java', 'python', 'r'}


{'python', 'viper', 'cobra'}


In [95]:
len(languages)

3

In [96]:
len(snakes)

3

In [97]:
'python' in languages

True

In [98]:
'viper' in snakes

True

In [99]:
# Set doesn't suppots indexing

s = {10, 20, 20, 30, 30, 30}

print(s)
print(s[1])

{10, 20, 30}


TypeError: 'set' object is not subscriptable

### Set operations

In [100]:
# intersection

languages & snakes

{'python'}

In [101]:
# intersection

languages.intersection(snakes)

{'python'}

In [102]:
# union

languages | snakes

{'cobra', 'java', 'python', 'r', 'viper'}

In [103]:
# union

languages.union(snakes)

{'cobra', 'java', 'python', 'r', 'viper'}

In [104]:
# set difference

languages - snakes # returns items that are only in languages and not in snakes

{'java', 'r'}

In [105]:
# set difference

languages.difference(snakes)

{'java', 'r'}

In [106]:
# set difference

snakes - languages # returns items that are only in snakes and not in languages

{'cobra', 'viper'}

In [107]:
# set difference

snakes.difference(languages)

{'cobra', 'viper'}

In [108]:
# symmetric difference
# returns items that are present in languages znd snakes but not the common items
# Mathematiclly: symmetric_difference = (A ∪ B) - (A ∩ B)
#i.e. A union B minus A intersection b

languages.symmetric_difference(snakes) 

{'cobra', 'java', 'r', 'viper'}

In [109]:
# symmetric difference

languages ^ snakes

{'cobra', 'java', 'r', 'viper'}

In [110]:
# Subset

x = {'a', 'b', 'c', 'd', 'e', 'f'}
y = {'e', 'f'}

print(x.issubset(y))

print('\n')

print(y.issubset(x))

False


True


In [111]:
# Frozen sets 
# it is an immutable set
# supports union, intersection etc..

set1 = frozenset([1, 2, 3, 4])
set2 = frozenset([3, 4, 5, 6])

set1.add(5)     # but not this
set1[1]         # but not this

AttributeError: 'frozenset' object has no attribute 'add'

### Modify a set (does not return the set)

In [112]:
# add a new element

languages.add('sql')

languages

{'java', 'python', 'r', 'sql'}

In [113]:
# try to add an existing element (ignored, no error)

languages.add('r')

languages

{'java', 'python', 'r', 'sql'}

In [114]:
# add multiple elements (can also pass a set)

languages.update(['go', 'spark'])

languages

{'go', 'java', 'python', 'r', 'spark', 'sql'}

In [115]:
languages.update(['c', 'c++', 'c#'])

languages

{'c', 'c#', 'c++', 'go', 'java', 'python', 'r', 'spark', 'sql'}

In [116]:
languages.update(['go', 'spark'], {'ruby', 'swift'})

languages

{'c',
 'c#',
 'c++',
 'go',
 'java',
 'python',
 'r',
 'ruby',
 'spark',
 'sql',
 'swift'}

In [117]:
# remove an element

languages.remove('java')

languages

{'c', 'c#', 'c++', 'go', 'python', 'r', 'ruby', 'spark', 'sql', 'swift'}

In [118]:
# try to remove a non-existing element (this would throw an error)

languages.remove('php')

KeyError: 'php'

In [119]:
# remove an element if present, but ignored otherwise

languages.discard('php')

languages

{'c', 'c#', 'c++', 'go', 'python', 'r', 'ruby', 'spark', 'sql', 'swift'}

In [120]:
# remove and return an arbitrary element

languages.pop()

languages

{'c', 'c++', 'go', 'python', 'r', 'ruby', 'spark', 'sql', 'swift'}

In [176]:
languages.pop()

languages

{'c', 'c#', 'c++', 'go', 'python', 'spark', 'sql', 'swift'}

In [121]:
# remove all elements

languages.clear()

languages

set()

In [122]:
## Get a sorted list of unique elements from a list

sorted(set([9, 0, 2, 1, 0]))

[0, 1, 2, 9]

## Practice

In [123]:
s = {}
s

{}

In [124]:
type(s)

dict

In [125]:
s = {2,3}
s

{2, 3}

In [126]:
type(s)

set

In [127]:
s = {2,3,4,5,5,5,5,555,6,7,2,2,'sohel','Sohel','sohel','Sohel'}
type(s)

set

In [128]:
s

{2, 3, 4, 5, 555, 6, 7, 'Sohel', 'sohel'}

In [129]:
s = {[4,5,6],[4,5,6]}

TypeError: unhashable type: 'list'

In [132]:
s = {(4,5,6),(4,5,6)}
s

{(4, 5, 6)}

In [133]:
type(s)

set

In [135]:
x = set()
x

set()

In [136]:
type(x)

set

In [137]:
x = set([1,2,3,4,5,7,7,7,7,77,777])
x

{1, 2, 3, 4, 5, 7, 77, 777}

In [138]:
for i in x:
    print(i)

1
2
3
4
5
7
777
77


In [139]:
for i in range(len(x)):
    print(i)

0
1
2
3
4
5
6
7


In [140]:
x.add(8)
x

{1, 2, 3, 4, 5, 7, 8, 77, 777}

In [141]:
x.add(5)
x

{1, 2, 3, 4, 5, 7, 8, 77, 777}

In [143]:
l = list(x)
l

[1, 2, 3, 4, 5, 7, 8, 777, 77]

In [144]:
set(l)

{1, 2, 3, 4, 5, 7, 8, 77, 777}

In [145]:
n = {5,6,7,456,34,234,45}
n

{5, 6, 7, 34, 45, 234, 456}

In [146]:
# Check the order of the elements

n = {5,6,7,456,34,234,45,"sohel","abc",3,4,6,75675767,5756545353,5756}
n

{234, 3, 34, 4, 45, 456, 5, 5756, 5756545353, 6, 7, 75675767, 'abc', 'sohel'}

In [147]:
type(n)

set