In [1]:
# CB: Figure out a function to do set shattering in Python.
# A class (collection of sets) shatters a set S if intersecting each set in the
# class with S yields every subset of S, i.e. the whole powerset of S.

# We will use itertools.

In [2]:
import itertools

In [3]:
# CB: Sets are collections of (non-repeating) elements.
# These can be integers, strings, other sets, etc.
# In set theory there are technical distinctions between classes,
# families, multisets, sets of sets... but these aren't particularly relevant for the basic concepts.
# I am using "class" as synonymous with a set of sets.

In [4]:
%%latex
The empty set is represented by empty brackets $\{\}$. It is an element of the powerset of every set.

<IPython.core.display.Latex object>

In [5]:
%%latex
An example set is $\{1, 2, \text{three}, \text{four}\}$.

<IPython.core.display.Latex object>

In [6]:
# Python has a built-in set type which we can use (and check that repeated elements are excluded).
a_set = set([1,2,2,'three','three','four'])

In [7]:
a_set

{1, 2, 'four', 'three'}

In [8]:
%%latex
The powerset $\mathcal{P}(S)$ of a set $S$ is the set of all subsets of $S$, including the empty set and $S$ itself.  
The cardinality (length, number of elements) of the powerset, $|\mathcal{P}(S)|$, is equal to $2^n$ for $|S| = n$.  

<IPython.core.display.Latex object>

In [9]:
# CB: First step is to create a function which constructs the power set.
# This function should take a set of n elements, and return a set
# of 2**n elements of all combinations of subsets including the empty set.

In [10]:
def powerset(input_set):
    '''Build the powerset of ``input_set`` and return it as a set of frozensets.

    Every subset (from the empty set up to the full set) is represented as a
    frozenset, because ordinary sets are unhashable and so cannot be stored
    inside another set.

    Besides returning the result, the function prints how many subsets were
    built and whether that number matches the expected 2**len(input_set).
    '''
    n_elements = len(input_set)

    # For each subset size 0..n, itertools.combinations yields the subsets of
    # that size as tuples; freezing each tuple lets it live inside a set.
    subsets = {
        frozenset(combo)
        for size in range(n_elements + 1)
        for combo in itertools.combinations(input_set, size)
    }

    # Sanity prints: report the size and compare it against 2**n.
    expected_size = 2**n_elements
    print('A powerset of the set {} has been constructed with {} elements.'.format(input_set, len(subsets)))
    if len(subsets) != expected_size:
        print('Something is insane.')
    else:
        print('This is sane: len(powerset) == 2**(len(input_set)), i.e. {} = {}.'.format(len(subsets), expected_size))

    return subsets

In [11]:
pwr = powerset(a_set)

A powerset of the set {'three', 1, 2, 'four'} has been constructed with 16 elements.
This is sane: len(powerset) == 2**(len(input_set)), i.e. 16 = 16.


In [12]:
pwr

{frozenset(),
 frozenset({2}),
 frozenset({2, 'three'}),
 frozenset({'four'}),
 frozenset({1}),
 frozenset({1, 2}),
 frozenset({1, 'four'}),
 frozenset({2, 'four'}),
 frozenset({1, 2, 'four'}),
 frozenset({'three'}),
 frozenset({'four', 'three'}),
 frozenset({1, 'three'}),
 frozenset({1, 'four', 'three'}),
 frozenset({2, 'four', 'three'}),
 frozenset({1, 2, 'three'}),
 frozenset({1, 2, 'four', 'three'})}

In [13]:
len(pwr)

16

In [14]:
powerset(set('hi'))

A powerset of the set {'i', 'h'} has been constructed with 4 elements.
This is sane: len(powerset) == 2**(len(input_set)), i.e. 4 = 4.


{frozenset(), frozenset({'i'}), frozenset({'h'}), frozenset({'h', 'i'})}

In [15]:
powerset(set(['hi','you']))

A powerset of the set {'you', 'hi'} has been constructed with 4 elements.
This is sane: len(powerset) == 2**(len(input_set)), i.e. 4 = 4.


{frozenset(), frozenset({'you'}), frozenset({'hi'}), frozenset({'hi', 'you'})}

In [16]:
# CB: Now we create another function which checks if a class of sets "shatters" a set.
# A class of sets C shatters a set S if P(S) can be
# constructed by intersecting the sets in C with S (each intersection c & S is a subset of S, i.e. an element of P(S)).

In [17]:
%%latex
$S$ is shattered by $C$ if $\mathcal{P}(S) = \{c \cap S \mid c \in C \}$

<IPython.core.display.Latex object>

In [45]:
# CB: A quick function to check if a set has been shattered.
# It uses our previous powerset function.

def shatter_check(set_to_shatter, class_of_sets):
    '''
    Check whether a class of sets shatters a set, and print the verdict.

    A class C shatters a set S when the intersections {c & S for c in C}
    make up the whole powerset of S.  The members of C may contain
    elements that are not in S; only their intersection with S matters,
    so the subsets of S do not have to be literal members of C.

    Arguments:
        set_to_shatter: the set S to be shattered.
        class_of_sets: a sized collection of sets (the class C),
            e.g. a set of frozensets.

    Prints "Shattered. ..." together with the sizes of the class, the set
    and its powerset, or prints "Not Shattered".  The call to powerset()
    also emits its own sanity prints.  Returns None.
    '''
    # First calculate the powerset (a set of frozensets).
    pwrset = powerset(set_to_shatter)

    # Intersect every member of the class with the set to shatter.  Each
    # result is a frozenset, so it is directly comparable with the
    # elements of pwrset.
    target = frozenset(set_to_shatter)
    traces = {target.intersection(member) for member in class_of_sets}

    # Every trace is a subset of S, so the traces equal P(S) exactly when
    # every element of P(S) shows up among them.
    if pwrset <= traces:
        print("Shattered.  The size of the class {} is greater or equal to the size of the set to be shattered {} and it's powerset's size {}.".format(len(class_of_sets),len(set_to_shatter),len(pwrset)))
    else:
        print('Not Shattered')

In [46]:
ex_class = pwr.copy()

In [47]:
ex_class.add(frozenset([1,2,3,4,5]))
ex_class

{frozenset(),
 frozenset({2, 'three'}),
 frozenset({'four'}),
 frozenset({1}),
 frozenset({1, 'four'}),
 frozenset({2}),
 frozenset({1, 2}),
 frozenset({1, 2, 'four'}),
 frozenset({'three'}),
 frozenset({1, 'four', 'three'}),
 frozenset({2, 'four', 'three'}),
 frozenset({1, 2, 'three'}),
 frozenset({2, 'four'}),
 frozenset({'four', 'three'}),
 frozenset({1, 'three'}),
 frozenset({1, 2, 'four', 'three'}),
 frozenset({1, 2, 3, 4, 5})}

In [48]:
pwr

{frozenset(),
 frozenset({2}),
 frozenset({2, 'three'}),
 frozenset({'four'}),
 frozenset({1}),
 frozenset({1, 2}),
 frozenset({1, 'four'}),
 frozenset({2, 'four'}),
 frozenset({1, 2, 'four'}),
 frozenset({'three'}),
 frozenset({'four', 'three'}),
 frozenset({1, 'three'}),
 frozenset({1, 'four', 'three'}),
 frozenset({2, 'four', 'three'}),
 frozenset({1, 2, 'three'}),
 frozenset({1, 2, 'four', 'three'})}

In [49]:
len(pwr) == len(ex_class)

False

In [50]:
shatter_check(a_set,ex_class)

A powerset of the set {'three', 1, 2, 'four'} has been constructed with 16 elements.
This is sane: len(powerset) == 2**(len(input_set)), i.e. 16 = 16.
Shattered.  The size of the class 17 is greater or equal to the size of the set to be shattered 4 and it's powerset's size 16.


In [51]:
# CB: We could create a brute force shattering function,
# which takes a set as input and returns a class that 
# shatters the input set.  What would this function do?

In [2]:
def shatter(input_set):
    '''Placeholder for a brute-force shattering function (not implemented).

    Open question from the notes: given a set, what class of sets should be
    returned so that the class shatters it?  For now the argument is ignored
    and nothing is computed.
    '''
    return None

In [1]:
from set_shattering import powerset,shatter_check

In [4]:
v = 123456

In [5]:
another_set = set([23,24,24,25,'twenty siz','whooopdie doooo',v])

In [6]:
powerset(another_set)

A powerset of the set {123456, 'twenty siz', 'whooopdie doooo', 23, 24, 25} has been constructed with 64 elements.
This is sane: len(powerset) == 2**(len(input_set)), i.e. 64 = 64.


{frozenset(),
 frozenset({123456, 'twenty siz'}),
 frozenset({23}),
 frozenset({24}),
 frozenset({'twenty siz'}),
 frozenset({23, 24, 25, 'twenty siz'}),
 frozenset({'whooopdie doooo'}),
 frozenset({23, 24, 25, 'whooopdie doooo'}),
 frozenset({23, 24, 'twenty siz', 'whooopdie doooo'}),
 frozenset({123456, 24, 'twenty siz', 'whooopdie doooo'}),
 frozenset({25}),
 frozenset({25, 'twenty siz'}),
 frozenset({25, 123456}),
 frozenset({123456, 24, 25, 'twenty siz'}),
 frozenset({23, 25}),
 frozenset({24, 25}),
 frozenset({123456, 24, 'twenty siz'}),
 frozenset({123456, 23, 24, 25, 'twenty siz'}),
 frozenset({25, 'whooopdie doooo'}),
 frozenset({123456, 23, 24, 'whooopdie doooo'}),
 frozenset({123456, 23, 'twenty siz', 'whooopdie doooo'}),
 frozenset({23, 24, 25, 123456}),
 frozenset({23, 'whooopdie doooo'}),
 frozenset({123456}),
 frozenset({23, 'twenty siz'}),
 frozenset({123456, 25, 'twenty siz', 'whooopdie doooo'}),
 frozenset({24, 'whooopdie doooo'}),
 frozenset({'twenty siz', 'whooopdie

In [None]:
# A machine learning model, or a classifier, is an object which attempts to split data.
# Or, it tries to respond to inputs (data) by an appropriate action (prediction, classification).
# The VC dimension is a measure of the maximum ability of the model to achieve this data separation or classification:
# it is the size of the largest set that the model's class of classifiers can shatter.