In [1]:
from random import randrange,randint
import pandas as pd

In [2]:
# generate a random set of characters from the
def gen_axiom(maxlen=3):
    u_chars = 'MIU'
    return ''.join([u_chars[randrange(0,3)] for i in range(maxlen)])

In [3]:
# program the four rules
def rule1(s):
    if s[-1] == 'I':
        return s + 'U'
    return s

def rule2(s):
    if s[0] == 'M':
        return 'M' + s[1:] + s[1:]
    return s

def rule3(s):
    if "III" in s:
        return s.replace("III","U")
    else:
        return s
    
def rule4(s):
    if "UU" in s:
        return s.replace("UU","")
    return s

In [4]:
def gen_derivation(axiom,n_derivations=10,rules=[rule1,rule2,rule3,rule4]):
    # generate up to n_derivations derivations
    theorem = []
    step_changes = []
    step_change_ixs = []
    for i in range(n_derivations):
        func = rules[randrange(0,4)]
        derivation = func(axiom)
        if derivation == '':
            theorem.append(axiom)
            return theorem,len(theorem),step_changes,step_change_ixs

        if i != 0:
            if derivation != axiom:
                step_changes.append((axiom,derivation))
                step_change_ixs.append((i-1,i))
        axiom = derivation
        
        theorem.append(axiom)
    return theorem,len(theorem),step_changes,step_change_ixs

In [5]:
def get_first_cliff_and_drop(diff_lens,diffs):
    cliff_drops = {
        'drop_ixs':[],
        'cliffs':[],
        'drops':[],
        'cliff_drop_diff':[],
        'pct_cliff_drop_diff':[],
        'overall_change':[]
    }
     
    for ix,val in enumerate(diffs):
        if (val<0) and (diffs[ix+1] >= 0):
            cliff_drops['overall_change'].append(sum(diffs))
            cliff_drops['drop_ixs'].append(ix+1)
            cliff_drops['cliffs'].append(diff_lens[ix])
            cliff_drops['drops'].append(diff_lens[ix+1])
            
            cliff_drop_diff = diff_lens[ix]-diff_lens[ix+1]
            cliff_drops['cliff_drop_diff'].append(cliff_drop_diff)
            cliff_drops['pct_cliff_drop_diff'].append(cliff_drop_diff/diff_lens[ix])
    return cliff_drops

In [6]:
def create_random_axiom_dataset(nsamples=20,max_axiom_size=3,max_derivations=20):
    sample_data = []
    all_derivations = []
    steps = []
    step_changes = []
    for i in range(nsamples):
        axiom = gen_axiom(maxlen=randint(2,max_axiom_size))
        derivations,derivation_len,step_changes,step_change_ixs = gen_derivation(axiom,max_derivations)
        diff_lens = [len(step) for step in derivations]
        diffs = [diff_lens[i] - diff_lens[i-1] for i in range(len(diff_lens))][1:]

        try:
            metadata = pd.DataFrame(get_first_cliff_and_drop(diff_lens,diffs))
            metadata.loc[:,'axiom'] = axiom
            metadata.loc[:,'axiom_len'] = len(axiom)
            metadata.loc[:,'sample_id'] = str(i)
            metadata.loc[:,'derivation_length'] = derivation_len
            metadata.loc[:,'step_longest'] = max(diff_lens)
            metadata.loc[:,'step_shortest'] = min(diff_lens)
            metadata.loc[:,'MU_ind'] = 'MU' in derivations
            sample_data.append(metadata)
            steps.append(step_changes)
            step_changes_ixs.append(step_change_ixs)
        except:
            continue
        all_derivations.append(derivations)
    
    sample_data = pd.concat(sample_data)
    pct_unique_id = len(sample_data['sample_id'].unique())/nsamples
    return sample_data,all_derivations,pct_unique_id,steps,step_changes

Now, we run the functions above, developing derivations for 100,000 random axioms. We then see which ones 

In [7]:
nsamp = 100000
random_theorems,derivations,pct_samples,steps,step_changes =\
    create_random_axiom_dataset(nsamples=nsamp,max_axiom_size=3,max_derivations=20)
pct_samples

0.17019

In [8]:
unique_derivations = random_theorems[['axiom','sample_id','MU_ind']].drop_duplicates()
unique_derivations = unique_derivations.reset_index(drop=True)
unique_derivations.head()

Unnamed: 0,axiom,sample_id,MU_ind
0,MUI,7,False
1,MUU,20,False
2,MII,23,False
3,MU,30,True
4,MI,34,False


In [9]:
summary_unique_derivations = unique_derivations.pivot_table('sample_id','axiom','MU_ind',aggfunc='count',).reset_index()
summary_unique_derivations.sort_values(True).head(15)

MU_ind,axiom,False,True
4,MU,1376.0,3970.0
0,III,1347.0,
1,IUU,1346.0,
2,MI,1177.0,
3,MII,794.0,
5,MUI,1432.0,
6,MUU,1410.0,
7,UUI,1354.0,
8,UUM,1408.0,
9,UUU,1405.0,
