In [1]:
import os,io
from sympy import Poly,terms_gcd,gcd_list
import re
import random
from sympy import factorint,primerange
from fractions import Fraction
import os
import glob
import time
from shutil import copyfile
import shutil

def encode(value):
    """Tokenise an integer as a sign token followed by base-1000 groups.

    The magnitude is written most-significant group first.  Each group is the
    plain decimal string of its value (no zero padding), so 1005 becomes
    ['+', '1', '5'].  Zero is encoded as ['+', '0'].

    Returns:
        list of str: the sign ('+' or '-') followed by the group tokens.
    """
    sign_token = '+' if value >= 0 else '-'
    if value == 0:
        return [sign_token, '0']
    groups = []
    remaining = abs(value)
    while remaining > 0:
        # Peel off the least-significant group; the list is reversed below.
        remaining, low_group = divmod(remaining, 1000)
        groups.append(str(low_group))
    groups.reverse()
    return [sign_token] + groups

class fastRandomDict(object):
    """Dictionary wrapper with constant-time uniform random key selection.

    Next to the wrapped dictionary ``mydict`` the class keeps two maps,
    ``int_to_key`` and ``key_to_int``, that number the keys 0..len-1 without
    gaps.  Picking a random key is then a random integer lookup, and removal
    keeps the numbering dense by moving the highest-numbered key into the
    slot that was freed.
    """
    # from: https://stackoverflow.com/questions/24630804/how-to-get-a-random-value-in-a-very-large-python-dictionary
    def __init__(self, init_dict):
        # Work on a shallow copy so that popping never alters the caller's dict.
        self.mydict = init_dict.copy()
        # Create the bidirectional maps from the dictionary, D
        self.keys = self.mydict.keys()
        self.ints = range(len(self.keys))
        self.int_to_key = dict(zip(self.ints, self.keys))
        self.key_to_int = dict(zip(self.keys, self.ints))

    def __getitem__(self, item):
        """Return the value stored under ``item``, or None when it is absent."""
        return self.mydict.get(item)

    def __contains__(self, item):
        return item in self.mydict

    def __len__(self):
        """Number of key/value pairs currently held."""
        return len(self.mydict)

    def add(self, key, value):  # O(1)
        """Insert ``key`` or overwrite its value.

        An existing key keeps its index; only a new key receives the next
        free index.  Assigning a second index to an existing key would leave
        the two maps inconsistent with ``mydict``.
        """
        if key in self.mydict:
            self.mydict[key] = value
            return
        new_int = len(self.mydict)
        self.mydict[key] = value
        self.int_to_key[new_int] = key
        self.key_to_int[key] = new_int

    def remove(self, key):  # O(1)
        """Delete ``key`` if present; unknown keys are ignored."""
        if key not in self.key_to_int:
            return
        key_int = self.key_to_int[key]
        swap_int = len(self.mydict) - 1  # Should be the highest int
        swap_key = self.int_to_key[swap_int]

        # Update the bidirectional maps so that key now has the highest int
        self.key_to_int[key], self.key_to_int[swap_key] = swap_int, key_int
        self.int_to_key[key_int], self.int_to_key[swap_int] = swap_key, key

        # Remove elements from dictionaries
        del self.mydict[key]
        del self.key_to_int[key]
        del self.int_to_key[swap_int]

    def random_key(self):  # O(1)
        """Return a uniformly chosen key.

        Raises:
            KeyError: if the dictionary is empty.
        """
        if not self.mydict:
            raise KeyError("cannot pick a random key from an empty fastRandomDict")
        # randrange always yields an index in 0..len-1.
        return self.int_to_key[random.randrange(len(self.mydict))]

    def remove_random(self):  # O(1)
        """Delete one uniformly chosen key (KeyError when empty)."""
        self.remove(self.random_key())

    def pop_random(self):  # O(1)
        """Remove and return one uniformly chosen ``(key, value)`` pair.

        Raises:
            KeyError: if nothing is left to pop.  An empty dictionary already
                surfaced as a KeyError before; it is now raised explicitly
                with a readable message.
        """
        if not self.mydict:
            raise KeyError("Error, symbol is exhausted!")
        key = self.random_key()
        val = self.mydict[key]
        self.remove(key)
        return key, val

    def pop_random_gen(self, num_to_pop):  # O(1) per item
        """Yield ``num_to_pop`` popped pairs, i.e. a sample without replacement."""
        for _ in range(num_to_pop):
            yield self.pop_random()

    def pop_inst_gen(self, subdict_size, num_to_gen):
        """Yield ``num_to_gen`` dictionaries of ``subdict_size`` popped pairs each."""
        for _ in range(num_to_gen):
            yield dict(self.pop_random_gen(subdict_size))

def decode(lst):
    """Turn a sign + base-1000 token list back into an integer.

    ``lst`` must start with '+' or '-' and be followed by zero or more
    all-digit tokens, most-significant group first.  A bare sign decodes to 0.

    Returns:
        int, or None when the list is empty, lacks a leading sign token, or
        contains a token that is not made of digits only.
    """
    if len(lst) == 0 or lst[0] not in ('+', '-'):
        return None
    magnitude = 0
    for token in lst[1:]:
        if not token.isdigit():
            return None
        magnitude = 1000 * magnitude + int(token)
    return magnitude if lst[0] == '+' else -magnitude

def readESymb(loop, name):
    """Read the text of one ``Esymb[<loop>]`` definition from file ``name``.

    Lines are skipped until one starts with ``Esymb[<loop>]``.  From there the
    lines are concatenated (newline removed, and a trailing backslash
    continuation marker removed as well) up to and including the first line
    that ends in ':' or ';'.

    Args:
        loop: value substituted into the ``Esymb[...]`` header that is searched for.
        name: path of the file to read; must exist.

    Returns:
        str: the joined definition including its terminator, or '' when no
        line starts with the header.
    """
    assert os.path.isfile(name)
    res = ''
    header = 'Esymb' + '[' + str(loop) + ']'
    with open(name, 'rt') as f:
        reading_form = False
        for line in f:
            if not reading_form:
                if not line.startswith(header):
                    continue
                res = ''
                reading_form = True
            # Strip only a newline that is really there: the last line of a
            # file may lack one, and a blank line is just "\n".  Indexing
            # line[-2] directly breaks in both situations.
            text = line[:-1] if line.endswith('\n') else line
            if text.endswith('\\'):
                res += text[:-1]
            else:
                res += text
            if text[-1:] in (':', ';'):
                break
    return res

def cl(text):
    """Return ``text`` with every run of whitespace removed."""
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub('', text)
    
def convert(loop, filename):
    """Parse one ``Esymb[<loop>]`` definition into a ``{word: coefficient}`` dict.

    The definition text comes from ``readESymb``.  Commas and '*' are removed,
    then the text is split on ':=', 'SB(' and ')' so that the pieces alternate
    between a coefficient and the argument of ``SB(...)``.  A coefficient that
    is only a sign ('+' or '-') stands for +1 / -1.

    Args:
        loop: loop number selecting the ``Esymb[...]`` entry.
        filename: path of the symbol file.

    Returns:
        dict mapping each word (str) to its int coefficient.  If a word occurs
        more than once, the last occurrence wins.
    """
    compact = re.sub(r'[,*]', '', readESymb(loop, filename))
    # Raw-string pattern: "\(" in a normal string literal is an invalid escape
    # sequence.  The first piece (header) and last piece (terminator) are dropped.
    dev = re.split(r":=|SB\(|\)", compact)[1:-1]
    keys = dev[1::2]
    # A bare trailing sign gets an explicit "1" appended via the substitution.
    values = [int(re.sub(r'[+-]$', t[0] + '1', t)) for t in dev[0::2]]
    return dict(zip(keys, values))

def export_hash(outfile,data):
    """Write ``data`` to ``outfile`` in the line-oriented ``index|word<TAB>value`` format.

    Each item becomes one line ``<i>|<k joined by spaces>\\t<encode(v) joined
    by spaces>`` where ``i`` counts from 1 in iteration order of ``data``.

    Args:
        outfile: path of the UTF-8 text file to create or overwrite.
        data: mapping whose keys are iterables of str (e.g. words, written one
            character per token) and whose values are accepted by ``encode``.
    """
    # The context manager closes the file even if encoding an item fails; the
    # single close also makes a flush after every line unnecessary.
    with io.open(outfile, mode="wt", encoding="utf-8") as file_handler:
        for i, (k, v) in enumerate(data.items(), start=1):
            prefix1_str = " ".join(k)
            prefix2_str = " ".join(encode(v))
            file_handler.write(f"{i}|{prefix1_str}\t{prefix2_str}\n")
    return
    

def read_encoded(path,train=False):
    """Load a file of ``index|word tokens<TAB>value tokens`` lines into a dict.

    For every well-formed line, the whitespace-separated word tokens are
    joined back into one string (the key) and the value tokens are passed to
    ``decode`` (the value).

    Args:
        path: UTF-8 text file to read.
        train: accepted for backward compatibility only.  Both settings read
            the whole file; no line limit was ever implemented.

    Returns:
        dict mapping word (str) to the decoded value (int, or None when
        ``decode`` rejects the tokens).  Lines without a '|' separator (for
        example blank lines) or without exactly one tab after it are skipped.
    """
    data_dict = {}
    with io.open(path, mode="r", encoding="utf-8") as f:
        for line in f:
            # partition never fails on a malformed line; tuple-unpacking the
            # result of split("|") raises on a blank line.
            _, separator, payload = line.rstrip().partition("|")
            if not separator:
                continue
            xy = payload.split("\t")
            if len(xy) != 2:
                continue
            data_dict[''.join(xy[0].split())] = decode(xy[1].split())
    return data_dict

In [2]:
# Load the loop-6 entry as a {word: integer coefficient} dictionary
symb6 = convert(6, '../data/EZ6_symb_new_norm')

# "Rays": words whose last five letters are all d, all e, or all f
d_rays = {word: coeff for word, coeff in symb6.items() if word.endswith('ddddd')}
e_rays = {word: coeff for word, coeff in symb6.items() if word.endswith('eeeee')}
f_rays = {word: coeff for word, coeff in symb6.items() if word.endswith('fffff')}

In [3]:
# Loops 5 down to 1 are read from the same file, one entry per call
# (evaluated in the order 5, 4, 3, 2, 1).
symb5, symb4, symb3, symb2, symb1 = (
    convert(loop_order, '../data/EZ_symb_new_norm') for loop_order in (5, 4, 3, 2, 1)
)

In [4]:
# Union of the three ray families (a later family would win on a shared key)
all_rays = {**d_rays, **e_rays, **f_rays}
# Every loop-6 word that is not a ray
rayless = {word: symb6[word] for word in symb6 if word not in all_rays}

In [5]:
# Merged dictionaries for loops 1-4 and loops 1-5 (right-most dict wins on a shared key)
all_loops1_4 = {**symb4, **symb3, **symb2, **symb1}
all_loops1_5 = {**symb5, **all_loops1_4}

In [6]:
# Seed the RNG so that the split below can be regenerated exactly on a re-run.
random.seed(0)

L6FRD = fastRandomDict(symb6)

# Every draw below *pops* its items out of L6FRD, so each sample is taken
# without replacement from whatever the previous draws left behind: the test
# set and all training sets are pairwise disjoint.
# NOTE(review): the earlier comments described the smaller training sets as
# "of those 263880", i.e. subsets of L6train_even.  The code has never done
# that; confirm which behaviour is intended.

# 10k loop-6 words held out for testing
L6test = dict(L6FRD.pop_random_gen(10000))

# 263880 words for the "even" training set
L6train_even = dict(L6FRD.pop_random_gen(263880))

# progressively smaller training sets, each drawn from the remainder
L6train_125k = dict(L6FRD.pop_random_gen(125000))
L6train_60k = dict(L6FRD.pop_random_gen(60000))
L6train_30k = dict(L6FRD.pop_random_gen(30000))
L6train_15k = dict(L6FRD.pop_random_gen(15000))
L6train_1k = dict(L6FRD.pop_random_gen(1000))
L6train_100 = dict(L6FRD.pop_random_gen(100))

#check counts
print(len(L6test),len(L6train_even),len(L6train_125k),len(L6train_60k),len(L6train_30k),len(L6train_15k),len(L6train_1k),len(L6train_100))

10000 263880 125000 60000 30000 15000 1000 100


In [7]:
L6rayless = fastRandomDict(rayless)
L6rayful = fastRandomDict(all_rays)

# 10k test words popped at random from the rayless part of loop 6
L6_raylesstest = dict(L6rayless.pop_random_gen(10000))

# The "30k" ray set is simply every ray word (the same object as all_rays, not a copy)
L6rayful_30k = all_rays

# Successive pops from the ray words: 15k, then 1k, then 100 of what is left
L6rayful_15k = dict(L6rayful.pop_random_gen(15000))
L6rayful_1k = dict(L6rayful.pop_random_gen(1000))
L6rayful_100 = dict(L6rayful.pop_random_gen(100))

print(len(L6_raylesstest),len(L6rayful_30k),len(L6rayful_15k),len(L6rayful_1k),len(L6rayful_100))

10000 29118 15000 1000 100


In [8]:
# Count the (word, coefficient) pairs each ray training set shares with the rayless test set
mydicts = [L6rayful_30k, L6rayful_15k, L6rayful_1k, L6rayful_100]
for mydict in mydicts:
    print(len(set(mydict.items()).intersection(L6_raylesstest.items())))

0
0
0
0


In [9]:
# Count the (word, coefficient) pairs each loop-6 training set shares with the loop-6 test set
mydicts = [L6train_even, L6train_125k, L6train_60k, L6train_30k, L6train_15k, L6train_1k, L6train_100]
for mydict in mydicts:
    print(len(set(mydict.items()).intersection(L6test.items())))

0
0
0
0
0
0
0


In [10]:
# Write every split to the current working directory, in this order.
# "priming_6L_rayless.test" used to be exported twice with identical content;
# it is now written once.
_split_exports = [
    ("priming_6L_rayless.test", L6_raylesstest),        # loop 6, no rays (test)
    ("priming_6L.test", L6test),                        # loop 6 (test)
    ("priming_loop1.train", symb1),                     # all of loop 1
    ("priming_loop2.train", symb2),                     # all of loop 2
    ("priming_loop3.train", symb3),                     # all of loop 3
    ("priming_loop4.train", symb4),                     # all of loop 4
    ("priming_loop5.train", symb5),                     # all of loop 5
    ("priming_6L_even.train", L6train_even),
    ("priming_6L_125k.train", L6train_125k),
    ("priming_6L_60k.train", L6train_60k),
    ("priming_6L_30k.train", L6train_30k),
    ("priming_6L_15k.train", L6train_15k),
    ("priming_6L_1k.train", L6train_1k),
    ("priming_6L_100.train", L6train_100),
    ("priming_6L_rayful_30k.train", L6rayful_30k),
    ("priming_6L_rayful_15k.train", L6rayful_15k),
    ("priming_6L_rayful_1k.train", L6rayful_1k),
    ("priming_6L_rayful_100.train", L6rayful_100),
]
for _outfile, _split in _split_exports:
    export_hash(_outfile, _split)

In [11]:
# Directory names for every training mixture: each loop 1-5 base combined with
# each loop-6 supplement, where the empty supplement means no loop-6 part in
# the name.  The order matches the hand-written list this replaces.
_l6_supplements = ["_1k6L", "_1krays6L", "_30k6L", "_1006L", "_30krays6L",
                   "_100rays6L", "_60k6L", "_125k6L", "_even6L", "_15k6L",
                   "", "_15krays6L"]
my_dirs = [f"priming_{base}{supplement}_train"
           for base in ("all1thru5L", "all5L")
           for supplement in _l6_supplements]
# ...followed by the two test-set directories.
my_dirs += ["priming_6L_rayless_test", "priming_6L_test"]

def make_datadirs(mydir,norm=False):
    """Create directory ``mydir`` and copy into it the data files its name asks for.

    Source files are looked up in the current working directory and keep
    their file name inside ``mydir``.  The name is interpreted as follows:

    * contains "train": copy ``priming_<norm>loop5.train``, preceded by the
      loop 1-4 files when the name contains "all1"; then
        - if it contains "ray": also copy ``priming_<norm>6L_rayful_<count>.train``,
          where <count> is the name component just before "rays6L";
        - otherwise, if it contains "6L": also copy ``priming_<norm>6L_<count>.train``,
          where <count> is the text just before "6L_".
    * otherwise, contains "test": copy ``priming_<norm>6L_rayless.test`` if
      the name contains "rayless", else ``priming_<norm>6L.test``.

    Args:
        mydir: directory to create; an existing directory is reused, which
            makes re-running safe.
        norm: when True, the "norm_" variants of the source files are used.

    Raises:
        FileNotFoundError: if a required source file is missing.
        ValueError: if a training name has no recognisable loop-6 component.
    """
    isnorm = "norm_" if norm else ""

    def _copy_in(filename):
        # Copy a file from the working directory into mydir under the same name.
        shutil.copyfile(filename, f"{mydir}/{filename}")

    os.makedirs(mydir, exist_ok=True)
    if "train" in mydir:
        if "all1" in mydir:
            for loop in (1, 2, 3, 4):
                _copy_in(f"priming_{isnorm}loop{loop}.train")
        _copy_in(f"priming_{isnorm}loop5.train")
        if "ray" in mydir:
            # "..._30krays6L_train" -> [..., "30k", "6L", "train"]
            raystr = re.split(r"rays|_", mydir)
            raycount = raystr[raystr.index("6L") - 1]
            _copy_in(f"priming_{isnorm}6L_rayful_{raycount}.train")
        elif "6L" in mydir:
            # "..._1k6L_train" -> [..., "1k", "", "train"]; the empty piece
            # sits between "6L" and the following "_".
            raystr = re.split(r"6L|_", mydir)
            raycount = raystr[raystr.index("") - 1]
            _copy_in(f"priming_{isnorm}6L_{raycount}.train")
    elif "test" in mydir:
        if "rayless" in mydir:
            _copy_in(f"priming_{isnorm}6L_rayless.test")
        else:
            _copy_in(f"priming_{isnorm}6L.test")
            

In [14]:
# Move the working directory one level up.  The move is relative, so running
# this cell twice climbs two levels.
os.getcwd()
os.chdir(os.pardir)

In [15]:
# Gather the exported splits under <cwd>/priming_data_folders, build one
# sub-directory per entry of my_dirs, then delete the loose copies.
mydir = os.getcwd()
# Create the target directory if needed (the os.mkdir call here used to be
# commented out, so the move below failed on a fresh checkout).
os.makedirs(f"{mydir}/priming_data_folders", exist_ok=True)

myfiles = glob.glob(f'{mydir}/priming_*.train') + glob.glob(f'{mydir}/priming_*.test')
print(myfiles)
for f in myfiles:
    print(f)
    shutil.move(f, f"{mydir}/priming_data_folders/{os.path.basename(f)}")

os.chdir("priming_data_folders")
try:
    # A separate loop variable keeps `mydir` pointing at the base directory.
    for data_dir_name in my_dirs:
        make_datadirs(data_dir_name, norm=False)

    thisdir = os.getcwd()
    myfiles = glob.glob(f'{thisdir}/priming_*.train') + glob.glob(f'{thisdir}/priming_*.test')
    for myfile in myfiles:
        os.remove(myfile)
finally:
    # Always return to the starting directory, even if a copy failed.
    os.chdir(mydir)

[]


In [None]:
# Loop 5 combined with each loop-6 training sample (right-hand dict wins on a shared key)
test_L6 = L6test
train_all5_mix_6 = {**symb5, **L6train_even}
train_all5_125k_6 = {**symb5, **L6train_125k}
train_all5_60k_6 = {**symb5, **L6train_60k}
train_all5_30k_6 = {**symb5, **L6train_30k}
train_all5_15k_6 = {**symb5, **L6train_15k}
train_all5_1k_6 = {**symb5, **L6train_1k}
train_all5_100_6 = {**symb5, **L6train_100}
train_all5 = symb5
train_all1_5 = all_loops1_5

# Loop 5 combined with each loop-6 ray sample
test_rays = L6_raylesstest
train_all5_allrays_6 = {**symb5, **L6rayful_30k}
train_all5_15krays_6 = {**symb5, **L6rayful_15k}
train_all5_1krays_6 = {**symb5, **L6rayful_1k}
train_all5_100rays_6 = {**symb5, **L6rayful_100}

# Loops 1-5 combined with each loop-6 training sample
train_all1thru5_mix_6 = {**all_loops1_5, **L6train_even}
train_all1thru5_125k_6 = {**all_loops1_5, **L6train_125k}
train_all1thru5_60k_6 = {**all_loops1_5, **L6train_60k}
train_all1thru5_30k_6 = {**all_loops1_5, **L6train_30k}
train_all1thru5_15k_6 = {**all_loops1_5, **L6train_15k}
train_all1thru5_1k_6 = {**all_loops1_5, **L6train_1k}
train_all1thru5_100_6 = {**all_loops1_5, **L6train_100}

# Loops 1-5 combined with each loop-6 ray sample
train_all1thru5_allrays_6 = {**all_loops1_5, **L6rayful_30k}
train_all1thru5_15krays_6 = {**all_loops1_5, **L6rayful_15k}
train_all1thru5_1krays_6 = {**all_loops1_5, **L6rayful_1k}
train_all1thru5_100rays_6 = {**all_loops1_5, **L6rayful_100}

In [11]:
# Training sets without any loop-6 data: loop 5 only, and loops 1-5
export_hash(outfile="priming_all5L_no6L.train", data=train_all5)
export_hash(outfile="priming_all1_5L_no6L.train", data=train_all1_5)

In [32]:
# Merged train/test files, written in this order to the current working directory.
# NOTE(review): "priming_all5L_none6L.train" receives train_all5_mix_6 (loop 5
# plus the "even" loop-6 sample), while its loops 1-5 counterpart is called
# "..._even6L.train".  The name is kept as is; confirm whether "none6L" is intended.
_merged_exports = [
    ("priming_6L.test", test_L6),
    ("priming_all5L_none6L.train", train_all5_mix_6),
    ("priming_all5L_125k6L.train", train_all5_125k_6),
    ("priming_all5L_60k6L.train", train_all5_60k_6),
    ("priming_all5L_30k6L.train", train_all5_30k_6),
    ("priming_all5L_15k6L.train", train_all5_15k_6),
    ("priming_all5L_1k6L.train", train_all5_1k_6),
    ("priming_all5L_1006L.train", train_all5_100_6),

    ("priming_6L_rayless.test", L6_raylesstest),
    ("priming_all5L_allrays6L.train", train_all5_allrays_6),
    ("priming_all5L_15krays6L.train", train_all5_15krays_6),
    ("priming_all5L_1krays6L.train", train_all5_1krays_6),
    ("priming_all5L_100rays6L.train", train_all5_100rays_6),

    ("priming_all1thru5L_even6L.train", train_all1thru5_mix_6),
    ("priming_all1thru5L_125k6L.train", train_all1thru5_125k_6),
    ("priming_all1thru5L_60k6L.train", train_all1thru5_60k_6),
    ("priming_all1thru5L_30k6L.train", train_all1thru5_30k_6),
    ("priming_all1thru5L_15k6L.train", train_all1thru5_15k_6),
    ("priming_all1thru5L_1k6L.train", train_all1thru5_1k_6),
    ("priming_all1thru5L_1006L.train", train_all1thru5_100_6),
    ("priming_all1thru5L.train", all_loops1_5),

    ("priming_all1thru5L_allrays6L.train", train_all1thru5_allrays_6),
    ("priming_all1thru5L_15krays6L.train", train_all1thru5_15krays_6),
    ("priming_all1thru5L_1krays6L.train", train_all1thru5_1krays_6),
    ("priming_all1thru5L_100rays6L.train", train_all1thru5_100rays_6),
]
for _outfile, _merged in _merged_exports:
    export_hash(_outfile, _merged)

In [41]:
#Observation: sometimes C[3] divides C[1]: when?
# NOTE(review): no cell in the visible part of this notebook fills mycoeffs
# (the only visible assignment sets it to an empty dict), so this display
# presumably relies on a cell that is not shown -- confirm before re-running.
mycoeffs[1][3] 

{'aaaaabb': ['1E11', '3E13*17', '17E117*19', '2E22E2*3E3*5'],
 'aaaaabc': ['1E11', '3E13*17', '17E117*19', '2E22E2*3E3*5'],
 'aaaaacb': ['1E11', '3E13*17', '17E117*19', '2E22E2*3E3*5'],
 'aaaaacc': ['1E11', '3E13*17', '17E117*19', '2E22E2*3E3*5'],
 'aaaaaec': ['2E42E4*3', '5E25E2*19', '2E12*3*13*19', '2E32E3*5E2*7'],
 'aaaaafb': ['2E42E4*3', '5E25E2*19', '2E12*3*13*19', '2E32E3*5E2*7'],
 'aaaabab': ['0', '3E13', '2E12*11', '2E32E3*5'],
 'aaaabac': ['0', '3E13', '2E12*11', '2E32E3*5'],
 'aaaabbb': ['1E11', '2E52E5*3', '653E1653', '2E22E2*3*5*19'],
 'aaaabbc': ['1E11', '2E52E5*3', '653E1653', '2E22E2*3*5*19'],
 'aaaabbd': ['0', '2E22E2', '2E22E2*7', '7E27E2'],
 'aaaabcb': ['1E11', '2E12*3', '7E17', '2E22E2*3*5'],
 'aaaabcc': ['1E11', '2E12*3', '7E17', '2E22E2*3*5'],
 'aaaabcd': ['0', '2E22E2', '2E22E2*7', '7E27E2'],
 'aaaabdb': ['0', '3E13', '2E22E2*5', '2E12*17'],
 'aaaabdc': ['0', '3E13', '2E22E2*5', '2E12*17'],
 'aaaabfb': ['0', '5E15', '2E12*19', '2E32E3*3E2'],
 'aaaacab': ['0', '3E1

In [None]:
# Scratch notes (not Python; kept as comments so that the cell can execute):
#   aaaa+?+a+?->0
#   aaab+?+
len(myquadrats)

In [75]:
# Display uglypolys.  Judging by the captured output it maps a word to a
# polynomial in L written as a string; presumably these are the polynomials
# that do not factor nicely -- TODO confirm, the defining cell is not visible here.
uglypolys

{'aaaabdb': '-48*L^2+320*L-544',
 'aaaabdc': '48*L^2-320*L+544',
 'aaaacdb': '48*L^2-320*L+544',
 'aaaacdc': '-48*L^2+320*L-544',
 'aaaaecc': '-572/3*L^3+1848*L^2-16540/3*L+4752',
 'aaaafbb': '-572/3*L^3+1848*L^2-16540/3*L+4752',
 'aaabaec': '-8/3*L^3-88/3*L^2+896/3*L-1760/3',
 'aaabafb': '8/3*L^3-152/3*L^2+928/3*L-1696/3',
 'aaabbdc': '-8/3*L^3-24*L^2+728/3*L-448',
 'aaabbfb': '-8/3*L^3+152/3*L^2-928/3*L+1696/3',
 'aaabcdc': '8/3*L^3+24*L^2-728/3*L+448',
 'aaabcec': '8/3*L^3+88/3*L^2-896/3*L+1760/3',
 'aaabfbb': '-4/3*L^3+72*L^2-1700/3*L+1136',
 'aaacaec': '8/3*L^3-152/3*L^2+928/3*L-1696/3',
 'aaacafb': '-8/3*L^3-88/3*L^2+896/3*L-1760/3',
 'aaacbdb': '8/3*L^3+24*L^2-728/3*L+448',
 'aaacbfb': '8/3*L^3+88/3*L^2-896/3*L+1760/3',
 'aaaccdb': '-8/3*L^3-24*L^2+728/3*L-448',
 'aaaccec': '-8/3*L^3+152/3*L^2-928/3*L+1696/3',
 'aaacecc': '-4/3*L^3+72*L^2-1700/3*L+1136',
 'aaaeccc': '50*L^3-916/3*L^2-98*L+5848/3',
 'aaaecec': '-64*L^3+2200/3*L^2-2904*L+12128/3',
 'aaaeecc': '-58/3*L^3+1184/3*L^2

In [None]:
# Polynomials that are not listed in uglypolys
factorpolys = {word: mypolys[word] for word in mypolys if word not in uglypolys}
# ...split by whether their text contains the factor "(L-4)"
L4factors = {word: poly for word, poly in factorpolys.items() if '(L-4)' in poly}
otherfactors = {word: factorpolys[word] for word in factorpolys if word not in L4factors}

In [84]:
# Sizes of the three groups and of the full set, one per line
print(len(L4factors), len(otherfactors), len(uglypolys), len(mypolys), sep="\n")
# Entries of myquadrats whose word is also a key of uglypolys
badquads = {word: myquadrats[word] for word in myquadrats if word in uglypolys}
badquads

6000
2154
1590
9744


{'aaaabdb': [-48, 320, -544],
 'aaaabdc': [48, -320, 544],
 'aaaacdb': [48, -320, 544],
 'aaaacdc': [-48, 320, -544],
 'aabdbbb': [-16, 196, -592],
 'aabdbdb': [16, -64],
 'aabdbdd': [-32, 112],
 'aabdbfb': [16, -64],
 'aabdccc': [16, -196, 592],
 'aabdcdc': [-16, 64],
 'aabdcdd': [32, -112],
 'aabdcec': [-16, 64],
 'aabddbd': [-32, 112],
 'aabddcd': [32, -112],
 'aabdddb': [-32, 112],
 'aabdddc': [32, -112],
 'aacdbbb': [16, -196, 592],
 'aacdbdb': [-16, 64],
 'aacdbdd': [32, -112],
 'aacdbfb': [-16, 64],
 'aacdccc': [-16, 196, -592],
 'aacdcdc': [16, -64],
 'aacdcdd': [-32, 112],
 'aacdcec': [16, -64],
 'aacddbd': [32, -112],
 'aacddcd': [-32, 112],
 'aacdddb': [32, -112],
 'aacdddc': [-32, 112],
 'aeafbbb': [-32, 440, -1312],
 'aeafffb': [256/3, -352, 320/3],
 'afaeccc': [-32, 440, -1312],
 'afaeeec': [256/3, -352, 320/3],
 'bbdbbbb': [-392/3, 1728, -17920/3],
 'bbdbbdb': [-184/3, 936, -7136/3],
 'bbdbbdd': [-192, 672],
 'bbdbdbb': [-184/3, 896, -6656/3],
 'bbdcccc': [152/3, -656, 6

In [65]:
#There are 200 quadratic polynomials that do not factor nicely and 1430 that do.
#There are 1590 polynomials that do not factor nicely. 200 are quadratic, and 1390 are cubic.

In [69]:
mycoeffs={}
myquadrats={}

for k,v in badquads.items():
    #print(k,v)
    this_coeffs=Poly(parse(v).expand()).all_coeffs()
    if Poly(parse(v).expand()).degree('L') < 3:
        badquads[k] = this_coeffs
        this_coeffs = [0] + this_coeffs
    badquadcoeffs[k] = this_coeffs