In [8]:
# Read the whole text of the file 'sequences20' into `u`; the next cell parses it as JSON.
with open('sequences20') as handle:
    u = handle.read()

In [9]:
import json
# Parse the text held in `u`.  Later cells index the result by an integer "length"
# (e.g. seqs_by_length[5] is displayed as a list of integer sequences).
seqs_by_length = json.loads(u)

In [10]:
# Shallow copy: the outer list is new, but the per-length inner lists are shared with
# seqs_by_length.  NOTE(review): nothing in the visible cells reads this copy.
seqs_by_length_copy = seqs_by_length[:]

In [11]:
import numpy as np

In [103]:
def cf2matrix(a):
    """Multiply together the 2x2 matrices [[x, 1], [1, 0]] for each x in `a`, in order.

    (Original note: this is the matrix for an EVEN LENGTH R-L sequence.)

    The product starts from the int64 identity, so an empty `a` gives the identity.
    Returns a 2x2 numpy array.
    """
    product = np.eye(2, dtype=np.int64)
    for term in a:
        step = [[term, 1], [1, 0]]
        product = product @ step
    return product

def m_rep(seq):
    """Return cf2matrix(seq) when len(seq) is even, otherwise cf2matrix(seq + seq).

    (Original note: the odd case is just the "condensed form" for an even sequence.)
    """
    expanded = seq + seq if len(seq) % 2 else seq
    return cf2matrix(expanded)
    
def rotl(x, k):
    """Rotate the sequence `x` left by `k` positions using slices; `x` is not modified."""
    front, back = x[:k], x[k:]
    return back + front

def min_rot(x):
    """Return the smallest (in Python's sequence ordering) of all left rotations of `x`."""
    rotations = [rotl(x, shift) for shift in range(len(x))]
    return min(rotations)

def min_rot2(x):
    """Return the smallest of the left rotations of `x` by an even number of positions."""
    even_rotations = [rotl(x, shift) for shift in range(0, len(x), 2)]
    return min(even_rotations)
    
def trace2length(t):
    """Return 2 * arccosh(t / 2), computed with numpy (inverse of length2trace)."""
    half_trace = t / 2
    return 2 * np.arccosh(half_trace)

def length2trace(l):
    """Return 2 * cosh(l / 2), computed with numpy (inverse of trace2length)."""
    half_length = l / 2
    return 2 * np.cosh(half_length)

def actual_sequences(a, length):
    """Expand the stored sequences in `a` into the sequences for the given `length`.

    (Original note: the sequences that we actually want, from the "condensed forms"
    that take primitive roots in PGL_2(Z).)

    The trace of m_rep(seq) is compared with the integer window
    [int(length2trace(length)) + 1, int(length2trace(length + 1)) + 1).  A sequence
    whose trace falls outside that window is repeated k times, where
    k = int((length + 1) / trace2length(trace)).  An even-length result contributes
    itself and its rotation by one term; an odd-length result contributes itself
    concatenated with itself.  The entries of `a` are left unmodified.
    """
    lowest = int(length2trace(length)) + 1
    beyond = int(length2trace(length + 1)) + 1
    expanded = []
    for stored in a:
        seq = stored[:]  # private copy, so the in-place repeat below cannot touch `a`
        trace = np.trace(m_rep(seq))
        if not (lowest <= trace < beyond):
            copies = int((length + 1) / trace2length(trace))
            seq *= copies
        if len(seq) % 2:
            # Odd length: expand the condensed form by doubling it.
            expanded.append(seq + seq)
        else:
            # Even length: keep the sequence and its shift by one term.
            # NOTE(review): the original comment spoke of symmetrizing by R-L flip and
            # time reversal and adding "all four"; only these two forms are appended.
            expanded.extend([seq, rotl(seq, 1)])
    return expanded

def my_actual_sequences(length):
    """Run actual_sequences on the globally loaded seqs_by_length[length]."""
    stored = seqs_by_length[length]
    return actual_sequences(stored, length)

def reduce_and_remove_duplicates(seqs):
    """Replace every sequence by min_rot2 of it and drop duplicates.

    Returns a list of lists, in whatever order the intermediate set yields them.
    """
    canonical = set()
    for seq in seqs:
        canonical.add(tuple(min_rot2(seq)))
    return list(map(list, canonical))
            


In [71]:
def guys_for_trace(t):
    """Return the deduplicated sequences whose cf2matrix trace equals `t`.

    Candidates are taken from my_actual_sequences(int(trace2length(t))).
    """
    candidates = my_actual_sequences(int(trace2length(t)))
    matching = []
    for seq in candidates:
        if np.trace(cf2matrix(seq)) == t:
            matching.append(seq)
    return reduce_and_remove_duplicates(matching)
    

In [204]:
# Now we extract from each sequence the partial sequences:
# NOTE(review): Rdivisions_tables below builds its own local `completions`; nothing in the
# visible cells reads this module-level dict.
completions = {}

def Rdivisions(seq):
    """List all ways to divide the cyclic sequence `seq` by removing two R's.

    (Original note: all sequences start and end with R's, but possibly zero of them.)

    `seq` is a single sequence (a list of integers); going by the original comments,
    the entries at even indices are the lengths of the strings of R's.  Returns a
    list of pairs of lists.  Every division appears in both orders: first all
    (u, v) pairs, then the same pairs swapped to (v, u).
    """
    splits = []
    run_positions = range(0, len(seq), 2)

    # Both removed R's lie in the same string of R's (needs a run of at least two).
    for position in run_positions:
        rotated = rotl(seq, position)
        run, rest = rotated[0], rotated[1:]
        for first in range(run - 1):
            for second in range(first + 1, run):
                inner = [second - first - 1]
                outer = [run - second - 1] + rest + [first]
                splits.append((inner, outer))

    # The removed R's lie in two different strings of R's.
    for i in run_positions:
        for j in range(i + 2, len(seq), 2):
            run_i, run_j = seq[i], seq[j]
            between = seq[i + 1:j]
            around = seq[j + 1:] + seq[:i]
            for cut_i in range(run_i):
                for cut_j in range(run_j):
                    splits.append((
                        [run_i - cut_i - 1] + between + [cut_j],
                        [run_j - cut_j - 1] + around + [cut_i],
                    ))

    swapped = [(v, u) for (u, v) in splits]
    return splits + swapped

def reverse_segment(seg):
    """Reverse an R-to-R segment and return it, again in R-to-R form, as a list.

    A one-element segment [m] becomes [0, m, 0].  Otherwise the reversed segment
    gets a leading 0 in front of a positive first entry and a trailing 0 behind a
    positive last entry, while a zero first or last entry is dropped.
    """
    flipped = [*reversed(seg)]
    if len(flipped) == 1:
        return [0, flipped[0], 0]
    first, last = flipped[0], flipped[-1]
    head = [0, first] if first > 0 else []
    tail = [last, 0] if last > 0 else []
    return head + flipped[1:-1] + tail
    
def Rdivisions_tables(seqs):
    """Collect the Rdivisions of every sequence in `seqs` and index them by first half.

    Returns (divisions, completions, reverse_completions):
      divisions           - list of all (u, v) pairs from Rdivisions, in the order of `seqs`
      completions         - dict: tuple(u) -> set of tuple(v) that occur paired with u,
                            i.e. all the ways of completing u to a sequence in `seqs`
      reverse_completions - dict: tuple(u) -> set of tuple(reverse_segment(v)) for the same v
    """
    divisions = []
    for seq in seqs:
        divisions.extend(Rdivisions(seq))

    completions = {}
    reverse_completions = {}
    for u, v in divisions:
        key, partner = tuple(u), tuple(v)
        completions.setdefault(key, set()).add(partner)
        reverse_completions.setdefault(key, set()).add(tuple(reverse_segment(partner)))
    return divisions, completions, reverse_completions

def pants_with_boundaries(seqs):
    """Return triples sigma_i such that R-(sigma_i)^{-1}-R-sigma_{i+1} are in seqs.

    For every division (u, v) of `seqs`, each w that lies in both
    reverse_completions[reverse_segment(u)] and completions[reverse_segment(v)]
    contributes the triple (reverse_segment(u), v, w), with all parts as lists.
    """
    divisions, completions, reverse_completions = Rdivisions_tables(seqs)
    triples = []
    for u, v in divisions:
        u_rev = tuple(reverse_segment(u))
        v_rev = tuple(reverse_segment(v))
        if u_rev in reverse_completions and v_rev in completions:
            shared = reverse_completions[u_rev] & completions[v_rev]
            triples.extend((list(u_rev), v, list(w)) for w in shared)
    return triples

def remove_zeros(seq, circular = True):
    """Greedily and repeatedly replace `x 0 y` with `x + y`.

    A zero makes the entry that follows it merge into the entry before it.  If the
    input ends on a zero with nothing after it:
      circular=True  - the last merged entry is added onto the first result entry
                       (the sequence is treated as wrapping around);
      circular=False - the result ends with [..., last, 0].
    Returns a new list; `seq` must be non-empty.
    """
    merged = []
    pending = seq[0]
    absorb_next = False
    for term in seq[1:]:
        if absorb_next:
            pending += term
            absorb_next = False
            continue
        if term > 0:
            merged.append(pending)
            pending = term
            continue
        # term is a zero: the next entry gets glued onto `pending`
        absorb_next = True
    if not absorb_next:
        merged.append(pending)
    elif circular:
        merged[0] += pending
    else:
        merged.extend([pending, 0])
    return merged


def boundary_for_pants(pants):
    """Build the three boundary sequences for a triple `pants`.

    For i = 0, 1, 2 (indices taken mod 3) the word
        [1, 0] + pants[i-1] + [0, 1, 0] + reverse_segment(pants[i+1]) + [0]
    is formed and passed through remove_zeros.  Each (u, v) pair is also printed.
    """
    boundaries = []
    for i in range(3):
        before = pants[(i + 2) % 3]  # same index as (i - 1) % 3
        after = reverse_segment(pants[(i + 1) % 3])
        print(before, after)  # NOTE(review): looks like leftover debug output
        word = [1, 0] + before + [0, 1, 0] + after + [0]
        boundaries.append(remove_zeros(word))
    return boundaries
        
    

In [213]:
# Canonical (min_rot2) representatives of the expanded sequences for length index 4.
reduce_and_remove_duplicates(my_actual_sequences(4))

[[3, 2],
 [3, 3],
 [9, 1],
 [7, 1],
 [8, 1],
 [6, 1],
 [1, 1, 2, 1],
 [1, 10],
 [1, 8],
 [2, 5],
 [1, 6],
 [2, 3],
 [1, 1, 1, 2],
 [1, 7],
 [1, 9],
 [4, 2],
 [10, 1],
 [5, 2],
 [2, 4]]

In [214]:
# Traces of the sequences in the previous output.
# NOTE(review): relies on IPython's `_` (last output), so it only reproduces when run
# directly after the cell that produced that list.
[cf2matrix(x).trace() for x in _]

[8, 11, 11, 9, 10, 8, 10, 12, 10, 12, 8, 8, 10, 9, 11, 10, 12, 12, 10]

In [209]:
# NOTE(review): relies on IPython's `_`.  This cell's execution count (209) is lower than
# that of the cells shown above it (213, 214), so which output `_` held when it ran
# cannot be read off the cell order — pass the intended list explicitly when re-running.
pants_with_boundaries(_)

[([0, 1, 0], [0, 3, 0], [1, 1, 0]),
 ([0, 1, 0], [0, 3, 0], [0, 1, 1]),
 ([3], [1], [1, 1, 0]),
 ([3], [1], [0, 1, 1]),
 ([0, 2, 0], [2, 1, 0], [1]),
 ([0, 3, 0], [1, 1, 0], [0, 1, 0]),
 ([0, 3, 0], [0, 1, 1], [0, 1, 0]),
 ([0, 2, 0], [0, 1, 2], [1]),
 ([1, 2, 0], [2], [0, 1, 0]),
 ([1, 1, 0], [3], [1]),
 ([0, 1, 1], [3], [1]),
 ([0, 2, 1], [2], [0, 1, 0]),
 ([1], [1, 1, 0], [3]),
 ([0, 1, 1], [0, 1, 0], [0, 3, 0]),
 ([1, 1, 0], [0, 1, 0], [0, 3, 0]),
 ([1], [0, 1, 1], [3]),
 ([1], [0, 2, 0], [0, 1, 2]),
 ([1], [0, 2, 0], [2, 1, 0]),
 ([2], [0, 1, 0], [0, 2, 1]),
 ([2], [0, 1, 0], [1, 2, 0]),
 ([0, 1, 0], [1, 2, 0], [2]),
 ([0, 2, 0], [0, 2, 0], [0, 2, 0]),
 ([0, 1, 0], [0, 2, 1], [2]),
 ([2, 1, 0], [1], [0, 2, 0]),
 ([2], [2], [2]),
 ([0, 1, 2], [1], [0, 2, 0])]

In [210]:
# Boundaries of the first triple of the previous output (relies on IPython's `_`).
boundary_for_pants(_[0])

[1, 1, 0] [3]
[0, 1, 0] [1, 1, 0]
[0, 3, 0] [1]


[[6, 1], [1, 1, 2, 1], [3, 3]]

In [86]:
def remove_zeros(seq):
    """Non-circular zero removal: greedily and repeatedly replace `x 0 y` with `x + y`.

    A zero makes the entry that follows it merge into the entry before it.  If the
    input ends on a zero with nothing after it, the result ends with [..., last, 0].
    Returns a new list; `seq` must be non-empty.

    NOTE(review): this file also contains a remove_zeros(seq, circular=True)
    definition.  Whichever cell runs last wins, and the two differ on a trailing
    unmatched zero (that one wraps around by default, this one never does).
    """
    out = []
    current = seq[0]
    merge_next = False
    for value in seq[1:]:
        if merge_next:
            current += value
            merge_next = False
        elif value > 0:
            out.append(current)
            current = value
        else:
            # value is a zero: glue the following entry onto `current`
            merge_next = True
    out.append(current)
    if merge_next:
        out.append(0)
    return out

def invert_R_to_R(seq):
    """Reverse `seq`, pad it with a zero on each side, and collapse zeros with remove_zeros."""
    padded = [0]
    padded.extend(reversed(seq))
    padded.append(0)
    return remove_zeros(padded)

In [94]:
# Quick check: each unmatched zero glues its neighbours together
# (5 0 0 -> 5, then 5 0 4 -> 9, then 9 0 8 -> 17).
remove_zeros([5, 0, 0, 0, 4, 0, 8])

[17]

In [215]:
# Boundaries for the triple ([3], [1], [1, 1, 0]), the third entry of the pants list shown above.
boundary_for_pants(([3], [1], [1, 1, 0]))

[1, 1, 0] [0, 1, 0]
[3] [1, 1, 0]
[1] [0, 3, 0]


[[2, 1, 1, 1], [6, 1], [3, 3]]

In [216]:
# Traces of the three boundary sequences from the previous output (relies on IPython's `_`).
[cf2matrix(x).trace() for x in _]

[10, 8, 11]

In [74]:
# Inspect the stored sequences for length index 5.
seqs_by_length[5]

[[1, 1, 1, 3],
 [1, 11],
 [3, 4],
 [1, 12],
 [1, 2],
 [2, 6],
 [1, 1, 2, 2],
 [1, 13],
 [1, 14],
 [1, 1, 1, 4],
 [2, 7],
 [3, 5],
 [1, 15],
 [1, 16],
 [1, 2, 1, 3],
 [1],
 [2, 8],
 [4],
 [1, 1, 1, 5],
 [1, 17],
 [2, 9],
 [1, 1, 2, 3],
 [1, 18],
 [1, 1, 3, 2],
 [3, 6]]

In [77]:
# Number of pants for length index 11.
# NOTE(review): `pants` is not defined in any visible cell — presumably an earlier
# version of pants_with_boundaries; confirm before re-running.
len(pants(seqs_by_length[11]))

189022

In [78]:
# Number of pants for length index 12.
# NOTE(review): `pants` is not defined in any visible cell.
len(pants(seqs_by_length[12]))

935079

In [79]:
# Number of pants for length index 13.
# NOTE(review): `pants` is not defined in any visible cell.
len(pants(seqs_by_length[13]))

4903283

In [83]:
import numpy as np

In [84]:
# Natural logs of the three counts above.
# NOTE(review): Out[77], Out[78], Out[79] refer to execution counts, so this only works
# in the session that produced those outputs.
[np.log(x) for x in [Out[77], Out[78], Out[79]]]

[12.149618689383981, 13.748386296680188, 15.40541553873106]

In [91]:
# Placeholder table: one independent empty list per length index 0..19.
# A comprehension is used rather than [[]] * 20, which would put the *same* list object
# in every slot (so an append to one slot would show up in all of them).
pants_by_length = [[] for _slot in range(20)]

In [92]:
import datetime
# Compute the pants for every length index, printing the index and a timestamp first so
# the cost of each step can be read off the log.
# NOTE(review): `pants` is not defined in any visible cell — presumably an earlier
# version of pants_with_boundaries; confirm before re-running.
for i in range(1, 20):
    print(i)
    print(datetime.datetime.now())
    pants_by_length[i] = pants(seqs_by_length[i])

1
2018-04-27 10:19:44.658341
2
2018-04-27 10:19:44.658370
3
2018-04-27 10:19:44.658388
4
2018-04-27 10:19:44.658452
5
2018-04-27 10:19:44.665189
6
2018-04-27 10:19:44.706617
7
2018-04-27 10:19:44.818805
8
2018-04-27 10:19:45.204440
9
2018-04-27 10:19:45.551284
10
2018-04-27 10:19:45.705561
11
2018-04-27 10:19:46.157927
12
2018-04-27 10:19:47.963836
13
2018-04-27 10:20:09.013427
14
2018-04-27 10:20:59.646871
15
2018-04-27 10:30:25.636399


KeyboardInterrupt: 

In [98]:
# Difference of log(len(pants_by_length[i])) between consecutive length indices.
# Empty entries give log(0) = -inf, which is where the nan / inf values (and the numpy
# warnings) in the output come from.
[(i, np.log(len(pants_by_length[i]))-np.log(len(pants_by_length[i-1]))) for i in range(1,len(pants_by_length))]

  if __name__ == '__main__':
  if __name__ == '__main__':


[(1, nan),
 (2, nan),
 (3, nan),
 (4, nan),
 (5, inf),
 (6, 2.5649493574615367),
 (7, 2.582545119351916),
 (8, 1.6198486484519394),
 (9, 1.9331711232778828),
 (10, 1.6369286825006455),
 (11, 1.8121757583400608),
 (12, 1.5987676072962067),
 (13, 1.6570292420508714),
 (14, 1.6535614417323394),
 (15, -inf),
 (16, nan),
 (17, nan),
 (18, nan),
 (19, nan)]

In [99]:
# Growth factor corresponding to a log-difference of about 1.65 (roughly the values
# seen for i = 13 and i = 14 above).
np.exp(1.65)

5.206979827179849

In [100]:
# Number of pants stored per length index (0 where nothing was found or stored).
[len(x) for x in pants_by_length]

[0,
 0,
 0,
 0,
 0,
 1,
 13,
 172,
 869,
 6006,
 30867,
 189022,
 935079,
 4903283,
 25622386,
 0,
 0,
 0,
 0,
 0]

In [101]:
# Number of stored sequences per length index.
[len(x) for x in seqs_by_length]

[0,
 1,
 1,
 5,
 10,
 25,
 48,
 127,
 303,
 736,
 1768,
 4497,
 11114,
 27944,
 71067,
 180094,
 460635,
 1181266,
 3036122,
 7831535]

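The next cell is an attempt to speed things up with multiprocessing, but it only ends up slowing things down (see the timings at the end: about 48 seconds serially versus almost 5 minutes with a pool of 8). I think it's because each batch has to be sent to a worker together with the completions dictionary, which is so large that it is expensive to move around. 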

In [23]:
import multiprocessing as mp

def pants_for_batch(division_batch_and_completions):
    """A version of the pants search for multiprocessing: handle one batch of divisions.

    Takes a single (division_batch, completions) pair, so that it can be mapped over
    a pool.  For each (u, v) in the batch, every w in
    completions[tuple(u)] & completions[tuple(reversed(v))] yields the triple
    (reversed u, v, w).  Divisions with a key missing from `completions` are skipped.
    """
    division_batch, completions = division_batch_and_completions
    found = []
    for u, v in division_batch:
        key_u = tuple(u)
        key_v = tuple(reversed(v))
        if key_u in completions and key_v in completions:
            for w in completions[key_u] & completions[key_v]:
                found.append((list(reversed(u)), v, list(w)))
    return found

def batches(l, batch_size):
    """Divide the list `l` into consecutive batches of `batch_size` items.

    The last batch holds whatever is left over, so it has size at most `batch_size`.
    """
    starts = range(0, len(l), batch_size)
    return [l[start:start + batch_size] for start in starts]

def division_batches_and_completions(seqs, batch_size = None, num_batches = 100):
    """Compute the divisions of `seqs`, split them into batches, and return the
    batches together with the completions table.

    seqs        - sequences handed to Rdivisions_tables
    batch_size  - number of divisions per batch; when falsy it is derived from
                  `num_batches` as 1 + len(divisions) // num_batches
    num_batches - target number of batches, used only when `batch_size` is not given

    Returns (division_batches, completions).
    """
    # Rdivisions_tables in this file returns (divisions, completions,
    # reverse_completions).  Unpacking into exactly two names raised ValueError, so
    # take the first two results and ignore any further ones.
    divisions, completions, *_ = Rdivisions_tables(seqs)
    if not batch_size:
        batch_size = 1 + len(divisions)//num_batches
    division_batches = batches(divisions, batch_size)
    return division_batches, completions
    
def pants_mp(seqs, pool_size):
    """Multiprocessing variant of the pants search.

    The divisions of `seqs` are split into about `pool_size` batches, each batch is
    handed to pants_for_batch in a pool of `pool_size` worker processes, and the
    per-batch results are concatenated in batch order.  Every work item carries the
    full completions table alongside its batch.
    """
    division_batches, completions = division_batches_and_completions(seqs, num_batches=pool_size)
    work_items = [(batch, completions) for batch in division_batches]
    with mp.Pool(pool_size) as pool:
        pending = pool.map_async(pants_for_batch, work_items)
        per_batch = pending.get()
    pants = []
    for chunk in per_batch:
        pants.extend(chunk)
    return pants

In [21]:
import datetime
def start_timing():
    """Store the current wall-clock time in the module-level `_jk_timer` for end_timing()."""
    global _jk_timer
    started_at = datetime.datetime.now()
    _jk_timer = started_at
    
def end_timing():
    """Print the time elapsed since the last start_timing() call.

    The elapsed time is reported as days, hours, minutes, seconds and microseconds.
    Reads the module-level `_jk_timer`, which start_timing() sets.
    """
    elapsed = datetime.datetime.now() - _jk_timer
    # timedelta.seconds excludes whole days, so it is always below 86400.
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    print("{} days, {} hours, {} minutes, {} seconds, {} microseconds".format(
          elapsed.days, hours, minutes, seconds, elapsed.microseconds))

In [25]:
# Time the serial computation for length index 13.
# NOTE(review): `pants` is not defined in any visible cell.
start_timing()
print(len(pants(seqs_by_length[13])))
end_timing()

4903283
0 days, 0 hours, 0 minutes, 47 seconds, 921454 microseconds


In [26]:
# Time the multiprocessing variant on the same input with a pool of 8 workers.
start_timing()
print(len(pants_mp(seqs_by_length[13], 8)))
end_timing()

4903283
0 days, 0 hours, 4 minutes, 54 seconds, 787668 microseconds
