# In [None]:
#### Cellular Automata

# import dataset
def importCellAuto():
    """Load the Conway sample file and parse it into sequences of integer grids.

    The file "../Cellular Automata/conway_samples.txt" is split into sequences
    on the separator line "---", each sequence into chunks on blank lines, and
    each chunk into rows on newlines. Every character of a row is converted
    with int(). The last parsed chunk of every sequence is discarded.

    Returns:
        A tuple (gridSeqs, maxseqlen, maxgridlen) where
        gridSeqs   -- list of sequences, each a list of 2-D int lists,
        maxseqlen  -- largest number of chunks found in one sequence, counted
                      before the last chunk is discarded,
        maxgridlen -- largest number of rows found in any chunk.
    """
    with open("../Cellular Automata/conway_samples.txt") as handle:
        raw_text = handle.read()

    sequences = raw_text.split('\n---\n')
    print(f"# Cell Sequences: {len(sequences)}")

    gridSeqs = []
    longest_seq = 0
    tallest_grid = 0

    for sequence in sequences:
        chunks = sequence.split("\n\n")
        longest_seq = max(longest_seq, len(chunks))

        parsed = []
        for chunk in chunks:
            # str.split with a separator always yields at least one row
            rows = chunk.split("\n")
            parsed.append([[int(ch) for ch in row] for row in rows])
            tallest_grid = max(tallest_grid, len(rows))

        # the final chunk of a sequence is treated as empty and dropped
        gridSeqs.append(parsed[:-1])

    return gridSeqs, longest_seq, tallest_grid

# Parse the sample file once: ca_seqs holds the grid sequences, msl and mgl are
# the second and third values returned by importCellAuto (max sequence length
# and max grid row count).
ca_seqs, msl, mgl = importCellAuto()

#show first sample
# Prints "<number of grids in the first sequence> x <rows in its first grid>",
# followed by the raw first sequence.
print(f"{len(ca_seqs[0])} x {len(ca_seqs[0][0])}")
print(ca_seqs[0])

# pad every grid of a sequence so its side length is a multiple of `multiple`
def padGridSeqs(grid_seq, multiple):
    """Pad each grid in grid_seq with 2's up to the next multiple of `multiple`.

    The amount of padding is derived from the row count of the FIRST grid and
    applied unchanged to every grid in the sequence, and to every axis of each
    grid (np.pad receives a single (before, after) pair). The padding is
    centred; when the total is odd the extra cell goes after the data.

    A side length that is already a multiple of `multiple` receives no
    padding, and a side length larger than `multiple` is padded up to the next
    multiple (the previous arithmetic only worked for sides smaller than
    `multiple`).

    Args:
        grid_seq: sequence of 2-D array-likes (assumed to share one size).
        multiple: positive integer the padded side length must be divisible by.

    Returns:
        A list of numpy arrays, one per input grid; [] for an empty sequence.
    """
    if len(grid_seq) == 0:
        return []

    side = len(grid_seq[0])
    total = -side % multiple      # cells missing to reach the next multiple
    before = total // 2
    after = total - before        # takes the odd cell, if any

    return [
        np.pad(g, (before, after), mode='constant', constant_values=2)
        for g in grid_seq
    ]

# Smoke test for padGridSeqs: a one-grid sequence holding a 3x3 grid, padded
# with multiple=8; the padded result is printed for visual inspection.
ex = [[[0,1,0],[0,1,1],[1,1,0]]]
pex = padGridSeqs(ex,8)
print(pex)

#separate grid into nxn submatrices (should be evenly divisible)
def segGrid(g,n):
    """Split a 2-D grid into n x n blocks, listed in row-major block order.

    Row slices are driven by the number of row blocks and column slices by the
    number of column blocks, so non-square grids are segmented correctly.
    Rows/columns left over when a side is not divisible by n are ignored.

    Args:
        g: 2-D array-like.
        n: side length of each block.

    Returns:
        A list of numpy sub-arrays (slices of one array built from g);
        [] when g is empty.
    """
    gi = np.array(g)
    if gi.size == 0:
        return []

    row_blocks = gi.shape[0] // n
    col_blocks = gi.shape[1] // n

    return [
        gi[r * n:(r + 1) * n, c * n:(c + 1) * n]
        for r in range(row_blocks)
        for c in range(col_blocks)
    ]

#return nxn segments of the matrix sequences with a library
def segSeq(s,n):
    """Replace every grid of every sequence by the ids of its n x n blocks.

    Each grid is split with segGrid; every distinct block (keyed by its str()
    form) gets the next integer id, shared across all sequences.

    Args:
        s: iterable of sequences, each an iterable of 2-D grids. A single
           sequence must be wrapped in a list.
        n: block side length passed to segGrid.

    Returns:
        (lib, seqs): lib maps str(block) -> integer id; seqs mirrors s, with
        each grid replaced by the list of ids of its blocks.
    """
    lib = {}
    seqs = []
    for si in s:
        cur_seq = []
        #iterate over every full grid in the current sequence
        for g in si:
            gk = []
            for sub in segGrid(g, n):
                # NOTE(review): str() of a large numpy array is abbreviated
                # by numpy, so very large blocks could share a key -- confirm
                # block sizes stay small.
                # len(lib) is read before insertion, i.e. the next unused id
                gk.append(lib.setdefault(str(sub), len(lib)))
            cur_seq.append(gk)
        seqs.append(cur_seq)

    return lib, seqs

# Demo: three 3x3 grids form one sequence; pad to 4x4, then key the 4x4 blocks.
test = [[[0,1,0],[0,1,1],[1,1,0]], [[0,1,1],[0,1,1],[1,1,1]], [[0,0,0],[0,0,0],[1,1,0]]]
pad_test = padGridSeqs(test,4)
print(pad_test)
# segSeq takes a list of sequences, so the single demo sequence is wrapped
l, s = segSeq([pad_test], 4)
print(l)
print(s)

#### Autoencoding [FAILED]

# pad input grids for encoding
# mgl is the third value returned by importCellAuto (largest row count seen in
# any parsed grid); it is used below as the side length of the padded grids.
max_grid_size = mgl
print(f"Max Grid Size: {max_grid_size}")

#pads a binary grid sequence with 2's up to side length sql to form square grids
def padGridSeq(grid_seq, sql):
    """Pad every grid in grid_seq on the right and bottom with 2's to sql x sql.

    Rows are extended to sql entries first, then rows of 2's are appended
    until the grid has sql rows. Rows longer than sql and grids taller than
    sql are left at their original size. The inputs are not modified.

    Each appended padding row is a separate list; earlier versions reused one
    list object for all padding rows of a grid, so editing one edited them all.

    Args:
        grid_seq: iterable of grids, each a list of row lists.
        sql: target side length.

    Returns:
        A list of padded grids (lists of row lists).
    """
    pgs = []
    for g in grid_seq:
        pg = [x + [2] * (sql - len(x)) for x in g]                #pad columns
        pg.extend([2] * sql for _ in range(sql - len(pg)))        #pad rows
        pgs.append(pg)
    return pgs

#test function
#ex = [[[0,1,0,1],[0,1,1,1],[1,1,0,0],[0,0,0,1]]]
#print(padGridSeq(ex,7))

# Pad every grid of every sequence to max_grid_size x max_grid_size.
padded_seqs = [padGridSeq(x, max_grid_size) for x in ca_seqs]
try:
    pad_ca_seqs = np.array(padded_seqs)
except ValueError:
    # Sequences of different lengths form a ragged nesting, which recent NumPy
    # refuses to convert implicitly; store them as an object array instead.
    pad_ca_seqs = np.array(padded_seqs, dtype=object)

print(ca_seqs[3])
print(pad_ca_seqs[3])

#make train and test data
# The last 50 sequences are held out for testing; every grid becomes one
# flattened sample.
x_train = []
x_test = []

for s in pad_ca_seqs[:-50]:
    x_train.extend(np.array(g).flatten() for g in s)
for s in pad_ca_seqs[-50:]:
    x_test.extend(np.array(g).flatten() for g in s)

# Sanity check: a flattened grid must hold max_grid_size^2 cells.
expected_len = max_grid_size * max_grid_size
assert len(x_train[0]) == expected_len, \
    f"expected {expected_len} cells per grid, got {len(x_train[0])}"

x_train = np.array(x_train)
x_test = np.array(x_test)

print(x_train.shape)
print(x_test.shape)

# encode grids using simple autoencoder
# tutorial from https://blog.keras.io/building-autoencoders-in-keras.html
import keras
from keras import layers

#setup layers
# The input/output width follows the flattened grid size of the training data
# instead of a hard-coded 400, so a different max grid size still works.
input_dim = x_train.shape[1]

# Encoder: input_dim -> 256 -> 128 -> 64
input_img = layers.Input(shape=(input_dim,))
encoded = layers.Dense(256, activation='relu')(input_img)
encoded = layers.Dense(128, activation='relu')(encoded)
encoded = layers.Dense(64, activation='relu')(encoded)
#encoded = layers.Dense(32, activation='relu')(encoded)

# Decoder: 64 -> 128 -> 256 -> input_dim
#decoded = layers.Dense(64, activation='relu')(encoded)
decoded = layers.Dense(128, activation='relu')(encoded)
decoded = layers.Dense(256, activation='relu')(decoded)
# NOTE(review): the sigmoid output lies in [0, 1] and the loss is binary
# cross-entropy, but the padded grids use 2 as the padding value, so padding
# cells can never be reconstructed exactly -- consider rescaling the data or
# switching the output/loss.
decoded = layers.Dense(input_dim, activation='sigmoid')(decoded)

#build and train ae
autoencoder = keras.Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

# Shares the encoder layers with the autoencoder; outputs the 64-unit code.
encoder = keras.Model(input_img,encoded)

# The autoencoder is trained to reproduce its own input.
autoencoder.fit(x_train, x_train,
                epochs=100,
                batch_size=16,
                shuffle=True,
                validation_data=(x_test, x_test))

# Use Matplotlib (don't ask)
import matplotlib.pyplot as plt

# How many grids we will display (at most 10, fewer if the test set is smaller)
n = min(10, len(x_test))
decoded_imgs = autoencoder.predict(x_test[:n])

# Only the top-left ogs x ogs corner of each grid is shown
# (presumably the unpadded grid size -- TODO confirm).
ogs = 10

plt.figure(figsize=(20, 4))
for i in range(n):
    # Display original (top row)
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(max_grid_size, max_grid_size)[:ogs, :ogs], cmap='binary')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Display reconstruction (bottom row)
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(max_grid_size, max_grid_size)[:ogs, :ogs], cmap='binary')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()