# NeuralMosaic
### A CNN + Cosine-Similarity-Based Photomosaic Generator
### Copyright 2018 K.D.P.Ross

This code is licensed only for study and personal enrichment.

In [None]:
import numpy as np
import tensorflow as tf
import progressbar
from progressbar import ProgressBar
import scipy.io
import scipy.misc
import imageio
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

%matplotlib inline

To have the slightest hope of running this, you will need:
- `./weightz.npz`: pretrained weights for VGG-19
- `./ImageTiles.txt`: a one-filename-per-line list of the tiles to use (along with the actual tiles, each of size `tileSize`x`tileSize`, wherever the list says they'll be; you probably want at least ~100k images) ⟦ N.b., things will go terribly wrong if the dimensions of the tile images are inconsistent! ⟧
- `test.png` (the default value of `inImage`): an input image for which to generate a photomosaic; best if its dimensions are multiples of `tileSize`
- a fair bit of RAM (with `conv4_2`, I saw usage peak at ~30GiB) and a half-decent GPU, especially if you use one of the earlier layers for `layerToUse` (Spoiler: 640k will *not* be enough.)

# Parameters / Preliminaries

In [None]:
# 'Knobs' that may be worth twiddling; the rest of the
# notebook reads these names.

# Pretrained VGG-19 weights, dumped to a Numpy archive.
weightsFile = 'weightz.npz'

# List of tile images, one filename per line.
filesFile = 'ImageTiles.txt'

# Image to turn into a mosaic, and where to write the result.
inImage  = 'test.png'
outImage = 'joined.png'

# Edge length (in pixels) of the square tiles.
tileSize = 32

# Images per TF run. Scale this down if your TF b0rks when
# it tries to run on your GPU.
batchSize = 256

# Things could get interesting if you change *this*!
colourChannels = 3

# Layer whose activations serve as the feature vec.
# Recommended layers to try: 'conv3_2', 'conv4_2', 'conv5_4'.
# (If you're low on RAM / have a crap GPU, go for '5_4', for
# obvious reasons.)
layerToUse = 'conv4_2'

In [None]:
def loadList(f):
    """Read a one-entry-per-line text file into a list of strings.

    Surrounding whitespace (including the newline) is stripped from every
    line. Lines that are empty after stripping are skipped, so a stray
    blank line in the list doesn't turn into an empty filename further
    down the pipeline.
    """
    with open(f) as reader:
        stripped = (line.strip() for line in reader)

        return [ entry for entry in stripped if entry ]

# Prepare the Model

In [None]:
# Pull the pretrained VGG-19 parameters out of the Numpy
# archive (these ought to be freely available, but you'll
# need to dump them into a Numpy file yourself), and check
# up front that the kernel ('W') and bias ('b') arrays of
# every conv layer are present. Blocks 1 and 2 have two conv
# layers each; blocks 3, 4 and 5 have four.

weightKeys = { 'conv%d_%d%s' % (block, layer, kind)
               for (block, count) in ((1, 2), (2, 2), (3, 4), (4, 4), (5, 4))
               for layer in range(1, count + 1)
               for kind in ('W', 'b')
             }
weights    = np.load(weightsFile)

assert weightKeys.issubset(weights.keys())

In [None]:
# Const's for the TF ops below that we couldn't compute;
# each list has one entry per dimension of the 4-d tensors
# that flow through the model.

padSame     = 'SAME'         # padding mode shared by conv's and pools
strides     = [ 1 ] * 4      # conv's step by one along every dimension
stridesPool = [ 1, 2, 2, 1 ] # pools step by two along the two middle dimensions ...
sizePool    = [ 1, 2, 2, 1 ] # ... over windows of size two along the same

# We'll use two kinds of layers for the VGG model: 2d conv's
# + ReLU, and avg. pooling (n.b., the original VGG-19 uses
# max. pooling; avg. pooling is swapped in here); the final
# pooling layer is left off because we'll never use it.

def convActivate(lPrev, lName):
    """Apply the pretrained conv layer `lName` to `lPrev`, followed by ReLU.

    The kernel is read from `weights[ lName + 'W' ]` and the bias from
    `weights[ lName + 'b' ]` (flattened to 1-d); both are baked into the
    graph as constants. The conv uses the shared `strides` / `padSame`.
    """
    kernel    = tf.constant(weights[ lName + 'W' ])
    convolved = tf.nn.conv2d(lPrev,
                             filter  = kernel,
                             strides = strides,
                             padding = padSame
                            )
    bias      = tf.constant(weights[ lName + 'b' ].flatten())

    return tf.nn.relu(convolved + bias)

def avgPool(lPrev):
    """Average-pool `lPrev` with the shared `sizePool` window, `stridesPool` step and `padSame` padding."""
    poolArgs = { 'ksize'   : sizePool,
                 'strides' : stridesPool,
                 'padding' : padSame
               }

    return tf.nn.avg_pool(lPrev, **poolArgs)

# Wire up the VGG-19 conv stack on top of a placeholder that
# takes a batch of tiles. Each named tensor is registered in
# `model` as it's created, which lets us index by name (e.g.
# `model[ layerToUse ]`) instead of hard-coding indices. The
# pooling outputs only feed the next block and aren't
# registered.

Input = tf.placeholder(dtype = 'float32',
                       shape = (None, tileSize, tileSize, colourChannels)
                      )
model = { 'input' : Input }

# Block 1.
model[ 'conv1_1' ] = Conv1_1 = convActivate(Input, 'conv1_1')
model[ 'conv1_2' ] = Conv1_2 = convActivate(Conv1_1, 'conv1_2')
Pool1              = avgPool(Conv1_2)

# Block 2.
model[ 'conv2_1' ] = Conv2_1 = convActivate(Pool1, 'conv2_1')
model[ 'conv2_2' ] = Conv2_2 = convActivate(Conv2_1, 'conv2_2')
Pool2              = avgPool(Conv2_2)

# Block 3.
model[ 'conv3_1' ] = Conv3_1 = convActivate(Pool2, 'conv3_1')
model[ 'conv3_2' ] = Conv3_2 = convActivate(Conv3_1, 'conv3_2')
model[ 'conv3_3' ] = Conv3_3 = convActivate(Conv3_2, 'conv3_3')
model[ 'conv3_4' ] = Conv3_4 = convActivate(Conv3_3, 'conv3_4')
Pool3              = avgPool(Conv3_4)

# Block 4.
model[ 'conv4_1' ] = Conv4_1 = convActivate(Pool3, 'conv4_1')
model[ 'conv4_2' ] = Conv4_2 = convActivate(Conv4_1, 'conv4_2')
model[ 'conv4_3' ] = Conv4_3 = convActivate(Conv4_2, 'conv4_3')
model[ 'conv4_4' ] = Conv4_4 = convActivate(Conv4_3, 'conv4_4')
Pool4              = avgPool(Conv4_4)

# Block 5.
model[ 'conv5_1' ] = Conv5_1 = convActivate(Pool4, 'conv5_1')
model[ 'conv5_2' ] = Conv5_2 = convActivate(Conv5_1, 'conv5_2')
model[ 'conv5_3' ] = Conv5_3 = convActivate(Conv5_2, 'conv5_3')
model[ 'conv5_4' ] = Conv5_4 = convActivate(Conv5_3, 'conv5_4')

In [None]:
# Boot up TF: open the session `s` through which the cells
# below evaluate the model (`s.run(...)`); it's closed again
# (`s.close()`) at the very end of the notebook.

s = tf.InteractiveSession()

# Load / Preprocess Target / Input Images

In [None]:
# Load an image as a rows x columns x 3 array: a greyscale
# image is copied across to three channels, and the alpha
# channel of a four-channel (RGBA) image is dropped, so that
# every tile / input image ends up with the three channels
# that the rest of the pipeline asserts on. Anything else is
# handed back exactly as `imageio` returned it.

def readImage(f):
    raw = imageio.imread(f)

    if 2 == raw.ndim: # Greyscale image; just copy 3x; not quite right, but it'll do!
        return np.stack([ np.asarray(raw) ] * 3, axis = -1)

    if 3 == raw.ndim and 4 == raw.shape[ 2 ]: # RGBA; keep the colour channels, ignore the alpha.
        return raw[ :, :, : 3 ]

    return raw

In [None]:
# Load the library of output tiles: read every file named in
# `filesFile` and pack the lot into a single array. The
# assertion trips if that array isn't exactly
# (number of tiles, tileSize, tileSize, colourChannels).

files = loadList(filesFile)

with ProgressBar(max_value = len(files)) as prog:
    imageMatrix = np.array(list(map(readImage, prog(files))))

assert imageMatrix.shape == (len(files), tileSize, tileSize, colourChannels)

In [None]:
# Generate feature vec's for the output tiles: push the tile
# library through the network, `batchSize` tiles at a time,
# keeping the activations of `layerToUse`; then smoosh the
# per-batch results together along the first axis.

with ProgressBar() as prog:
    res = [ s.run(model[ layerToUse ],
                  feed_dict = { Input : imageMatrix[ start : start + batchSize ] }
                 )
            for start in prog(range(0, len(files), batchSize))
          ]

res = np.concatenate(res, axis = 0)

In [None]:
# Chop the input image into tileSize x tileSize tiles, row
# by row, left to right. Only whole tiles are taken, so any
# remainder along the right / bottom edges is effectively
# cropped away; for best results, use an image whose
# dimensions are multiples of the tile size! `rowWidth` is
# the number of tiles per row.

inputRaw  = np.array(readImage(inImage))
r, c, _   = inputRaw.shape
rowWidth  = c // tileSize
rowCount  = r // tileSize

inputTiles = [ inputRaw[ top : top + tileSize, left : left + tileSize ]
               for top in range(0, rowCount * tileSize, tileSize)
               for left in range(0, rowWidth * tileSize, tileSize)
             ]
inputMatrix = np.array(inputTiles)

assert inputMatrix.shape == (len(inputTiles), tileSize, tileSize, colourChannels)

In [None]:
# Generate feature vec's for the input tiles, exactly as for
# the output tiles: `batchSize` tiles per run, keeping the
# activations of `layerToUse`, then join the batches along
# the first axis.

with ProgressBar() as prog:
    resInput = [ s.run(model[ layerToUse ],
                       feed_dict = { Input : inputMatrix[ start : start + batchSize ] }
                      )
                 for start in prog(range(0, len(inputTiles), batchSize))
               ]

resInput = np.concatenate(resInput, axis = 0)

In [None]:
# Flatten things out to
# <number-of-images>x<full-size-of-volume-per-image>;
# literally don't care about ordering, so long as it's
# consistent between output / input tiles. (The `-1` has
# `reshape` work out the per-image size itself, which also
# avoids `np.product`, a deprecated alias of `np.prod` that
# NumPy 2.0 removed.)

lhsFlat = res.reshape((res.shape[ 0 ], -1))
rhsFlat = resInput.reshape((resInput.shape[ 0 ], -1))

In [None]:
# Find the 'semantically-closest' (at least in the vec-space
# projection that we have) output tile for each input tile.
# `sims` has one row per output tile and one column per
# input tile, so taking the arg-max down each column yields,
# per input tile, the index of its best-matching output
# tile. Especially for gradients and nearly-solid-colour
# areas, this will, unsurprisingly, choose the same tile
# repeatedly; could be fun to, perhaps, take the top `n`
# candidates and choose randomly among them.

sims        = cosine_similarity(lhsFlat, rhsFlat)
tileIndices = sims.argmax(axis = 0)

In [None]:
# Map each chosen index back to its tile's filename, then
# split the flat list into mosaic rows of `rowWidth`
# filenames apiece. Slicing by offset (rather than
# repeatedly re-slicing the shrinking remainder) keeps this
# linear in the number of tiles and leaves `selectedTiles`
# intact.

selectedTiles = [ files[ i ] for i in tileIndices ]
tiles         = [ selectedTiles[ start : start + rowWidth ]
                  for start in range(0, len(selectedTiles), rowWidth)
                ]

In [None]:
# Construct the output image: allocate a blank canvas big
# enough for every row of tiles, then re-read each selected
# tile and paste it in at the offset given by its row /
# column position.

outY      = tileSize * len(tiles)
outX      = tileSize * len(tiles[ 0 ])
outMatrix = np.zeros((outY, outX, 3), dtype = 'uint8')

for (rowIdx, row) in enumerate(tiles):
    top = rowIdx * tileSize

    for (colIdx, name) in enumerate(row):
        left = colIdx * tileSize

        outMatrix[ top : top + tileSize, left : left + tileSize, : ] = readImage(name)

In [None]:
# Preview of the output: open a 10x10 figure and draw the
# assembled mosaic (`outMatrix`) into it.

fig = plt.figure(figsize = (10, 10))

plt.imshow(outMatrix)

In [None]:
# Save our 'masterpiece' for future generations to marvel
# at. Or something like that. (Writes `outMatrix` to the
# path in `outImage`.)

imageio.imwrite(outImage, outMatrix)

In [None]:
# Tidy up by closing the TF session `s` that was opened
# before the feature-extraction cells.

s.close()