In [4]:
# crawled img links
with open ('./jandan-links.txt', 'r') as f:
    links = f.readlines()
links = [x[:-1] for x in links]
links = [x for x in links if x != '']

In [14]:
# image pre-process: resize, squarize, to_array
import numpy as np
import urllib
from PIL import Image, ImageOps
from io import BytesIO
import http

def url2img(url):
    try:
        response = urllib.request.urlopen(url).read()
        image = Image.open(BytesIO(response))
        # use the first frame if gif
        if '.gif' in url: 
            image.seek(0)
        # make sure the image is in RGB mode
        if image.mode != 'RGB':
            image = image.convert(mode='RGB')
        # center crop
        width, height = image.size
        new    = min(width, height)
        left   = (width - new) // 2
        top    = (height - new) // 2
        right  = (width + new) // 2
        bottom = (height + new) // 2
        image = image.crop((left, top, right, bottom))
        image = image.resize(size=(100,100))
        # now convert the image to numpy array
        array = np.array(image, dtype=np.float32)
        return array
    except http.client.IncompleteRead:
        pass
    except urllib.request.HTTPError:
        pass

In [30]:
import dask.bag as db

img_bag = db.from_sequence(links[:5000])
results = img_bag.map(url2img).compute()

In [10]:
# implementation following notebook by mchablani
# https://github.com/mchablani/deep-learning/blob/master/autoencoder/Convolutional_Autoencoder.ipynb
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data', validation_size=0)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [33]:
import tensorflow as tf

train = results[:4000]
validation = results[4000:]

learning_rate  = 0.01
# Input and target placeholders
inputs_ = tf.placeholder(tf.float32, (None, 100,100,3), name="input")
targets_ = tf.placeholder(tf.float32, (None, 100,100,3), name="target")

### Encoder
conv1 = tf.layers.conv2d(inputs=inputs_, filters=32, kernel_size=(10,10), padding='same', activation=tf.nn.relu)
# Now 28x28x16
maxpool1 = tf.layers.max_pooling2d(conv1, pool_size=(2,2), strides=(2,2), padding='same')
# Now 14x14x16
conv2 = tf.layers.conv2d(inputs=maxpool1, filters=8, kernel_size=(3,3), padding='same', activation=tf.nn.relu)
# Now 14x14x8
maxpool2 = tf.layers.max_pooling2d(conv2, pool_size=(2,2), strides=(2,2), padding='same')
# Now 7x7x8
conv3 = tf.layers.conv2d(inputs=maxpool2, filters=8, kernel_size=(3,3), padding='same', activation=tf.nn.relu)
# Now 7x7x8
encoded = tf.layers.max_pooling2d(conv3, pool_size=(2,2), strides=(2,2), padding='same')
# Now 4x4x8

### Decoder
upsample1 = tf.image.resize_images(encoded, size=(7,7), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
# Now 7x7x8
conv4 = tf.layers.conv2d(inputs=upsample1, filters=8, kernel_size=(3,3), padding='same', activation=tf.nn.relu)
# Now 7x7x8
upsample2 = tf.image.resize_images(conv4, size=(14,14), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
# Now 14x14x8
conv5 = tf.layers.conv2d(inputs=upsample2, filters=8, kernel_size=(3,3), padding='same', activation=tf.nn.relu)
# Now 14x14x8
upsample3 = tf.image.resize_images(conv5, size=(100,100), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
# Now 28x28x8
conv6 = tf.layers.conv2d(inputs=upsample3, filters=32, kernel_size=(10,10), padding='same', activation=tf.nn.relu)
# Now 28x28x16

logits = tf.layers.conv2d(inputs=conv6, filters=3, kernel_size=(10,10), padding='same', activation=None)
#Now 28x28x1

# Pass logits through sigmoid to get reconstructed image
decoded = tf.nn.sigmoid(logits)

# Pass logits through sigmoid and calculate the cross-entropy loss
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)

# Get cost and define the optimizer
cost = tf.reduce_mean(loss)
opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [34]:
session = tf.Session()

In [35]:
epochs = 20
batch_size = 5
session.run(tf.global_variables_initializer())
for e in range(epochs):
    for ii in range(len(train)//batch_size - 1):
        batch = train[batch_size*ii: batch_size*(ii+1)]
        imgs = batch[0].reshape((-1, 100, 100, 3))
        batch_cost, _ = session.run([cost, opt], feed_dict={inputs_: imgs,
                                                         targets_: imgs})
        print("Epoch: {}/{}...".format(e+1, epochs),
              "Training loss: {:.4f}".format(batch_cost))

Epoch: 1/20... Training loss: -19.8318
Epoch: 1/20... Training loss: -8193970.5000
Epoch: 1/20... Training loss: -141576848.0000
Epoch: 1/20... Training loss: -584238464.0000
Epoch: 1/20... Training loss: -2187585792.0000
Epoch: 1/20... Training loss: -5083965440.0000
Epoch: 1/20... Training loss: -30152822784.0000
Epoch: 1/20... Training loss: -113656610816.0000
Epoch: 1/20... Training loss: -39158587392.0000
Epoch: 1/20... Training loss: -123788886016.0000
Epoch: 1/20... Training loss: -1065293316096.0000
Epoch: 1/20... Training loss: -2938027900928.0000
Epoch: 1/20... Training loss: -1241241223168.0000
Epoch: 1/20... Training loss: -6040435294208.0000
Epoch: 1/20... Training loss: -2711570874368.0000
Epoch: 1/20... Training loss: -29780642627584.0000
Epoch: 1/20... Training loss: -36330616651776.0000
Epoch: 1/20... Training loss: -8814105460736.0000
Epoch: 1/20... Training loss: -47033972948992.0000
Epoch: 1/20... Training loss: -24063984533504.0000
Epoch: 1/20... Training loss: -86

AttributeError: 'NoneType' object has no attribute 'reshape'

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=10, sharex=True, sharey=True, figsize=(20,4))
in_imgs = mnist.test.images[:10]
reconstructed = sess.run(decoded, feed_dict={inputs_: in_imgs.reshape((10, 28, 28, 1))})

for images, row in zip([in_imgs, reconstructed], axes):
    for img, ax in zip(images, row):
        ax.imshow(img.reshape((28, 28)), cmap='Greys_r')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)


fig.tight_layout(pad=0.1)