In [1]:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
from keras.datasets import mnist
from keras.callbacks import TensorBoard

import numpy as np
import matplotlib.pyplot as plt

from skimage.io import imread
from skimage.transform import resize,rescale
from os import listdir

Using TensorFlow backend.


## We'll train on a dataset of dirty (noisy) document scans to produce clean, denoised document images

## We have to build the dataset manually from the image folders

In [2]:
def _load_image_stack(directory, output_shape=(260, 540)):
    """Read every file in ``directory`` as an image and stack them.

    Each image is resized to ``output_shape`` and given a leading axis of
    length one, then all images are concatenated along that axis.

    Parameters
    ----------
    directory : str
        Folder whose entries are all read with ``imread``.
    output_shape : tuple of int
        (rows, cols) every image is resized to.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_files, rows, cols)``.

    Raises
    ------
    ValueError
        If the folder contains no files, or if a resized image is not 2-D
        (the reshape below only fits single-channel images).
    """
    frames = []
    # listdir() gives no ordering guarantee, so iterate in sorted order to get
    # a deterministic stack. This also keeps the noisy and cleaned folders
    # aligned index-by-index, assuming they use matching file names
    # -- TODO confirm against the dataset.
    for name in sorted(listdir(directory)):
        img = imread(directory + '/' + name)
        # order=1 is the value the former positional ``True`` evaluated to.
        img = resize(img, output_shape, order=1)
        frames.append(img.reshape(1, img.shape[0], img.shape[1]))

    if not frames:
        raise ValueError("no images found in '%s'" % directory)

    return np.concatenate(frames)


def build_data():
    """Load the noisy training, cleaned training and noisy test images.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_train, y_train, x_test)`` read from ``kdd/train``,
        ``kdd/train_cleaned`` and ``kdd/test`` respectively, each of shape
        ``(n_files, 260, 540)``.
    """
    x_train = _load_image_stack('kdd/train')
    y_train = _load_image_stack('kdd/train_cleaned')
    x_test = _load_image_stack('kdd/test')

    return x_train, y_train, x_test

In [3]:
# Load the noisy training images, their cleaned targets, and the noisy test
# images from disk (see build_data for the folders that are read).
x_train, y_train, x_test = build_data()

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [4]:
# Add a trailing channel axis so the arrays match the (260, 540, 1) input
# shape the network is built with.
# Inputs and targets must pair up one-to-one; fail early with a clear message
# instead of relying on a reshape error.
if len(x_train) != len(y_train):
    raise ValueError("x_train and y_train contain a different number of images")

x_train = np.reshape(x_train, (len(x_train), 260, 540, 1))
y_train = np.reshape(y_train, (len(y_train), 260, 540, 1))
x_test = np.reshape(x_test, (len(x_test), 260, 540, 1))

## We have to split the dataset into training and validation sets

In [8]:
# The first n_train samples are used for training; the samples after them
# (up to index 145, the original upper bound -- presumably past the end of
# the dataset, TODO confirm) are held out for validation.
# The validation slice now starts at n_train itself: starting at 121 left
# sample 120 in neither set.
# NOTE: x_train / y_train are overwritten here, so reload the data before
# re-running this cell, otherwise the validation sets come out empty.
n_train = 120

x_validation = x_train[n_train:145]
y_validation = y_train[n_train:145]

x_train = x_train[:n_train]
y_train = y_train[:n_train]

## Network hyperparameters

In [9]:
# Shape of a single network input: 260 x 540 pixels, one channel.
input_shape = (260, 540, 1)

# Training schedule: number of passes over the data and samples per step.
epochs, batch_size = 100, 12

## The encoder model

In [10]:
# Encoder: two (3x3 conv with 32 filters -> 2x2 max-pool) stages.
# Options shared by both convolution layers.
encoder_conv_opts = dict(activation='relu', padding='same')

input_img = Input(shape=input_shape)

downConv = Conv2D(32, (3, 3), **encoder_conv_opts)(input_img)
downConv = MaxPooling2D(pool_size=(2, 2), padding='same')(downConv)
downConv = Conv2D(32, (3, 3), **encoder_conv_opts)(downConv)

# Output of the second pooling layer is the encoded representation.
encoded = MaxPooling2D(pool_size=(2, 2), padding='same')(downConv)

## The decoder model

In [11]:
# Decoder: two (3x3 conv with 32 filters -> 2x2 upsampling) stages,
# mirroring the two pooling steps of the encoder.
decoder_conv_opts = dict(activation='relu', padding='same')

upConv = Conv2D(32, (3, 3), **decoder_conv_opts)(encoded)
upConv = UpSampling2D(size=(2, 2))(upConv)
upConv = Conv2D(32, (3, 3), **decoder_conv_opts)(upConv)
upConv = UpSampling2D(size=(2, 2))(upConv)

# Single-filter sigmoid convolution produces the one-channel output image.
decoded = Conv2D(1, (3, 3), padding='same', activation='sigmoid')(upConv)

## The autoencoder model

In [12]:
# Tie the input tensor to the decoder output to form the full autoencoder.
autoencoder = Model(inputs=input_img, outputs=decoded)

# Adam optimizer with a per-pixel binary cross-entropy loss.
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

# Print the layer-by-layer overview of the model.
autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 260, 540, 1)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 260, 540, 32)      320       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 130, 270, 32)      0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 130, 270, 32)      9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 65, 135, 32)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 65, 135, 32)       9248      
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 130, 270, 32)      0         
__________

## Training

In [None]:
# Train on the noisy -> clean pairs, evaluating on the held-out validation
# pairs; verbose=0 suppresses progress output.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    verbose=0,
)

autoencoder.fit(
    x_train,
    y_train,
    validation_data=(x_validation, y_validation),
    **fit_options
)