In [35]:
from os import listdir
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm
import numpy as np
import h5py as h5py
from compiler.classes.Compiler import *
import shutil
import os
# Make CUDA number devices by PCI bus id, so that the index used below refers
# to a stable physical device.
# NOTE(review): these variables are set after the keras import above; confirm
# the backend has not already initialised CUDA by the time they are read.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0"  # Restrict this process to GPU 0 only

In [36]:
# Read a file and return a string
def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    Args:
        filename: Path of the file to read. It is opened in text mode with
            the platform default encoding.

    Returns:
        The complete contents of the file as a ``str``.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'r') as file:
        return file.read()


def get_preprocessed_img(img_path, image_size):
    """Load an image, resize it to a square and scale it to float32.

    Args:
        img_path: Path of the image file to read with OpenCV.
        image_size: Width and height, in pixels, of the returned image.

    Returns:
        A ``float32`` array holding the resized image with every value
        divided by 255 (i.e. in ``[0, 1]`` assuming an 8-bit source image).

    Raises:
        OSError: If OpenCV could not read ``img_path``.
    """
    # Imported lazily so that cv2 is only required when PNG files are loaded.
    import cv2

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising; fail here with a message naming the file.
        raise OSError("Could not read image file: {}".format(img_path))
    img = cv2.resize(img, (image_size, image_size))
    img = img.astype('float32')
    img /= 255
    return img


def load_data(data_dir):
    """Load every image and token file found directly inside ``data_dir``.

    Files are visited in sorted filename order and dispatched on their name:

    * ``*.png`` -- read and preprocessed with ``get_preprocessed_img`` at
      256x256.
    * ``*.npz`` -- the ``features`` array stored in the archive is used.
    * anything else -- read as a token file, wrapped in ``<START>`` /
      ``<END>``, whitespace-normalised, with a space inserted before commas.

    Images and texts are collected in two independent lists, so they only
    line up by position when every sample contributes exactly one image file
    and one token file.

    Args:
        data_dir: Directory to scan. A trailing path separator is optional.

    Returns:
        A tuple ``(images, text)`` where ``images`` is a float ``np.ndarray``
        built from all image arrays and ``text`` is a list of token strings.
    """
    text = []
    images = []
    # Load all the files and order them
    all_filenames = sorted(listdir(data_dir))
    print(all_filenames)
    for filename in all_filenames:
        # Join once so every branch works with or without a trailing slash.
        path = os.path.join(data_dir, filename)
        if filename.endswith(".png"):
            images.append(get_preprocessed_img(path, 256))
        elif filename.endswith(".npz"):
            # Load the images already prepared in arrays; the context manager
            # closes the underlying archive once the array has been read.
            with np.load(path) as archive:
                images.append(archive['features'])
        else:
            # Load the bootstrap tokens and wrap them in a start and end tag
            syntax = '<START> ' + load_doc(path) + ' <END>'
            # Separate all the words with a single space
            syntax = ' '.join(syntax.split())
            # Add a space before each comma so it becomes its own token
            syntax = syntax.replace(',', ' ,')
            text.append(syntax)
    images = np.array(images, dtype=float)
    return images, text

In [37]:
# Create the tokenizer: no characters are filtered out, text is split on
# single spaces, and case is preserved.
tokenizer = Tokenizer(filters='', split=" ", lower=False)
# Build the vocabulary from the contents of the bootstrap vocabulary file
tokenizer.fit_on_texts([load_doc('resources/bootstrap.vocab')])

In [38]:
# Directory scanned by load_data for image and token files
dir_name = 'resources/testdata/'
# NOTE(review): the variable is called train_features although it is filled
# from the 'testdata' directory -- confirm the naming is intentional.
train_features, texts = load_data(dir_name)

['00150311-A7AE-4804-8B7D-9273687B4FC0.gui', '00150311-A7AE-4804-8B7D-9273687B4FC0.npz', '00190F39-0DE9-47EB-B0C2-856FDD3ACE62.gui', '00190F39-0DE9-47EB-B0C2-856FDD3ACE62.npz', '00CDC9A8-3D73-4291-90EF-49178E408797.gui', '00CDC9A8-3D73-4291-90EF-49178E408797.npz', '020BDFEE-DA74-4EB0-BF6A-5D29C371983E.gui', '020BDFEE-DA74-4EB0-BF6A-5D29C371983E.npz', '022B62A5-5FFC-4CC9-B10F-0EF1E3E04367.gui', '022B62A5-5FFC-4CC9-B10F-0EF1E3E04367.npz', '02AC4408-AC50-4ADE-9AB0-10479A69987F.gui', '02AC4408-AC50-4ADE-9AB0-10479A69987F.npz', '03428AC1-EC38-424D-97E2-D063F1578F4F.gui', '03428AC1-EC38-424D-97E2-D063F1578F4F.npz', '03D5D298-8F87-4DED-AF85-7E7CC2D40924.gui', '03D5D298-8F87-4DED-AF85-7E7CC2D40924.npz', '0566F61B-9B69-4A36-B868-3BDB4FB19CB8.gui', '0566F61B-9B69-4A36-B868-3BDB4FB19CB8.npz', '05E08DEE-21EA-4612-9424-17B52F6D2DC0.gui', '05E08DEE-21EA-4612-9424-17B52F6D2DC0.npz', '076292E2-ABB7-401F-9C93-B72EE17CF54E.gui', '076292E2-ABB7-401F-9C93-B72EE17CF54E.npz', '097BCAB5-B471-48F7-A32A-591A9A

In [39]:
# Load the model architecture from JSON, then the trained weights
# The context manager closes the JSON file even if reading it fails.
with open('../../../emilwallner-datasets-imagetocode-2/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("../../../emilwallner-datasets-imagetocode-2/weights.h5")
print("Loaded model from disk")

Loaded model from disk


In [40]:
# Reverse vocabulary lookup: integer id -> word
def word_for_id(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``integer``.

    Entries are examined in the mapping's iteration order and the first match
    wins. ``None`` is returned when no entry has that index.
    """
    matching_words = (
        token
        for token, token_id in tokenizer.word_index.items()
        if token_id == integer
    )
    return next(matching_words, None)
# Sanity check: show which vocabulary word is mapped to id 17
print(word_for_id(17, tokenizer))

single


In [41]:
# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length, max_steps=150):
    """Greedily decode a token sequence for one image.

    Starting from ``'<START> '``, the text generated so far is encoded with
    ``tokenizer``, padded to ``max_length`` and fed to ``model`` together
    with the image. The highest-scoring id is mapped back to a word and
    appended. Each word is also printed as it is produced.

    Args:
        model: Object whose ``predict([photo_batch, sequence_batch], verbose=0)``
            returns scores over the vocabulary.
        tokenizer: Tokenizer providing ``texts_to_sequences`` and ``word_index``.
        photo: A single image array; a leading batch axis is added here.
        max_length: ``maxlen`` passed to ``pad_sequences`` for the text input.
        max_steps: Upper bound on the number of generated words (default 150,
            the value previously hard-coded).

    Returns:
        The generated text, starting with ``'<START> '`` and with every word
        followed by a single space. Generation stops after ``'<END>'``, when
        the predicted id has no word, or after ``max_steps`` words.
    """
    # Add a leading batch axis so the model receives a batch of one image.
    photo = np.array([photo])
    # Build the id -> word table once instead of scanning the vocabulary on
    # every step; setdefault keeps the first word seen for an id.
    id_to_word = {}
    for known_word, index in tokenizer.word_index.items():
        id_to_word.setdefault(index, known_word)
    # seed the generation process
    in_text = '<START> '
    print('\nPrediction---->\n\n<START> ', end='')
    for _ in range(max_steps):
        # integer encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad input
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict next word with the model that was passed in (previously the
        # global `loaded_model` was used and the `model` argument ignored)
        yhat = model.predict([photo, sequence], verbose=0)
        # convert scores to the id of the most likely word, then to the word
        word = id_to_word.get(int(np.argmax(yhat)))
        # stop if we cannot map the word
        if word is None:
            break
        # append as input for generating the next word
        in_text += word + ' '
        print(word + ' ', end='')
        # stop if we predict the end of the sequence
        if word == '<END>':
            break
    return in_text

In [42]:
# Sequence length handed to pad_sequences (via evaluate_model -> generate_desc).
# NOTE(review): presumably must equal the text-input length the loaded model
# expects -- confirm.
max_length = 48
# evaluate the skill of the model
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    """Generate a prediction for every sample and score them with corpus BLEU.

    Args:
        model: Model forwarded to ``generate_desc``.
        descriptions: Reference token strings, one per sample.
        photos: Indexable collection of images; ``photos[i]`` belongs to
            ``descriptions[i]``.
        tokenizer: Tokenizer forwarded to ``generate_desc``.
        max_length: Padding length forwarded to ``generate_desc``.

    Returns:
        A tuple ``(bleu, actual, predicted)``: the value returned by
        ``corpus_bleu``, the references (each a one-element list holding the
        whitespace-split reference) and the whitespace-split predictions.
    """
    actual, predicted = [], []
    # Use the `descriptions` argument; the previous version read the global
    # `texts` and silently ignored what the caller passed in.
    print(len(descriptions))
    # step over the whole set
    for i, description in enumerate(descriptions):
        yhat = generate_desc(model, tokenizer, photos[i], max_length)
        # store actual and predicted
        print('\n\nReal---->\n\n' + description)
        # corpus_bleu takes a list of references per sample, hence the wrap.
        actual.append([description.split()])
        predicted.append(yhat.split())
    # calculate BLEU score
    bleu = corpus_bleu(actual, predicted)
    return bleu, actual, predicted

# Run the evaluation over the loaded samples, keeping the BLEU score and the
# tokenised references/predictions for the cells below.
bleu, actual, predicted = evaluate_model(loaded_model, texts, train_features, tokenizer, max_length)

1

Prediction---->

<START> header { btn-active , btn-inactive , btn-inactive , btn-inactive } row { double { small-title , text , btn-green } double { small-title , text , btn-green } } row { single { small-title , text , btn-orange } } row { quadruple { small-title , text , btn-red } quadruple { small-title , text , btn-red } quadruple { small-title , text , btn-red } quadruple { small-title , text , btn-orange } } <END> 

Real---->

<START> header { btn-active , btn-inactive , btn-inactive , btn-inactive } row { double { small-title , text , btn-green } double { small-title , text , btn-green } } row { single { small-title , text , btn-green } } row { quadruple { small-title , text , btn-orange } quadruple { small-title , text , btn-orange } quadruple { small-title , text , btn-orange } quadruple { small-title , text , btn-red } } <END>


In [43]:
# Compile the tokens into HTML and CSS
# JSON file handed to the Compiler (presumably the DSL-token -> markup
# mapping, judging by its name -- confirm).
dsl_path = "compiler/assets/web-dsl-mapping.json"
compiler = Compiler(dsl_path)
# Compile the token list of the first prediction.
# NOTE(review): 'test0.html' is presumably the output file name -- confirm
# against Compiler.compile.
compiled_website = compiler.compile(predicted[0], 'test0.html')

In [44]:
# Show the value returned by compiler.compile for the first prediction
print(compiled_website )

<html>
  <header>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
<style>
.header{margin:20px 0}nav ul.nav-pills li{background-color:#333;border-radius:4px;margin-right:10px}.col-lg-3{width:24%;margin-right:1.333333%}.col-lg-6{width:49%;margin-right:2%}.col-lg-12,.col-lg-3,.col-lg-6{margin-bottom:20px;border-radius:6px;background-color:#f5f5f5;padding:20px}.row .col-lg-3:last-child,.row .col-lg-6:last-child{margin-right:0}footer{padding:20px 0;text-align:center;border-top:1px solid #bbb}
</style>
    <title>Scaffol

In [45]:
# Show the corpus BLEU score returned by evaluate_model
print(bleu)

0.9007886622213419
