In [1]:
import json
import os
import random
import string
from os import listdir

import numpy as np
from keras.models import load_model, model_from_json
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from nltk.translate.bleu_score import corpus_bleu


ModuleNotFoundError: No module named 'keras'

In [None]:
# Input locations for this evaluation run.
# NOTE(review): `dataset` is an absolute, machine-specific path, whereas the model
# artefacts below are relative to the working directory -- adjust before re-running elsewhere.
dataset = 'C:/Users/User-PC/Mubs/Datascience/mlab/aws/650_1/'
# Checkpoint file passed to load_weights() once the model has been rebuilt
weights = "app/data/model/weights/org-weights-epoch-0060--loss-0.0261.hdf5"
# Model architecture serialised as JSON (input of model_from_json)
model_json = 'app/data/model/model.json'
# Text file whose contents are used to fit the tokenizer vocabulary
bootstrap_vocab = 'app/data/model/bootstrap.vocab'
# JSON mapping that the Compiler class loads to turn DSL tokens into markup
web_dsl_mapping = "app/data/model/web-dsl-mapping.json"

def load_doc(filename):
    """Read a text file and return its whole content as one string.

    Args:
        filename: Path of the file to read (opened in text mode, 'r').

    Returns:
        The complete file content as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'r') as file:
        return file.read()

def load_data(data_dir):
    """Load image features and DSL token strings from ``data_dir``.

    Only the last six directory entries (after sorting by name) are read.
    Entries whose name ends in ``npz`` are opened with ``np.load`` and their
    ``'features'`` array is collected. Every other entry is read as text,
    wrapped in ``<START>`` / ``<END>`` markers and whitespace-normalised.

    Args:
        data_dir: Directory holding the files. A trailing path separator is
            accepted but no longer required.

    Returns:
        A tuple ``(images, text)``: ``images`` is a float ``np.ndarray`` built
        from the collected feature arrays, ``text`` is a list of token strings.
    """
    text = []
    images = []
    # Sort the directory listing so the evaluation order is deterministic
    all_filenames = sorted(listdir(data_dir))
    # NOTE(review): only the last six entries are used -- presumably a small
    # evaluation sample; confirm before relying on the resulting score.
    for filename in all_filenames[-6:]:
        # os.path.join gives the same path as plain concatenation when data_dir
        # already ends with a separator, and also works when it does not.
        path = os.path.join(data_dir, filename)
        if filename[-3:] == "npz":
            # Load the images already prepared in arrays; the archive is closed
            # as soon as the array has been read out of it.
            with np.load(path) as archive:
                images.append(archive['features'])
        else:
            # Wrap the bootstrap tokens in a start and an end tag
            syntax = '<START> ' + load_doc(path) + ' <END>'
            # Separate all the words with a single space
            syntax = ' '.join(syntax.split())
            text.append(syntax)
    images = np.array(images, dtype=float)
    return images, text

In [None]:
# Tokenizer configured with an empty filter string, a single-space separator
# and no lower-casing.
tokenizer = Tokenizer(lower=False, split=" ", filters='')
# Fit the vocabulary on the content of the bootstrap vocabulary file
tokenizer.fit_on_texts(texts=[load_doc(bootstrap_vocab)])

In [None]:
# Load the evaluation samples: prepared image features and their DSL texts
dir_name = dataset
train_features, texts = load_data(data_dir=dir_name)

In [None]:
 # load model and weights
# Read the serialised architecture; `with` closes the file even if read() fails
with open(model_json, 'r') as json_file:
    loaded_model_json = json_file.read()
# Rebuild the model from its JSON description
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(weights)
print("Loaded model from disk")

In [None]:
def word_for_id(integer, tokenizer):
    """Map a vocabulary index back to its word.

    Scans ``tokenizer.word_index`` and returns the first word whose index
    equals ``integer``, or ``None`` when no entry matches.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)

In [None]:
def generate_desc(model, tokenizer, photo, max_length, max_steps=750):
    """Greedily generate a token sequence for one image.

    Starting from ``'<START> '``, the text generated so far is encoded with
    ``tokenizer``, passed through ``pad_sequences(..., maxlen=max_length)`` and
    fed to ``model.predict`` together with the image. The arg-max of the
    prediction is mapped back to a word and appended. Every generated word is
    also printed as it is produced.

    Args:
        model: Object whose ``predict([photo, sequence], verbose=0)`` returns
            the scores for the next token.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        photo: Features of a single image; wrapped into a batch of one.
        max_length: ``maxlen`` handed to ``pad_sequences``.
        max_steps: Upper bound on the number of generated tokens. Defaults to
            750, the limit that used to be hard-coded.

    Returns:
        The generated text: ``'<START> '`` followed by each predicted word and
        a space. It ends with ``'<END> '`` only if that token was predicted
        before generation stopped.
    """
    photo = np.array([photo])
    # seed the generation process
    in_text = '<START> '
    print('\nPrediction---->\n\n<START> ', end='')
    for _ in range(max_steps):
        # integer encode input sequence
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad input
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict next word
        yhat = model.predict([photo, sequence], verbose=0)
        # convert probability to integer
        yhat = np.argmax(yhat)
        # map integer to word
        word = word_for_id(yhat, tokenizer)
        # stop if we cannot map the word
        if word is None:
            break
        # append as input for generating the next word
        in_text += word + ' '
        print(word + ' ', end='')
        # stop if we predict the end of the sequence
        if word == '<END>':
            break
    return in_text

In [None]:
# Sequence length handed to pad_sequences (as `maxlen`) while decoding.
# NOTE(review): presumably has to match the length the model was trained with -- confirm.
max_length = 48
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    """Generate a prediction for every sample and score it with corpus BLEU.

    Args:
        model: Model handed to ``generate_desc`` for decoding.
        descriptions: Reference token strings, one per sample.
        photos: Image features, indexable in the same order as ``descriptions``.
        tokenizer: Tokenizer handed to ``generate_desc``.
        max_length: Sequence length handed to ``generate_desc``.

    Returns:
        A tuple ``(bleu, actual, predicted)``. ``actual`` holds, per sample, a
        one-element list with the reference tokens; ``predicted`` holds the
        generated tokens per sample; ``bleu`` is ``corpus_bleu(actual, predicted)``.
    """
    actual, predicted = list(), list()
    # Use the arguments rather than notebook-level globals, so the function
    # evaluates exactly the model and references it was given.
    for i, description in enumerate(descriptions):
        yhat = generate_desc(model, tokenizer, photos[i], max_length)
        # store actual and predicted
        print('\n\nReal---->\n\n' + description)
        actual.append([description.split()])
        predicted.append(yhat.split())
    # calculate BLEU score
    bleu = corpus_bleu(actual, predicted)
    return bleu, actual, predicted

# Decode every loaded sample and score the predictions against the reference texts
bleu, actual, predicted = evaluate_model(loaded_model, texts, train_features, tokenizer, max_length)

In [None]:
# Compile the tokens into HTML and CSS
# Path of the JSON DSL mapping that is passed to Compiler further down
dsl_path = web_dsl_mapping
class Node:
    """One element of the DSL tree that gets rendered into markup."""

    def __init__(self, key, parent_node, content_holder):
        """Create a node.

        Args:
            key: DSL token used to look the node up in the mapping. An empty
                string is replaced by ``"line"`` and reported on stdout.
            parent_node: Enclosing node, or ``None``.
            content_holder: Substring of the mapped value that is replaced by
                the rendered children.
        """
        self.key = key
        if self.key == "":
            self.key = "line"
            print("Empty token found")
        self.parent = parent_node
        self.children = []
        self.content_holder = content_holder

    def add_child(self, child):
        """Append ``child`` to this node's children."""
        self.children.append(child)

    def show(self):
        """Recursively call ``show`` on all children.

        NOTE(review): nothing is printed or returned here -- confirm whether
        the node key was meant to be displayed.
        """
        for child in self.children:
            child.show()

    def render(self, mapping, rendering_function=None):
        """Render this node and its subtree to a string.

        Args:
            mapping: Dict from node key to markup template.
            rendering_function: Optional callable ``(key, value) -> value``
                applied to the template of every node. When ``None`` the
                template is used as is.

        Returns:
            The rendered string, or ``None`` if this node's key or the key of
            any descendant is missing from ``mapping``.
        """
        # NOTE(review): the children content starts with the literal "body",
        # so it appears in front of the children of every node that has
        # children -- confirm this is intended.
        content = "body"
        for child in self.children:
            placeholder = child.render(mapping, rendering_function)
            # A child that cannot be rendered invalidates the whole subtree
            if placeholder is None:
                return None
            content += placeholder

        value = mapping.get(self.key, None)
        if value is None:
            return None
        # The parameter defaults to None, so only call it when one was supplied
        if rendering_function is not None:
            value = rendering_function(self.key, value)

        if self.children:
            value = value.replace(self.content_holder, content)
        return value

class Utils:
    """Static helpers that generate random placeholder strings.

    Relies on the standard-library ``random`` and ``string`` modules being
    imported at the top of the notebook.
    """

    @staticmethod
    def get_random_text(length_text=10, space_number=1, with_upper_case=True):
        """Return ``length_text`` random lowercase letters with optional spaces.

        Args:
            length_text: Number of characters in the result.
            space_number: Maximum number of spaces to insert. Spaces are only
                placed at positions ``2 .. length_text - 3``, so texts shorter
                than five characters get none. Insertion also stops early when
                a position is drawn twice.
            with_upper_case: When true, upper-case the first character and
                write the upper-cased character preceding each space into the
                position following it.

        Returns:
            The generated string (empty when ``length_text`` is 0 or less).
        """
        results = [random.choice(string.ascii_lowercase) for _ in range(length_text)]
        # Guard against an empty result instead of raising IndexError
        if with_upper_case and results:
            results[0] = results[0].upper()

        last_space_pos = length_text - 3
        current_spaces = []
        # randint(2, last_space_pos) needs a non-empty range; skip spaces otherwise
        while last_space_pos >= 2 and len(current_spaces) < space_number:
            space_pos = random.randint(2, last_space_pos)
            if space_pos in current_spaces:
                break
            results[space_pos] = " "
            if with_upper_case:
                results[space_pos + 1] = results[space_pos - 1].upper()

            current_spaces.append(space_pos)

        return ''.join(results)

    @staticmethod
    def get_ios_id(length=10):
        """Return ``length`` random digits/letters with ``-`` at index 3 and 6.

        A dash is only written when its index exists, so ids shorter than
        seven characters no longer raise ``IndexError``.
        """
        alphabet = string.digits + string.ascii_letters
        results = [random.choice(alphabet) for _ in range(length)]

        for dash_pos in (3, 6):
            if dash_pos < len(results):
                results[dash_pos] = "-"

        return ''.join(results)

    @staticmethod
    def get_android_id(length=10):
        """Return ``length`` random ASCII letters."""
        return ''.join(random.choice(string.ascii_letters) for _ in range(length))

def render_content_with_text(key, value):
    """Fill the text placeholder of a template with random text.

    Does nothing unless ``FILL_WITH_RANDOM_TEXT`` is truthy. The kind of text
    depends on the first of "button", "title" or "text" that occurs in
    ``key``; keys containing none of them leave ``value`` unchanged.

    Args:
        key: DSL token of the node being rendered.
        value: Markup template mapped to that token.

    Returns:
        ``value`` with every ``TEXT_PLACE_HOLDER`` replaced, or ``value``
        itself when no replacement applies.
    """
    if not FILL_WITH_RANDOM_TEXT:
        return value

    if "button" in key:
        filler = Utils.get_random_text()
    elif "title" in key:
        filler = Utils.get_random_text(length_text=5, space_number=0)
    elif "text" in key:
        filler = Utils.get_random_text(length_text=56, space_number=7, with_upper_case=False)
    else:
        return value
    return value.replace(TEXT_PLACE_HOLDER, filler)
        
class Compiler:
    """Turns a sequence of DSL tokens into markup using a JSON mapping."""

    def __init__(self, dsl_mapping_file_path):
        """Load the DSL mapping and create the root node.

        Args:
            dsl_mapping_file_path: Path of a JSON file that contains at least
                the keys ``"opening-tag"`` and ``"closing-tag"``.
        """
        with open(dsl_mapping_file_path) as data_file:
            self.dsl_mapping = json.load(data_file)

        self.opening_tag = self.dsl_mapping["opening-tag"]
        self.closing_tag = self.dsl_mapping["closing-tag"]
        self.content_holder = self.opening_tag + self.closing_tag

        # NOTE(review): the root is created with an empty key, which Node
        # replaces by "line" (printing "Empty token found"); the mapping needs
        # an entry for that key or rendering yields None -- confirm intended.
        self.root = Node("", None, self.content_holder)

    def compile(self, tokens, output_file_path):
        """Build the node tree from ``tokens``, render it and write the result.

        Args:
            tokens: Sequence of DSL tokens. The first and the last element are
                dropped (expected to be the ``<START>`` / ``<END>`` markers).
            output_file_path: File the rendered markup is written to.

        Returns:
            The rendered markup, or the string ``"Parsing Error"`` when a token
            has no mapping or the closing tags do not match the opening tags.
            Nothing is written in the error case.

        NOTE(review): nodes are appended to ``self.root``, which is created
        once in ``__init__``; calling ``compile`` twice on the same instance
        accumulates both trees.
        """
        # Parse fix: normalise the token stream using "888" as a temporary
        # separator. This assumes no token itself contains "888".
        dsl_text = ' '.join(tokens[1:-1])
        dsl_text = dsl_text.replace('{', '{888').replace('}', '888}888')
        dsl_text = dsl_text.replace(' ', '')
        dsl_text = dsl_text.replace(',', '888')
        parsed_tokens = [piece for piece in dsl_text.split('888') if piece]
        # End Parse fix

        current_parent = self.root
        for token in parsed_tokens:
            # More closing than opening tags walked past the root; report it
            # as a parse error instead of failing with AttributeError.
            if current_parent is None:
                return "Parsing Error"

            token = token.replace(" ", "").replace("\n", "")

            if token.find(self.opening_tag) != -1:
                # Container element: descend into it
                token = token.replace(self.opening_tag, "")
                element = Node(token, current_parent, self.content_holder)
                current_parent.add_child(element)
                current_parent = element
            elif token.find(self.closing_tag) != -1:
                # End of the current container: go back up one level
                current_parent = current_parent.parent
            else:
                # Leaf element(s); use a separate name so the `tokens`
                # parameter is not shadowed.
                for leaf_key in token.split(","):
                    element = Node(leaf_key, current_parent, self.content_holder)
                    current_parent.add_child(element)

        output_html = self.root.render(self.dsl_mapping, rendering_function=render_content_with_text)

        if output_html is None:
            return "Parsing Error"

        print("Compiling website")

        with open(output_file_path, 'w') as output_file:
            output_file.write(output_html)
        return output_html

# When truthy, render_content_with_text() substitutes random text for the placeholder
FILL_WITH_RANDOM_TEXT = True
# Substring of a mapped template that render_content_with_text() replaces
TEXT_PLACE_HOLDER = "[]"


In [None]:

# Compile the first predicted token sequence to index.html and echo the result
compiler = Compiler(dsl_mapping_file_path=dsl_path)
compiled_website = compiler.compile(tokens=predicted[0], output_file_path='index.html')
print(compiled_website)

In [None]:
# Corpus BLEU score computed by evaluate_model for the evaluated samples
print(bleu)