# LSTM Model

In [1]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Dropout, RepeatVector, concatenate, Embedding, LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.sequence import pad_sequences






In [2]:
from tensorflow.keras.utils import to_categorical


In [3]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from os import listdir
from PIL import Image


In [5]:
# Helper functions for reading the dataset files
def load_doc(filename):
    """Read a text file and return its whole content as a single string.

    Args:
        filename: Path of the file to read.

    Returns:
        The complete text of the file.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'r') as file:
        return file.read()
def load_data(data_dir):
    """Load every screenshot and GUI markup file found in ``data_dir``.

    Files are visited in sorted filename order:

    * names ending in "png" are opened as images, resized to 256x256,
      converted to RGB and scaled to the range [0, 1];
    * names ending in ".gui" are read as text, wrapped in ``<START>`` /
      ``<END>`` markers, whitespace-normalised, and given a space in front
      of every comma;
    * anything else is reported on stdout and skipped.

    ``images[i]`` and ``texts[i]`` only belong together when every
    screenshot has a markup file that sorts alongside it; this function does
    not verify that pairing.

    Args:
        data_dir: Directory containing the ``.png`` and ``.gui`` files.

    Returns:
        A tuple ``(images, texts)`` where ``images`` is a float32 NumPy array
        holding the loaded screenshots and ``texts`` is the list of markup
        strings.
    """
    images = []
    texts = []
    for filename in sorted(listdir(data_dir)):
        if str(filename).endswith("png"):
            # Release the file handle as soon as the pixels have been read.
            with Image.open(data_dir+"/"+filename) as raw_image:
                image = raw_image.resize((256,256)).convert('RGB')
            # float32 throughout: the previous float16 -> float64 round trip
            # lost precision and still paid for the widest dtype in memory.
            images.append(np.asarray(image, dtype='float32') / 255.0)
        elif str(filename).endswith(".gui"):
            syntax = '<START> ' + load_doc(data_dir+"/"+filename) + ' <END>'
            # Separate all the words with a single space
            syntax = ' '.join(syntax.split())
            # Put a space in front of each comma so it becomes its own token
            syntax = syntax.replace(',', ' ,')
            texts.append(syntax)
        else:
            print(f"File Ignored: {filename}")
    return np.array(images, dtype='float32'), texts

In [6]:
# Load every screenshot and its markup string into memory
train_features,sequences = load_data("../../github/pix2code/datasets/pix2code_datasets/web/all_data/")

In [8]:
# Sanity check: the number of screenshots should equal the number of markup strings
train_features.shape,len(sequences)


((1742, 256, 256, 3), 1742)

In [9]:
# Build the vocabulary from the token list stored in resources/bootstrap.vocab.
# filters='' and lower=False leave every token exactly as written; tokens are split on spaces.
Tokenizr= Tokenizer(filters='', split=" ", lower=False)
Tokenizr.fit_on_texts([load_doc('resources/bootstrap.vocab')])
# Add one spot for the empty word in the vocabulary
vocab_size = len(Tokenizr.word_index) + 1


In [11]:
# Convert every markup string into a list of integer token ids
train_sequences = Tokenizr.texts_to_sequences(sequences)
# Length of the longest token sequence in the dataset (117 when this notebook was last run)
max_sequence = max(len(s) for s in train_sequences)

# Number of context tokens the model sees for each prediction.
max_length = 48 
# max_length is smaller than max_sequence on purpose: the network takes a
# fixed-size input, so every prefix is brought to exactly max_length tokens.
# With pad_sequences' default settings (padding='pre', truncating='pre')
# shorter prefixes are left-padded with 0 and longer ones keep only their
# last max_length tokens.

# One training sample per (prefix, next token) pair of every sequence
X, y, image_data = list(), list(), list()
for img_no, seq in enumerate(train_sequences):
    for i in range(1, len(seq)):
        # The first i tokens are the input, token i is the target to predict
        in_seq, out_seq = seq[:i], seq[i]
        in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
        # One-hot encode the target token over the vocabulary
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        X.append(in_seq)
        y.append(out_seq)
        # Every sample of a sequence is paired with that sequence's screenshot
        image_data.append(train_features[img_no])


In [19]:
# Stacking the samples into arrays with the default float64 dtype failed with:
#   MemoryError: Unable to allocate 173. GiB for an array with shape (118259, 256, 256, 3) and data type float64
# The lower-precision conversions below are left disabled, so X, y and
# image_data are still Python lists when they are split.

# X = np.array(X).astype('float32')
# y = np.array(y).astype('float32')
# image_data = np.array(image_data).astype('float16')

# Hold out 10% of the samples; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test, image_train, image_test = train_test_split(X, y, image_data, test_size=0.1, random_state=42)


In [42]:
# Length of the longest token sequence found in the dataset
max_sequence

117

# GRU

In [21]:
import numpy as np
from tensorflow.keras.models import Sequential, Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Dropout, RepeatVector, concatenate, Embedding, GRU
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from os import listdir
from PIL import Image


In [None]:

# Function to load the document
def load_doc(filename):
    """Return the full text content of ``filename``.

    Args:
        filename: Path of the text file to read.

    Returns:
        Everything in the file, as one string.
    """
    # Using ``with`` guarantees the handle is closed, including on errors.
    with open(filename, 'r') as file:
        text = file.read()
    return text

def load_data(data_dir, dtype=float):
    """Load all screenshots and GUI markup files from ``data_dir``.

    Files are processed in sorted filename order. Names ending in "png" are
    opened as images, resized to 256x256 and converted to RGB; pixel values
    are kept in their raw range (no division by 255 happens here). Names
    ending in ".gui" are read as text, wrapped in ``<START>`` / ``<END>``
    markers, whitespace-normalised, and given a space in front of every
    comma. Any other file is reported on stdout and skipped.

    ``images[i]`` and ``text[i]`` only correspond when each screenshot has a
    markup file sorting next to it; the pairing is not checked.

    Args:
        data_dir: Directory containing the ``.png`` and ``.gui`` files.
        dtype: NumPy dtype of the returned image array. Defaults to ``float``
            (float64); pass e.g. ``'float32'`` to reduce memory use.

    Returns:
        A tuple ``(images, text)``: the stacked image array and the list of
        markup strings.
    """
    text = []
    images = []
    # Load all the files and order them
    for filename in sorted(listdir(data_dir)):
        if filename.endswith("png"):
            # Close the file handle as soon as the pixels have been read.
            with Image.open(data_dir+"/"+filename) as raw_image:
                image = raw_image.resize((256,256)).convert('RGB')
            images.append(np.array(image))
        elif filename.endswith(".gui"):
            syntax = '<START> ' + load_doc(data_dir+"/"+filename) + ' <END>'
            # Separate all the words with a single space
            syntax = ' '.join(syntax.split())
            # Put a space in front of each comma so it becomes its own token
            syntax = syntax.replace(',', ' ,')
            text.append(syntax)
        else:
            print(f"Ignoring file: {filename}")
    images = np.array(images, dtype=dtype)
    return images, text

def data_generator(data_dir, tokenizer, max_sequence, max_length, vocab_size, batch_size=32):
    """Endlessly yield preprocessed batches built from the files in ``data_dir``.

    The directory listing is read once and sorted. Each pass walks the files
    in that order, collecting images from names ending in "png" and markup
    strings from names ending in ".gui"; other files are skipped silently.
    A batch is emitted whenever ``batch_size`` images have been collected,
    and a final smaller batch is emitted for any leftover images at the end
    of the pass. Images are resized to 256x256 RGB and passed on without
    normalisation.

    Args:
        data_dir: Directory containing the ``.png`` and ``.gui`` files.
        tokenizer: Tokenizer forwarded to ``preprocess_data``.
        max_sequence: Forwarded to ``preprocess_data``.
        max_length: Forwarded to ``preprocess_data``.
        vocab_size: Forwarded to ``preprocess_data``.
        batch_size: Number of screenshots gathered before a batch is yielded.

    Yields:
        The ``(X, y, image_data)`` tuple returned by ``preprocess_data``.
        NOTE(review): Keras' ``fit`` reads a 3-tuple as
        ``(inputs, targets, sample_weights)``, so a consumer presumably has
        to regroup this into ``([image_data, X], y)`` first — confirm.

    Raises:
        ValueError: On first iteration, if ``data_dir`` holds no file ending
            in "png". Without this check the loop would spin forever without
            ever yielding.
    """
    # Load all the files and order them
    all_filenames = sorted(listdir(data_dir))
    if not any(str(name).endswith("png") for name in all_filenames):
        raise ValueError(f"No png files found in {data_dir!r}; nothing to generate")
    while True:
        images = []
        texts = []
        for filename in all_filenames:
            if str(filename).endswith("png"):
                # Close the file handle as soon as the pixels have been read.
                with Image.open(data_dir+"/"+filename) as raw_image:
                    image = raw_image.resize((256,256)).convert('RGB')
                images.append(np.array(image))
                if len(images) == batch_size:
                    yield preprocess_data(images, texts, tokenizer, max_sequence, max_length, vocab_size)
                    images = []
                    texts = []
            elif filename.endswith(".gui"):
                syntax = '<START> ' + load_doc(data_dir+"/"+filename) + ' <END>'
                # Separate all the words with a single space
                syntax = ' '.join(syntax.split())
                # Put a space in front of each comma so it becomes its own token
                syntax = syntax.replace(',', ' ,')
                texts.append(syntax)

        # Emit whatever is left over once the pass is finished
        if images:
            yield preprocess_data(images, texts, tokenizer, max_sequence, max_length, vocab_size)

def preprocess_data(images, texts, tokenizer, max_sequence, max_length, vocab_size):
    """Expand images and their markup into next-token training samples.

    Every markup string is tokenised, and each position ``i >= 1`` of the
    resulting sequence produces one sample: the first ``i`` tokens as input
    and token ``i`` as the target. Images and texts are paired positionally
    with ``zip``, so surplus items on either side are dropped.

    Args:
        images: Sequence of image arrays, one per markup string.
        texts: Sequence of markup strings.
        tokenizer: Object providing ``texts_to_sequences(texts)``.
        max_sequence: Length every input prefix is padded/truncated to first.
        max_length: Number of trailing tokens of the padded prefix that are
            kept as the model input.
        vocab_size: Number of classes for the one-hot encoded target.

    Returns:
        A tuple ``(X, y, image_data)`` of NumPy arrays: the token inputs, the
        one-hot targets, and the image repeated for each of its samples.
    """
    X, y, image_data = list(), list(), list()
    train_sequences = tokenizer.texts_to_sequences(texts)
    for img, seq in zip(images, train_sequences):
        for i in range(1, len(seq)):
            # The first i tokens are the input, token i is the target
            in_seq, out_seq = seq[:i], seq[i]
            # Pad all the input token sentences to max_sequence
            in_seq = pad_sequences([in_seq], maxlen=max_sequence)[0]
            # Turn the output into one-hot encoding
            out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
            # Add the corresponding image to the bootstrap token file
            image_data.append(img)
            # Cap the input to its last max_length tokens (this was a
            # hard-coded 48 that ignored the max_length argument)
            X.append(in_seq[-max_length:])
            y.append(out_seq)
    return np.array(X), np.array(y), np.array(image_data)

# Directory holding the .png screenshots and .gui markup files read by the loaders above
dir_name = '../../github/pix2code/datasets/pix2code_datasets/web/all_data'


In [5]:

# Load the screenshots and their markup strings
train_features, texts = load_data(dir_name)

# Build the vocabulary from the markup that was just loaded
tokenizer = Tokenizer(filters='', split=" ", lower=False)
tokenizer.fit_on_texts(texts)

# Add one spot for the empty word in the vocabulary
vocab_size = len(tokenizer.word_index) + 1

# Length every token prefix is padded/truncated to before it is capped.
# NOTE(review): this is a fixed value, not the longest sequence in the data.
max_sequence = 50

# Specify how many tokens to have in each input sentence
max_length = 48

# Create the generator only after its settings exist: the arguments are bound
# at call time, so building it earlier either fails on a fresh kernel or picks
# up stale values left behind by earlier cells.
train_generator = data_generator(dir_name, tokenizer, max_sequence, max_length, vocab_size)

# Convert every markup string into a list of integer token ids
train_sequences = tokenizer.texts_to_sequences(texts)

# Split the data into training and validation sets
train_features, train_feature_val, text_train, text_val = train_test_split(train_features, train_sequences, test_size=0.2, random_state=42)


In [8]:
# Shape of the training share of the screenshots after the 80/20 split
train_features.shape

(1393, 256, 256, 3)

In [22]:

# Image encoder: a stack of 3x3 convolutions (every second one with stride 2),
# two dropout-regularised dense layers, and a RepeatVector that copies the
# image encoding once per input token position.
image_model = Sequential([
    Conv2D(16, (3, 3), padding='valid', activation='relu', input_shape=(256, 256, 3,)),
    Conv2D(16, (3, 3), activation='relu', padding='same', strides=2),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    Conv2D(32, (3, 3), activation='relu', padding='same', strides=2),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same', strides=2),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.3),
    Dense(1024, activation='relu'),
    Dropout(0.3),
    RepeatVector(max_length),
])

# Screenshot branch
visual_input = Input(shape=(256, 256, 3,))
encoded_image = image_model(visual_input)

# Token branch: embedding (index 0 is masked) followed by two stacked GRUs
language_input = Input(shape=(max_length,))
language_model = Embedding(
    vocab_size,
    50,
    input_length=max_length,
    mask_zero=True,
)(language_input)
language_model = GRU(128, return_sequences=True)(language_model)
language_model = GRU(128, return_sequences=True)(language_model)

# Decoder: join both branches per position, then predict the next token
decoder = concatenate([encoded_image, language_model])
decoder = GRU(512, return_sequences=True)(decoder)
decoder = GRU(512, return_sequences=False)(decoder)
decoder = Dense(vocab_size, activation='softmax')(decoder)

# Assemble and compile the two-input model
model = Model(inputs=[visual_input, language_input], outputs=decoder)
optimizer = RMSprop(learning_rate=0.0001, clipvalue=1.0)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [23]:

# Save the weights at the end of every 2nd epoch.
# An integer save_freq counts *batches*, not epochs, and validation metrics
# only exist at the end of an epoch, so save_freq=2 could not fill the
# {val_loss} placeholder in the filename. Saving is therefore done per epoch
# and every odd epoch is skipped.
class EverySecondEpochCheckpoint(ModelCheckpoint):
    """ModelCheckpoint variant that only saves after every 2nd epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Keras passes a 0-based epoch index: indices 1, 3, 5, ... are the
        # 2nd, 4th, 6th, ... epochs.
        if (epoch + 1) % 2 == 0:
            super().on_epoch_end(epoch, logs)


# NOTE(review): the leading "/" makes this an absolute path, unlike the
# relative 'models/' path used for the final model — confirm it is intended
# and that the directory exists.
filepath="/weights/org-weights-epoch-{epoch:04d}--val_loss-{val_loss:.4f}--loss-{loss:.4f}.h5"
checkpoint = EverySecondEpochCheckpoint(filepath, monitor='val_loss', verbose=1, save_weights_only=True, save_freq='epoch')
callbacks_list = [checkpoint]


In [24]:
# Print the layers, output shapes and parameter counts of the combined model
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 48)]                 0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, 48, 50)               900       ['input_4[0][0]']             
                                                                                                  
 input_3 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 gru (GRU)                   (None, 48, 128)              69120     ['embedding_1[0][0]']         
                                                                                              

In [15]:
# Creating the generator runs none of its body; files are only read once it is iterated
train_generator

<generator object data_generator at 0x000002948C87FCA0>

In [26]:

# Fit the model on (screenshot, token prefix) -> next token samples, holding
# out the last 10% of the samples for validation.
# NOTE(review): image_data, X and y are the Python lists built in the LSTM
# section (their conversion to arrays is commented out there) — confirm they
# fit in memory once Keras converts them. Neither train_generator nor the
# train/test split created earlier is used by this call.
history = model.fit(
    [image_data, X],
    y,
    epochs=50,
    batch_size=35,
    validation_split=0.1,
    shuffle=False,
    callbacks=callbacks_list,
    verbose=1,
)
# Persist the fully trained model (architecture and weights)
model.save('models/org_model.h5')


: 

In [None]:
# !shutdown.exe /r /t 0