# Language Generation with Various Neural Network Architectures

In [1]:
import os
from pathlib import Path
import json
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, preprocessing

import matplotlib.pyplot as plt
import numpy as np

2022-05-14 08:32:55.412845: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-14 08:32:55.412869: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Load Data

In [None]:
!pwd

In [2]:
dataset_raw_dir = "../../data/raw/SciFi_Stories_Text/"
dataset_raw_path = dataset_raw_dir + "internet_archive_scifi_v3.txt"


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [3]:
!ls $dataset_raw_dir

internet_archive_scifi_v3.txt


# Hyperparameters

In [4]:
session_name = "test_1"

batch_size = 1024
seed = 42

vocabulary_size = 10_000

embedding_sizes = [16, 32, 64, 128]
sequence_lengths = [16, 32, 64, 128]

hidden_layer_neurons = [[16, 32, 64, 128], [16, 32, 64, 128, 256]]
hidden_layer_activations = ["sigmoid", "relu", "tanh"]

epochs = 10

In [5]:
dataset = preprocessing.text_dataset_from_directory(
    dataset_raw_dir,
    labels=None,
    batch_size=batch_size,
    seed=seed
)

Found 1 files belonging to 1 classes.


2022-05-14 08:33:08.526761: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-14 08:33:08.527139: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-14 08:33:08.527204: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-05-14 08:33:08.527259: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-05-14 08:33:08.527307: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [6]:
encoder = layers.TextVectorization(
    max_tokens=vocabulary_size,
    standardize="lower_and_strip_punctuation",
    split="whitespace",
    output_mode="int",
)

encoder.adapt(dataset)

2022-05-14 08:33:16.392638: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 420938160 exceeds 10% of free system memory.
2022-05-14 08:33:16.392975: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 631407240 exceeds 10% of free system memory.
2022-05-14 08:33:17.275882: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 210469080 exceeds 10% of free system memory.


In [7]:
vocabulary = encoder.get_vocabulary()

In [8]:
print(len(vocabulary))
print(vocabulary[:30])

10000
['', '[UNK]', 'the', 'and', 'of', 'to', 'a', 'he', 'in', 'i', 'it', 'was', 'that', 'you', 'his', 'had', 'for', 'on', 'with', 'but', 'as', 'at', 'is', 'be', 'said', 'they', 'not', 'have', 'were', 'him']


### Define Model

In [13]:
def create_model(
    embedding_size,
    sequence_length,
    hidden_layer_neuron_1,
    hidden_layer_neuron_2,
):
    model = models.Sequential()
    
    model.add(encoder)
    model.add(layers.LSTM(hidden_layer_neuron_1, return_sequences=True))
    model.add(layers.LSTM(hidden_layer_neuron_2))
    model.add(layers.Dense(vocabulary_size, activation="softmax"))
    
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    
    return model
              

In [14]:
!pwd

/home/sam/Projects/DSR_Batch30/notebooks/DSR_NLP


In [15]:
def create_model_dir(model_name):
    model_dir = "../../models/03_sam_language_generator/" + model_name + "/"

    saved_dir = model_dir + "saved/"
    checkpoint_dir = model_dir + "cp/"
    history_dir = model_dir + "history/"

    checkpoint_path = checkpoint_dir + "cp-{epoch:02d}.ckpt"

    Path(saved_dir).mkdir(parents=True, exist_ok=True)
    Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)
    Path(history_dir).mkdir(parents=True, exist_ok=True)
    
    return saved_dir, checkpoint_dir, history_dir

In [16]:

for embedding_size in embedding_sizes:
    for sequence_length in sequence_lengths:
        for hidden_layer_neuron_1 in hidden_layer_neurons[0]:
            for hidden_layer_neuron_2 in hidden_layer_neurons[1]:
                model_name = "_".join([session_name, str(embedding_size), str(sequence_length), str(hidden_layer_neuron_1), str(hidden_layer_neuron_2)])
                print(model_name)
                model = create_model(embedding_size, sequence_length, hidden_layer_neuron_1, hidden_layer_neuron_2)
                model.summary()
                
                saved_dir, checkpoint_dir, history_dir = create_model_dir(model_name)
                
                cp_callback = tf.keras.callbacks.ModelCheckpoint(
                    filepath=checkpoint_dir + "cp-{epoch:02d}.ckpt",
                    save_weights_only=True,
                    verbose=1
                )
                
                history = model.fit(
                    dataset,
                    epochs=epochs,
                    #validation_split=0.2,
                    callbacks=[cp_callback]
                )
                
                model.save(saved_dir)
                
                history_path_json = history_dir + model_name + ".json"
                json.dump(history.history, open(history_path_json, 'w'))
                
                history_path_csv = history_dir + model_name + ".csv"
                pd.DataFrame.from_dict(history.history).to_csv(history_path_csv, index=False)
            
            
            

test_1_16_16_16_16


ValueError: Input 0 of layer "lstm_4" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, None)