# ASOP_RNN_Streamlit.py

# # Load Packages
import streamlit as st
import numpy as np
import altair as alt
from tensorflow.keras.models import load_model
import pandas as pd

# Streamlit application starts here
# Set the browser-tab title and icon and switch the page to the wide layout.
# This is the first Streamlit call issued by the script.
st.set_page_config(page_title="ASOP Dreaming", page_icon="💤", layout="wide")


def pdf_transform(preds, temperature=1.0, column_name="Base"):
    """
    Rescale a probability array by a temperature and return it as a DataFrame.

    Each probability p is mapped to p ** (1 / temperature) and the result is
    renormalised so that it sums to one. Temperatures below 1 sharpen the
    distribution, temperatures above 1 flatten it.

    Arguments:
    preds (array-like): One-dimensional probability array.
    temperature (float): Rescaling temperature. Values below 1e-6 (including
        zero) are clamped to 1e-6, which puts essentially all the mass on the
        most likely outcome.
    column_name (str): Name of the single column of the returned DataFrame.

    Returns:
    pandas.DataFrame: One column named ``column_name`` with one row per
        outcome. Rows are labelled "a", "b", "c", ... for up to 26 outcomes
        and "0", "1", "2", ... beyond that.
    """
    preds = np.asarray(preds).astype("float64")

    # log(0) = -inf is the correct log-probability of an impossible outcome
    # (it maps back to exactly 0 below), so the divide warning is just noise.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / max(temperature, 0.000001)

    # Shift by the largest logit before exponentiating. Without the shift a
    # tiny temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))

    # Apply softmax to normalize the array
    preds = exp_preds / np.sum(exp_preds)

    # Derive the row labels from the input length instead of assuming five.
    n_outcomes = len(preds)
    if n_outcomes <= 26:
        row_labels = [chr(ord("a") + i) for i in range(n_outcomes)]
    else:
        row_labels = [str(i) for i in range(n_outcomes)]

    return pd.DataFrame(preds, index=row_labels, columns=[column_name])


# Toy five-outcome probability distribution passed to pdf_transform to draw
# the temperature illustration chart in the sidebar.
my_array = np.asarray((0.5, 0.1, 0.2, 0.05, 0.15), dtype="float64")


# # Set sidebar
# Everything created inside this block is placed in the app's sidebar:
# background text, the two generation parameters, and a small chart showing
# the effect of the chosen temperature on a toy distribution.
with st.sidebar:
    st.header("ASOP Dreaming Model")

    # link1: project repository, link2: archive of the training documents
    link1 = "https://github.com/DanTCIM/ASOP_RNN"
    link2 = "http://www.actuarialstandardsboard.org/wp-content/uploads/2023/12/ASOPs-as-of-Decemeber-2023.zip"

    st.markdown(
        f"**Background:** The language model is built for educational purposes only. The recurrent neural network (RNN) model is trained on [ASOP documents]({link2})."
    )

    # Short primer on the model
    st.subheader("What is RNN?")
    st.write(
        "RNN is an artificial neural network designed to recognize patterns in sequences of data, such as text. The model is trained at a character unit."
    )
    st.write(
        "The model inferences the next character from the 40-character context window. The words and structure of sentences are generated by shifting through the context window one by one."
    )
    st.image(
        "images/RNN_structure.png",
        caption="The RNN structure used in this example",
    )
    st.write(
        "The current RNN structure uses Long Short-Term Memory (LSTM) layers to create a memory of previous inputs in its internal state. Dropout layers are used to prevent overfitting."
    )

    # Generation controls; p_temp and p_max are read again when the
    # "Generate ASOP" button is handled.
    with st.container(border=True):
        st.subheader("⚙️ Parameters")
        p_temp = st.slider(
            "Temperature:",
            value=1.0,
            min_value=0.25,
            max_value=2.0,
            step=0.25,
            help="Higher temperature leads to more randomness. See chart below.",
        )
        p_max = st.slider(
            "ASOP character length:",
            value=400,
            min_value=200,
            max_value=500,
            step=100,
        )

        # Toy distribution at temperature 1 next to the same distribution at
        # the temperature picked on the slider.
        temp_label = f"Temp: {p_temp}"
        df = pdf_transform(my_array, temperature=1.0, column_name="Base PDF")
        df[temp_label] = pdf_transform(
            my_array, temperature=p_temp, column_name=temp_label
        )

    # Long format: one row per (outcome label, parameter set) pair
    melted_df = pd.melt(
        df.reset_index(),
        id_vars="index",
        var_name="ParameterSet",
        value_name="Probability",
    )

    # One bar-chart facet per parameter set, sharing a fixed 0-1 y axis
    y_axis = alt.Y("Probability:Q", scale=alt.Scale(domain=[0, 1]))
    bar_color = alt.Color("ParameterSet:N", legend=None)
    chart = alt.Chart(melted_df).mark_bar()
    chart = chart.encode(
        x="index:O",
        y=y_axis,
        color=bar_color,
        column="ParameterSet:N",
    )
    chart = chart.properties(width=100, height=200)

    st.altair_chart(chart, theme=None, use_container_width=False)

    st.caption(
        "The chart illustrates what temperature does to the multinomial probability distribution predicting the next character. The actual PDF differs for each inference and has 69 possible cases (26 lower cases + numbers + special characters)."
    )

    st.subheader("📖 Further notes")
    st.write(
        f"The Python code and further detailed documentation of the project are in [GitHub]({link1})."
    )

# # Set up the title and input
# Main-page title and tagline
st.title("Actuarial Standards of Practice (ASOP) Dreaming Model")
st.header("Imagine a world where 🤖 AI dreams Actuarial Standards of Practice")

# The text box's own label is collapsed, so this markdown line acts as the
# visible prompt for it.
st.markdown(
    "Write the beginning of your ASOP, the ASOP Dreaming Model will complete it. Your input is: "
)
# Seed text typed by the user; generate_output reads this module-level value.
usr_input = st.text_input(
    label="Enter your ASOP start text here", label_visibility="collapsed"
)


# # Input set up
# Number of characters in the context window fed to the model
Tx = 40

# Character vocabulary. The position of a character in this list is its
# one-hot index, so the order must not change.
chars = list(
    "\n #$%&'(),-./"
    "0123456789"
    ":;?[]"
    "abcdefghijklmnopqrstuvwxyz"
    "\xa0ω‐–—‘’“”•…⎯"
    "\uf0b7\uf0be\uf8e7"
)

# Forward lookup: character -> position in 'chars'
char_indices = {char: position for position, char in enumerate(chars)}

# Reverse lookup: position in 'chars' -> character
indices_char = dict(enumerate(chars))


# # Model and Function setup
@st.cache_resource
def load_keras_model(model_path):
    """
    Load a saved Keras model.

    The function is decorated with st.cache_resource, so Streamlit caches the
    returned model object instead of calling load_model again for the same
    path.

    Arguments:
    model_path (str): Location of the saved model file.

    Returns:
    The model object returned by keras' load_model.
    """
    return load_model(model_path)


# Location of the saved Keras model file, handed to the cached loader.
# The resulting module-level 'model' is what generate_output predicts with.
model_path = "model/Life_ASOP_rnn_model030.keras"
model = load_keras_model(model_path)


def sample(preds, temperature=1.0):
    """
    Helper function to sample an index from a probability array.

    The probabilities are first rescaled by the temperature (each p becomes
    p ** (1 / temperature), then the array is renormalised) and a single index
    is drawn from the rescaled distribution.

    Arguments:
    preds (list): The input probability array.
    temperature (float): Controls the randomness of the sampling. Higher values make the sampling more random.
        Values below 1e-6 (including zero) are clamped to 1e-6, which makes
        the draw effectively greedy (the most likely index is returned).

    Returns:
    int: The sampled index, in range(len(preds)).
    """

    # float64 keeps the normalised probabilities within the tolerance that
    # np.random.multinomial accepts for their sum.
    preds = np.asarray(preds).astype("float64")

    # log(0) = -inf is the correct log-probability of an impossible outcome
    # (it maps back to exactly 0 below), so the divide warning is just noise.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / max(temperature, 0.000001)

    # Shift by the largest logit before exponentiating. Without the shift a
    # tiny temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))

    # Apply softmax to normalize the array
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot vector; the position of the 1 is
    # the sampled index. No second random draw is needed.
    one_hot = np.random.multinomial(1, preds)
    return int(np.argmax(one_hot))


def generate_output(temperature=1.0, ASOP_length=500):
    """
    Generates an ASOP based on user input.

    The module-level usr_input seeds a context window of Tx characters. At each
    step the window is one-hot encoded, the model predicts the distribution of
    the next character, one character is sampled from it, and the window slides
    forward by one. The text generated so far is re-rendered in the app after
    every step.

    Arguments:
    - temperature (float): Controls the randomness of the generated output. Higher values result in more randomness.
    - ASOP_length (int): The desired length of the generated ASOP in characters.

    Returns:
    - generated (str): The generated ASOP string (the user's input followed by
      the ASOP_length generated characters).
    """

    # The model only sees Tx characters, so keep the tail of a longer input
    # rather than overflowing the input array.
    context = usr_input.lower()[-Tx:]

    # Left-pad with None so the window is always Tx long. Padding positions,
    # and input characters that are not in the vocabulary, are encoded as
    # all-zero rows. A dedicated sentinel is used instead of the character "0"
    # because "0" is a real vocabulary entry and must be one-hot encoded.
    window = [None] * (Tx - len(context)) + list(context)

    generated = usr_input  # Output shown to the user keeps the original casing

    st.write("\n\nHere is your ASOP dream 💤: \n\n")
    # Placeholder for continuous output
    output_placeholder = st.empty()

    for _ in range(ASOP_length):
        # One-hot encode the current window: shape (1, Tx, len(chars))
        x_pred = np.zeros((1, Tx, len(chars)))
        for t, char in enumerate(window):
            index = char_indices.get(char)
            if index is not None:
                x_pred[0, t, index] = 1.0

        # Get next character's probability distribution (softmax)
        preds = model.predict(x_pred, verbose=0)[0]
        # Sample an index from the distribution and convert it to a character
        next_index = sample(preds, temperature=temperature)
        next_char = indices_char[next_index]

        generated += next_char

        # Slide the window: drop the oldest position and append the newly
        # generated character, keeping the length at Tx.
        window = window[1:] + [next_char]

        # Update the output text dynamically
        output_placeholder.text(generated)

    return generated


# # Run the generation
# Generate text only when the button is pressed, using the temperature and
# length chosen with the sidebar sliders.
if st.button(label="Generate ASOP"):
    generate_output(temperature=p_temp, ASOP_length=p_max)