-
Notifications
You must be signed in to change notification settings - Fork 0
/
ASOP_RNN_Streamlit.py
297 lines (244 loc) · 8.09 KB
/
ASOP_RNN_Streamlit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# # Load Packages
import streamlit as st
import numpy as np
import altair as alt
from tensorflow.keras.models import load_model
import pandas as pd
# Streamlit application starts here
# Browser-tab title and icon, plus the wide page layout, for the whole app.
st.set_page_config(
    page_title="ASOP Dreaming",
    page_icon="💤",
    layout="wide",
)
def pdf_transform(preds, temperature=1.0, column_name="Base"):
    """
    Rescale a probability vector by a temperature and return it as a one-column table.

    Each probability is effectively raised to the power 1 / temperature and the
    result is renormalized to sum to one.

    Arguments:
    preds (array-like): Five probabilities; the rows are labelled "a" through "e".
    temperature (float): Scaling factor. Values below 1e-6 (including zero) are
        clamped to 1e-6 so the division below is always defined.
    column_name (str): Name of the single column in the returned table.

    Returns:
    pandas.DataFrame: One column named `column_name`, indexed "a" to "e".
    """
    preds = np.asarray(preds).astype("float64")
    # log(0) evaluates to -inf, which later becomes a probability of exactly 0,
    # so the divide-by-zero warning NumPy would emit here is just noise.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / max(temperature, 0.000001)
    # Shift by the largest logit before exponentiating. The shift cancels out in
    # the normalization, but without it a very small temperature makes every
    # exp() underflow to 0 and the division below turns into 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))
    # Normalize so the entries sum to one
    preds = exp_preds / np.sum(exp_preds)
    row_labels = ["a", "b", "c", "d", "e"]
    df = pd.DataFrame(preds, index=row_labels, columns=[column_name])
    return df
# Fixed five-outcome example distribution, used only by the temperature chart below
my_array = np.array([0.5, 0.1, 0.2, 0.05, 0.15])

# # Set sidebar
# NOTE(review): indentation was missing from this copy of the file. The nesting
# below (sliders, chart and caption inside the bordered container; "Further notes"
# after it; everything inside the sidebar) is a reconstruction - confirm it
# matches the intended layout.
with st.sidebar:
    st.header("ASOP Dreaming Model")

    link1 = "https://github.com/DanTCIM/ASOP_RNN"
    link2 = "http://www.actuarialstandardsboard.org/wp-content/uploads/2023/12/ASOPs-as-of-Decemeber-2023.zip"

    # Background and a short explanation of the model
    st.markdown(
        f"**Background:** The language model is built for educational purposes only. The recurrent neural network (RNN) model is trained on [ASOP documents]({link2})."
    )
    st.subheader("What is RNN?")
    st.write(
        "RNN is an artificial neural network designed to recognize patterns in sequences of data, such as text. The model is trained at a character unit."
    )
    st.write(
        "The model inferences the next character from the 40-character context window. The words and structure of sentences are generated by shifting through the context window one by one."
    )
    st.image(
        "images/RNN_structure.png",
        caption="The RNN structure used in this example",
    )
    st.write(
        "The current RNN structure uses Long Short-Term Memory (LSTM) layers to create a memory of previous inputs in its internal state. Dropout layers are used to prevent overfitting."
    )

    with st.container(border=True):
        st.subheader("⚙️ Parameters")

        # Generation controls; both values are read later by the "Generate ASOP" button
        p_temp = st.slider(
            "Temperature:",
            min_value=0.25,
            max_value=2.0,
            value=1.0,
            step=0.25,
            help="Higher temperature leads to more randomness. See chart below.",
        )
        p_max = st.slider(
            "ASOP character length:",
            min_value=200,
            max_value=500,
            value=400,
            step=100,
        )

        # Two-column table: the untouched example PDF next to the same PDF
        # rescaled with the temperature currently selected on the slider
        temp_label = "Temp: " + str(p_temp)
        df = pdf_transform(my_array, temperature=1.0, column_name="Base PDF")
        df[temp_label] = pdf_transform(
            my_array, temperature=p_temp, column_name=temp_label
        )

        # Long format (one row per outcome per column) so Altair can facet on ParameterSet
        melted_df = df.reset_index().melt(
            id_vars="index",
            var_name="ParameterSet",
            value_name="Probability",
        )

        # The y axis is pinned to [0, 1] so both facets share the same scale
        y_axis = alt.Y("Probability:Q", scale=alt.Scale(domain=[0, 1]))
        series_color = alt.Color("ParameterSet:N", legend=None)
        bars = alt.Chart(melted_df).mark_bar()
        chart = bars.encode(
            x="index:O",
            y=y_axis,
            color=series_color,
            column="ParameterSet:N",
        ).properties(width=100, height=200)

        st.altair_chart(chart, theme=None, use_container_width=False)
        st.caption(
            "The chart illustrates what temperature does to the multinomial probability distribution predicting the next character. The actual PDF differs for each inference and has 69 possible cases (26 lower cases + numbers + special characters)."
        )

    st.subheader("📖 Further notes")
    st.write(
        f"The Python code and further detailed documentation of the project are in [GitHub]({link1})."
    )
# # Set up the title and input
st.title("Actuarial Standards of Practice (ASOP) Dreaming Model")
st.header("Imagine a world where 🤖 AI dreams Actuarial Standards of Practice")

# The markdown line acts as the visible prompt, so the widget's own label is collapsed
st.markdown(
    "Write the beginning of your ASOP, the ASOP Dreaming Model will complete it. Your input is: "
)
usr_input = st.text_input(
    label="Enter your ASOP start text here",
    label_visibility="collapsed",
)
# # Input set up
# Length of the character window fed to the model for each prediction
Tx = 40

# Character vocabulary. An entry's position is the index used for its one-hot
# slot, so the order matters (presumably it must match the order the saved
# model was trained with - verify before editing).
chars = [
    # whitespace and ASCII punctuation
    "\n", " ", "#", "$", "%", "&", "'", "(", ")", ",", "-", ".", "/",
    # digits
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    # more ASCII punctuation
    ":", ";", "?", "[", "]",
    # lowercase letters
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    # non-ASCII symbols
    "\xa0", "ω", "‐", "–", "—", "‘", "’", "“", "”", "•", "…", "⎯",
    # private-use code points
    "\uf0b7", "\uf0be", "\uf8e7",
]

# Forward lookup: character -> position in 'chars'
char_indices = {symbol: position for position, symbol in enumerate(chars)}
# Reverse lookup: position in 'chars' -> character
indices_char = dict(enumerate(chars))
# # Model and Function setup
@st.cache_resource
def load_keras_model(model_path):
    """
    Read a saved Keras model from disk.

    Arguments:
    model_path (str): Location of the saved model file.

    Returns:
    The object produced by `load_model` for that path.
    """
    return load_model(model_path)
# Load the trained network at script start; generate_output reads the
# module-level `model` when it predicts the next character.
model_path = "model/Life_ASOP_rnn_model030.keras"
model = load_keras_model(model_path)
def sample(preds, temperature=1.0):
    """
    Helper function to sample an index from a probability array.

    Arguments:
    preds (array-like): The input probability array (one-dimensional).
    temperature (float): Controls the randomness of the sampling. Higher values make
        the sampling more random. Values below 1e-6 (including zero) are clamped to 1e-6.

    Returns:
    int: The sampled index, a position within `preds`.
    """
    preds = np.asarray(preds).astype("float64")
    # log(0) evaluates to -inf, which later becomes a probability of exactly 0,
    # so the divide-by-zero warning NumPy would emit here is just noise.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / max(temperature, 0.000001)
    # Shift by the largest logit before exponentiating. The shift cancels out in
    # the normalization, but without it a very small temperature makes every
    # exp() underflow to 0 and the normalization turns into 0 / 0 = NaN.
    exp_preds = np.exp(logits - np.max(logits))
    # Normalize so the entries sum to one
    preds = exp_preds / np.sum(exp_preds)
    # A single multinomial trial yields a one-hot vector ...
    probas = np.random.multinomial(1, preds, 1)
    # ... and the position of its single 1 is the sampled index. Reading it with
    # argmax avoids a second random draw and keeps the function independent of
    # the size of any global vocabulary.
    return int(np.argmax(probas))
def generate_output(temperature=1.0, ASOP_length=500):
    """
    Generates an ASOP based on user input.

    The text is produced one character at a time: the last Tx characters are
    one-hot encoded, the model predicts a distribution for the next character,
    and one character is sampled from it. The growing text is written to a
    Streamlit placeholder after every character.

    Arguments:
    - temperature (float): Controls the randomness of the generated output. Higher values result in more randomness.
    - ASOP_length (int): The desired length of the generated ASOP in characters.

    Returns:
    - generated (str): The user's input followed by the generated characters.
    """
    # Left-pad with a sentinel that is not in the vocabulary. Padding with "0"
    # made real "0" digits indistinguishable from padding, so they were encoded
    # as blanks.
    pad_char = "\0"
    # Lowercase first (the vocabulary has no capitals), then keep only the last
    # Tx characters so an input longer than the window cannot overrun x_pred.
    sentence = usr_input.lower()[-Tx:].rjust(Tx, pad_char)
    generated = usr_input

    st.write("\n\nHere is your ASOP dream 💤: \n\n")
    # Placeholder for continuous output
    output_placeholder = st.empty()

    for _ in range(ASOP_length):
        # One-hot encode the current window; len(chars) = 69
        x_pred = np.zeros((1, Tx, len(chars)))
        for t, char in enumerate(sentence):
            # Padding and characters outside the vocabulary stay all-zero rows
            # instead of raising KeyError.
            char_index = char_indices.get(char)
            if char_index is not None:
                x_pred[0, t, char_index] = 1.0

        # Next character's probability distribution (softmax output)
        preds = model.predict(x_pred, verbose=0)[0]
        # Sample an index from that distribution and convert it to a character
        next_index = sample(preds, temperature=temperature)
        next_char = indices_char[next_index]
        generated += next_char

        # Slide the window: drop the oldest character, append the new one,
        # keeping the length fixed at Tx.
        sentence = sentence[1:] + next_char

        # Update the output text dynamically
        output_placeholder.text(generated)

    return generated
# # Run the model
# Generation starts only on the run in which the button is clicked; the
# temperature and length come from the sidebar sliders.
if st.button(label="Generate ASOP"):
    generate_output(temperature=p_temp, ASOP_length=p_max)