# Preprocess & analyze

In [None]:

import numpy as np
import string

def preprocess(text, maxlen=20):
    """Encode *text* as a fixed-length array of character indices.

    The text is lower-cased, then each character is looked up in a
    vocabulary of the lowercase ASCII letters (1-26), the digits (27-36)
    and the space character (37). Characters outside that vocabulary are
    encoded as 0, the same value used for padding.

    Args:
        text: The string to encode.
        maxlen: Target length. Longer inputs are cut off after ``maxlen``
            characters; shorter inputs are right-padded with zeros.

    Returns:
        A NumPy array holding the encoded characters.
    """
    alphabet = string.ascii_lowercase + string.digits + " "

    # Indices start at 1 so that 0 stays free for padding / unknown characters.
    index_of = {}
    for position, symbol in enumerate(alphabet, start=1):
        index_of[symbol] = position

    codes = []
    for symbol in text.lower():
        codes.append(index_of.get(symbol, 0))

    # A non-positive repeat count yields an empty list, so inputs that are
    # already long enough receive no padding.
    padding = [0] * (maxlen - len(codes))
    return np.array(codes[:maxlen] + padding)

def analyze_text(text, model):
    """Run *model* on *text* and report the two predicted shares as percentages.

    Args:
        text: The string to analyze. It is encoded with ``preprocess`` using
            that function's default length.
        model: An object exposing ``predict(batch, verbose=0)``; the first row
            of its result must hold at least two numeric values.

    Returns:
        A dict with the original text under ``"input"`` and the first two
        prediction values, scaled to percent and rounded to two decimals,
        under ``"percent_string"`` and ``"percent_number"``. Both percentages
        are built-in ``float`` objects.
    """
    input_seq = preprocess(text)
    # The model is called with a batch of one; [0] takes that single row back out.
    pred = model.predict(np.expand_dims(input_seq, axis=0), verbose=0)[0]

    # Raw prediction, printed for inspection.
    print(pred)

    # Convert to built-in floats before scaling and rounding: rounding a NumPy
    # scalar yields another NumPy scalar, which is not JSON-serializable and
    # carries the lower-precision representation into the result.
    string_share = float(pred[0])
    number_share = float(pred[1])

    return {
        "input": text,
        "percent_string": round(string_share * 100, 2),
        "percent_number": round(number_share * 100, 2),
    }

# Create Model

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, GlobalAveragePooling1D
import numpy as np
import string
import sys

# 1. Character-level vocabulary: lowercase letters, digits, then the space character.
chars = [*string.ascii_lowercase, *string.digits, " "]
# Indices start at 1; index 0 is reserved for padding.
char2idx = dict(zip(chars, range(1, len(chars) + 1)))
# One extra slot accounts for the reserved index 0.
vocab_size = len(char2idx) + 1

# 3. Model definition


def create_model(maxlen=20):
    """Build and compile the character-level string/number model.

    The network embeds each character index into 16 dimensions, runs a
    32-unit LSTM that returns its full output sequence, averages that
    sequence over the time axis and finishes with a two-unit softmax layer.

    Args:
        maxlen: Number of character indices per input sequence.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    char_ids = Input(shape=(maxlen,))
    embedded = Embedding(vocab_size, 16)(char_ids)
    per_step = LSTM(32, return_sequences=True)(embedded)
    pooled = GlobalAveragePooling1D()(per_step)
    # Two outputs: %string, %number
    shares = Dense(2, activation='softmax')(pooled)

    network = Model(inputs=char_ids, outputs=shares)
    network.compile(loss='mse', optimizer='adam')
    return network


# Build the network with the default sequence length.
model = create_model()

# 4. Example training data: (text, [string share, number share]) pairs.
# NOTE(review): the model ends in a softmax, so a target such as [0.0, 0.0]
# cannot be matched exactly - confirm the empty-string label is intended.
examples = [
    ("hello", [1.0, 0.0]),
    ("", [0.0, 0.0]),
    ("42", [0.0, 1.0]),
    ("h3110", [0.6, 0.4]),
    ("123", [0.0, 1.0]),
    ("a1b2", [0.5, 0.5]),
    ("world2025", [0.5, 0.5]),
    ("sam1", [0.75, 0.25]),
]

# Encoded inputs and their target shares, kept in the same order.
X = np.array([preprocess(pair[0]) for pair in examples])
y = np.array([pair[1] for pair in examples])

# 5. Train model
model.fit(x=X, y=y, epochs=50, verbose=0)


# Test: print the analysis for a few sample inputs.
for sample_text in ("Hello", "h3110", "123456", "AI2025", "A"):
    print(analyze_text(text=sample_text, model=model))

# Persist the trained model in the native Keras format.
model.save("string_number_classifier.keras")

# Replace this module's entry in sys.modules with the analyze_text function,
# so that the name registered for this module resolves to the function itself.
sys.modules[__name__] = analyze_text
# NOTE(review): if an explicit export list is enabled instead, its entries
# must be strings:
# __all__ = [analyze_text, preprocess]

: 

# Load the saved model and fine-tune it

In [None]:
import preprocess, analyze_text

import tensorflow as tf
# from tensorflow.keras.models import load_model

import keras
from keras import ops
import numpy as np

# Restore the classifier from the file on disk.
# NOTE(review): `preprocess` and `analyze_text` must be defined in this
# session before the code below runs. `import preprocess, analyze_text` looks
# for modules with those names rather than functions defined in another cell,
# and the recorded output of this cell is a NameError for `preprocess` -
# confirm how these two names are meant to be brought into scope.
model = keras.models.load_model("./string_number_classifier.keras")

# Additional (text, [string share, number share]) pairs for further training.
examples = [
    ("h1", [0.5, 0.5]),
    ("", [0.0, 0.0]),
    ("1234567890", [0.0, 1.0]),
    ("0987654321", [0.0, 1.0]),
    ("h3110", [0.6, 0.4]),
    ("789", [0.0, 1.0]),
    ("AA000", [0.4, 0.6]),
    ("A2B1", [0.5, 0.5]),
    ("123lll123", [0.33, 0.66]),
    ("1amb", [0.75, 0.25]),
    ("155b", [0.25, 0.75]),
]

# Encoded inputs and their target shares, kept in the same order.
x = np.array([preprocess(pair[0]) for pair in examples])
y = np.array([pair[1] for pair in examples])

# NOTE(review): opt1, opt2, opt_layer_pairs and loss are not referenced again
# in this cell; the model is compiled with `optimizer` below. They look like
# leftovers from a per-layer optimizer experiment - confirm before removing.
opt1 = tf.keras.optimizers.Adam(learning_rate=1e-3)
opt2 = tf.keras.optimizers.SGD(learning_rate=0)

opt_layer_pairs = [(opt1, model.layers[0]), (opt2, model.layers[1])]

loss = tf.keras.losses.MSE

# Recompile with a fresh Adam optimizer that clips each gradient's norm to 1.0.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
model.compile(loss='mse', optimizer=optimizer)

model.fit(x=x, y=y, epochs=10, batch_size=8, verbose=0)

# Print the analysis for a few sample inputs.
for sample_text in ("Hello", "h3110", "123456", "AI2025", "A"):
    print(analyze_text(text=sample_text, model=model))

NameError: name 'preprocess' is not defined