In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import random


In [9]:
def generate_balanced_sequence(length):
    seq = ['('] * (length // 2) + [')'] * (length // 2)
    random.shuffle(seq)
    return ''.join(seq)

def generate_unbalanced_sequence(length):
    seq = [random.choice(['(', ')']) for _ in range(length)]
    # Ensure it is unbalanced
    if seq.count('(') == seq.count(')'):
        seq[-1] = '('
    return ''.join(seq)

def generate_dataset(n_samples=10000, maxlen=20):
    X, y = [], []
    for _ in range(n_samples // 2):
        s = generate_balanced_sequence(random.choice(range(6, maxlen, 2)))
        X.append(s)
        y.append(1)
    for _ in range(n_samples // 2):
        s = generate_unbalanced_sequence(random.randint(6, maxlen))
        X.append(s)
        y.append(0)
    return X, y


In [10]:
X_text, y = generate_dataset()
char_to_int = {'(': 1, ')': 2}
X_seq = [[char_to_int[c] for c in seq] for seq in X_text]
max_seq_len = max(len(seq) for seq in X_seq)
X_pad = pad_sequences(X_seq, maxlen=max_seq_len, padding='post')
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# 3. Build LSTM Model
model = Sequential([
    Embedding(input_dim=3, output_dim=16, input_length=max_seq_len),
    LSTM(32),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [11]:
history = model.fit(
    X_train, np.array(y_train),
    validation_split=0.2,
    epochs=5,
    batch_size=64,
    verbose=1
)

# 5. Evaluate Model
loss, accuracy = model.evaluate(X_test, np.array(y_test), verbose=0)
print(f"\nLSTM Test Accuracy: {accuracy * 100:.2f}%")


Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.5138 - loss: 0.6890 - val_accuracy: 0.5581 - val_loss: 0.6834
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5463 - loss: 0.6742 - val_accuracy: 0.5244 - val_loss: 0.6702
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5435 - loss: 0.6615 - val_accuracy: 0.5031 - val_loss: 0.6576
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5466 - loss: 0.6498 - val_accuracy: 0.5163 - val_loss: 0.6470
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6007 - loss: 0.6252 - val_accuracy: 0.7625 - val_loss: 0.4874

LSTM Test Accuracy: 76.95%


In [12]:
def predict_sequence(model, seq_str):
    seq_encoded = [char_to_int[c] for c in seq_str]
    seq_padded = pad_sequences([seq_encoded], maxlen=max_seq_len, padding='post')
    prob = model.predict(seq_padded, verbose=0)[0][0]
    return prob, "Balanced" if prob >= 0.5 else "Unbalanced"


In [13]:
test_sequences = ["(()(()))", "(()())(()", "((()))", "()))("]
print("\n=== Sample Predictions ===")
for seq in test_sequences:
    prob, label = predict_sequence(model, seq)
    print(f"Sequence: {seq} --> {label} (Confidence: {prob:.2f})")


=== Sample Predictions ===
Sequence: (()(())) --> Balanced (Confidence: 0.67)
Sequence: (()())(() --> Unbalanced (Confidence: 0.10)
Sequence: ((())) --> Balanced (Confidence: 0.67)
Sequence: ()))( --> Unbalanced (Confidence: 0.12)
