In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def prepare_lstm_data(data, min_length=2):
    """Build next-character training arrays and vocabulary maps from strings.

    Each sample that passes the length filter is expanded into all of its
    prefixes of length two or more. For every prefix, the last character is
    the prediction target and the characters before it form the input, which
    is left-padded with zeros to a common length.

    Args:
        data: Collection of strings to learn from.
        min_length: Samples shorter than this many characters are dropped.

    Returns:
        A 5-tuple ``(X, y, input_length, char_to_index, index_to_char)``:
        ``X`` is the ``pad_sequences`` output for the prefix inputs, ``y`` is
        the ``to_categorical`` encoding of the targets using
        ``len(char_to_index) + 1`` classes, ``input_length`` is the padded
        width of ``X``, and the two dicts map characters to 1-based indices
        and back.

    Raises:
        ValueError: If ``data`` is empty, if no sample reaches ``min_length``,
            if the surviving samples contain no characters, or if no
            input/target pair could be generated.
    """
    if not data:
        raise ValueError("Input data is empty. Please provide password samples.")

    # Keep only the samples that are long enough.
    kept = [sample for sample in data if len(sample) >= min_length]
    if not kept:
        raise ValueError(f"No items meet the minimum length requirement of {min_length} characters.")

    # Vocabulary: every distinct character, in sorted order.
    alphabet = sorted(set("".join(kept)))
    if not alphabet:
        raise ValueError("No valid characters found in the input data.")

    # Indices start at 1 so that 0 stays free for the padding value.
    char_to_index = {}
    index_to_char = {}
    for position, symbol in enumerate(alphabet, start=1):
        char_to_index[symbol] = position
        index_to_char[position] = symbol

    # For each prefix of length >= 2, everything but the last code is the
    # input and the last code is the target.
    inputs = []
    targets = []
    for sample in kept:
        codes = [char_to_index[symbol] for symbol in sample]
        for end in range(2, len(codes) + 1):
            inputs.append(codes[:end - 1])
            targets.append(codes[end - 1])

    if not inputs:
        raise ValueError("No training sequences generated. Check input data format.")

    # Longest input (i.e. longest prefix minus its target character).
    input_length = max(len(row) for row in inputs)
    X = pad_sequences(inputs, maxlen=input_length, padding="pre", value=0)
    # One extra class accounts for the padding index 0.
    y = to_categorical(targets, num_classes=len(char_to_index) + 1)

    return X, y, input_length, char_to_index, index_to_char

def train_lstm_model(X, y, vocab_size, max_seq_length, epochs=100, batch_size=32):
    """Build, compile and fit a two-layer LSTM next-character classifier.

    Args:
        X: Training inputs passed straight to ``model.fit``; the model
            declares an input of ``max_seq_length`` positions per sample.
        y: Training targets passed straight to ``model.fit``; the output
            layer has ``vocab_size + 1`` softmax units.
        vocab_size: Number of distinct characters (excluding the padding
            index 0).
        max_seq_length: Number of positions in each input sequence.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``model.fit``.

    Returns:
        The fitted ``Sequential`` model.
    """
    # Index 0 is the padding token, so the model needs one slot more than
    # the number of real characters.
    num_classes = vocab_size + 1

    model = Sequential([
        # The input shape is declared with an explicit Input layer rather
        # than Embedding(input_length=...), which is deprecated in Keras 3
        # and only produces a warning there.
        tf.keras.Input(shape=(max_seq_length,)),
        Embedding(input_dim=num_classes,
                  output_dim=16,
                  mask_zero=True),  # mask the zero padding
        LSTM(128, return_sequences=True, dropout=0.2),
        LSTM(128, dropout=0.2),
        Dense(64, activation='relu'),
        Dense(num_classes, activation='softmax')  # Must match num_classes
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

    model.fit(
        X, y,
        epochs=epochs,
        batch_size=batch_size,
        verbose=1
    )

    return model

# Enhanced password components: a handful of sample strings per component
# type; one model is trained on each list below.
passwords = {
    'Capital': ['ABCDE', 'BCDEF', 'CDEFG', 'DEFGH', 'EFGHI', 'FGHIJ', 'GHIJK'],
    'Word': ['alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf'],
    'Numeric': ['12345', '23456', '34567', '45678', '56789', '67890', '78901'],
    'Special': ['@!@!@', '#$#$#', '%^%^%', '&*&*&', '()()(', '!@#$%', '^&*()']
}

# Train models
try:
    print("Training models...")
    X_cap, y_cap, max_cap, c2i_cap, i2c_cap = prepare_lstm_data(passwords['Capital'])
    model_cap = train_lstm_model(X_cap, y_cap, len(c2i_cap), max_cap, epochs=50)

    X_w, y_w, max_w, c2i_w, i2c_w = prepare_lstm_data(passwords['Word'])
    model_w = train_lstm_model(X_w, y_w, len(c2i_w), max_w, epochs=50)

    X_num, y_num, max_num, c2i_num, i2c_num = prepare_lstm_data(passwords['Numeric'])
    model_num = train_lstm_model(X_num, y_num, len(c2i_num), max_num, epochs=50)

    X_special, y_special, max_special, c2i_special, i2c_special = prepare_lstm_data(passwords['Special'])
    model_special = train_lstm_model(X_special, y_special, len(c2i_special), max_special, epochs=50)

    print("\nAll models trained successfully!")

except Exception as e:
    print(f"Error during training: {e}")
    # Re-raise so the cell visibly fails. Swallowing the error would let
    # later cells run against models that are missing or left over from a
    # previous execution of this cell.
    raise

Training models...
Epoch 1/50


2025-04-20 11:55:02.125643: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19s/step - accuracy: 0.0357 - loss: 2.4848
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step - accuracy: 0.1429 - loss: 2.4826
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step - accuracy: 0.1429 - loss: 2.4800
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 269ms/step - accuracy: 0.1429 - loss: 2.4773
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step - accuracy: 0.1429 - loss: 2.4741
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step - accuracy: 0.1429 - loss: 2.4707
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282ms/step - accuracy: 0.1429 - loss: 2.4672
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step - accuracy: 0.1429 - loss: 2.4633
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [22]:
import pickle
import os

def _save_artifacts(model, model_path, vocab_path, vocab_bundle):
    """Save ``model`` to ``model_path`` and pickle ``vocab_bundle`` to ``vocab_path``."""
    model.save(model_path)
    with open(vocab_path, "wb") as vocab_file:
        pickle.dump(vocab_bundle, vocab_file)


# Make sure the output directory is present before writing into it.
os.makedirs("model_artifacts", exist_ok=True)

# Each call stores one trained model together with its
# (char_to_index, index_to_char, max_length) tuple.

# Capital
_save_artifacts(
    model_cap,
    "model_artifacts/cap_model.h5",
    "model_artifacts/cap_vocab.pkl",
    (c2i_cap, i2c_cap, max_cap),
)

# Word
_save_artifacts(
    model_w,
    "model_artifacts/word_model.h5",
    "model_artifacts/word_vocab.pkl",
    (c2i_w, i2c_w, max_w),
)

# Numeric
_save_artifacts(
    model_num,
    "model_artifacts/num_model.h5",
    "model_artifacts/num_vocab.pkl",
    (c2i_num, i2c_num, max_num),
)

# Special
_save_artifacts(
    model_special,
    "model_artifacts/special_model.h5",
    "model_artifacts/special_vocab.pkl",
    (c2i_special, i2c_special, max_special),
)

