In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

def add_random_noise(texts, noise_level=0.1):
    """Return a noisy copy of every string in *texts*.

    Each character is visited independently. One uniform draw from
    ``np.random.rand()`` decides whether it is kept (draw ``> noise_level``)
    or perturbed. A perturbed character has its code point shifted by
    ``np.random.randint(-3, 3)``; the interval is half-open, so the shift is
    one of -3, -2, -1, 0, 1, 2 (a shift of 0 leaves the character as it was).

    The shifted code point is clamped to the range ``chr()`` accepts, so
    characters at the very bottom or top of the Unicode range no longer raise
    ``ValueError``.

    Args:
        texts: Iterable of strings to perturb.
        noise_level: Per-character probability of applying a shift.

    Returns:
        A list with one perturbed string per input string, in input order.
        Every output string has the same length as its input.
    """
    max_code_point = 0x10FFFF  # largest value accepted by chr()

    noisy_texts = []
    for text in texts:
        noisy_chars = []
        for c in text:
            # Draw order (rand first, randint only when perturbing) is kept
            # so that seeded runs produce the same noise as before.
            if np.random.rand() > noise_level:
                noisy_chars.append(c)
                continue
            shifted = ord(c) + np.random.randint(-3, 3)
            # Clamp instead of letting chr() raise on out-of-range values.
            noisy_chars.append(chr(min(max(shifted, 0), max_code_point)))
        noisy_texts.append(''.join(noisy_chars))
    return noisy_texts

# --- Data preparation --------------------------------------------------------
# The first CSV column is used as the cipher text, the second as its label.
training_data = pd.read_csv('Dataset.csv')
cipher_text_column = training_data.iloc[:, 0].values
label_column = training_data.iloc[:, 1].values

# Double the dataset: all original texts, followed by one noisy copy of each.
# The label array is repeated in the same order so rows stay aligned.
# NOTE(review): augmentation runs before the train/test split below, so the
# noisy twin of a test sample can land in the training set -- confirm this is
# acceptable for the reported test metrics.
cipher_texts_augmented = add_random_noise(cipher_text_column)
cipher_texts_combined = np.concatenate([cipher_text_column, cipher_texts_augmented])
labels_text_combined = np.concatenate([label_column, label_column])

# Map label values to integer class ids.
label_encoder = LabelEncoder()
labels_numeric = label_encoder.fit_transform(labels_text_combined)

# Character-level tokenisation; every sequence is brought to a fixed length.
# Only `maxlen` is passed, so padding/truncation follow the library defaults.
max_sequence_length = 40
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(cipher_texts_combined)
cipher_texts_tokenized = tokenizer.texts_to_sequences(cipher_texts_combined)
cipher_texts_padded = pad_sequences(cipher_texts_tokenized, maxlen=max_sequence_length)

# Two-stage split with identical settings: first hold out 20% as the test
# set, then hold out 20% of what remains as the validation set.
split_options = {'test_size': 0.2, 'random_state': 42}
x_remaining, x_test, y_remaining, y_test = train_test_split(
    cipher_texts_padded, labels_numeric, **split_options
)
x_train, x_val, y_train, y_val = train_test_split(
    x_remaining, y_remaining, **split_options
)

def create_model(units_lstm=128, dropout_rate=0.5):
    """Build and compile the stacked bidirectional-LSTM classifier.

    The network is: an embedding layer (100-dimensional), two
    sequence-returning bidirectional LSTM stages each followed by batch
    normalisation and dropout, a final bidirectional LSTM followed by
    dropout, and a softmax output layer.

    The vocabulary size, input length and number of output classes are read
    from the module-level ``tokenizer``, ``max_sequence_length`` and
    ``label_encoder`` objects, so those must exist before this is called.

    Args:
        units_lstm: Number of units in each LSTM (per direction).
        dropout_rate: Rate used by every Dropout layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimiser, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    # +1 on top of the number of tokens the tokenizer knows about.
    vocabulary_size = len(tokenizer.word_index) + 1
    class_count = len(label_encoder.classes_)

    network = Sequential()
    network.add(
        Embedding(
            input_dim=vocabulary_size,
            output_dim=100,
            input_length=max_sequence_length,
        )
    )

    # Two identical recurrent stages that keep the full sequence.
    for _ in range(2):
        network.add(Bidirectional(LSTM(units=units_lstm, return_sequences=True)))
        network.add(BatchNormalization())
        network.add(Dropout(dropout_rate))

    # Last recurrent stage does not return sequences; it feeds the classifier.
    network.add(Bidirectional(LSTM(units=units_lstm)))
    network.add(Dropout(dropout_rate))
    network.add(Dense(units=class_count, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# --- Training ----------------------------------------------------------------
# Build the classifier with a dropout rate of 0.3 instead of create_model's
# default, then train for two epochs while monitoring the validation split.
model = create_model(dropout_rate=0.3, units_lstm=128)

model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=2,
)

# Persist the trained network next to the script.
model_filename = 'AdvancedCiphers.h5'
model.save(model_filename)
print("Trained model saved to:", model_filename)

# --- Evaluation on the held-out test split -----------------------------------
# The model outputs one score per class; the highest-scoring column index is
# taken as the predicted class id.
y_pred = model.predict(x_test)
y_pred_labels = y_pred.argmax(axis=1)

print("Test Set Classification Report:")
test_report = classification_report(y_true=y_test, y_pred=y_pred_labels)
print(test_report)

conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_labels)
print("Confusion Matrix:", conf_matrix, sep="\n")

# --- Persist the preprocessing artefacts -------------------------------------
# The tokenizer is written as the JSON string produced by its own to_json().
tokenizer_filename = 'Tokenizer.json'
tokenizer_json = tokenizer.to_json()

with open(tokenizer_filename, mode='w') as tokenizer_file:
    tokenizer_file.write(tokenizer_json)

print("Tokenizer saved to:", tokenizer_filename)

# Only the encoder's class array is stored, not the LabelEncoder object.
label_encoder_filename = 'LabelEncoder.npy'
np.save(file=label_encoder_filename, arr=label_encoder.classes_)

print("Label encoder saved to:", label_encoder_filename)

# --- Interactive prediction ---------------------------------------------------
# Ask how many cipher texts to classify. A non-numeric reply used to raise
# ValueError and abort the script after training had already finished; it now
# just re-prompts until the reply parses as an integer.
while True:
    count_reply = input("Enter the number of cipher texts you want to test: ")
    try:
        num_texts = int(count_reply)
    except ValueError:
        print("Please enter a whole number.")
    else:
        break

# A count of zero (or a negative number) simply skips the loop.
for i in range(num_texts):
    user_input = input("Enter cipher text {} for testing: ".format(i + 1))

    # Apply the same preprocessing as for the training data: lower-case,
    # character-tokenise with the fitted tokenizer, pad to the fixed length.
    user_input = user_input.lower()
    user_input_tokenized = tokenizer.texts_to_sequences([user_input])
    user_input_padded = pad_sequences(user_input_tokenized, maxlen=max_sequence_length)

    # Highest-scoring class id, mapped back to the original label value.
    prediction_one_hot = model.predict(user_input_padded)
    predicted_label_numeric = np.argmax(prediction_one_hot, axis=1)
    predicted_label = label_encoder.inverse_transform(predicted_label_numeric)[0]

    print("Predicted Label for input {}: {}".format(i + 1, predicted_label))
