In [None]:
# Main Code

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
import base64
import os

print("Starting model training...")

# AES parameters shared by every encryption call in this script.
# NOTE(review): the key is hard-coded in source, so anyone with this file can
# decrypt the saved dataset.
BLOCK_SIZE = 16  # bytes; handed to pad() before encrypting
KEY = b'Sixteen byte key'  # 16 bytes long, i.e. an AES-128 key

# Read the typo/correction pairs (path is relative to the working directory).
dataset_path = 'Final_Dataset.csv'
df = pd.read_csv(dataset_path)

# Normalise both text columns: missing cells become '' and every value is a str.
for column in ('typo', 'correct_word'):
    df[column] = df[column].fillna('').astype(str)
# Apply encryption
def safe_aes_encrypt(plaintext, key):
    """Encrypt one value with AES-ECB and return it as base64 text.

    Missing values (as reported by ``pd.isna``) are treated as the empty
    string and everything else is converted with ``str`` first. This is a
    best-effort helper: any failure is printed and ``''`` is returned instead
    of raising.

    Args:
        plaintext: Value to encrypt.
        key: AES key bytes passed to ``AES.new``.

    Returns:
        The base64-encoded ciphertext as a ``str``, or ``''`` on error.
    """
    try:
        plaintext = '' if pd.isna(plaintext) else str(plaintext)
        encryptor = AES.new(key, AES.MODE_ECB)
        encrypted = encryptor.encrypt(pad(plaintext.encode(), BLOCK_SIZE))
        encoded = base64.b64encode(encrypted)
        return encoded.decode('utf-8')
    except Exception as e:
        # Deliberately swallow the error so one bad row does not abort the run.
        print(f"Error encrypting '{plaintext}': {str(e)}")
        return ''

# Encrypt both text columns; the plaintext columns stay in the frame.
print("Encrypting dataset...")
df['encrypted_typo'] = df['typo'].apply(safe_aes_encrypt, args=(KEY,))
df['encrypted_correct'] = df['correct_word'].apply(safe_aes_encrypt, args=(KEY,))

# Persist the frame (original + encrypted columns) without the index column.
df.to_csv('encrypted_dataset.csv', index=False)
print("Encrypted dataset saved to 'encrypted_dataset.csv'")

# Character vocabulary: the base64 alphabet plus '='. Indices start at 1
# because 0 is the value pad_sequences fills padding positions with.
base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
char_to_idx = {ch: pos for pos, ch in enumerate(base64_chars, start=1)}
idx_to_char = dict(enumerate(base64_chars, start=1))

# Store both lookup tables in a single pickle file.
import pickle
vocabulary = {'char_to_idx': char_to_idx, 'idx_to_char': idx_to_char}
with open('vocabulary.pkl', 'wb') as f:
    pickle.dump(vocabulary, f)
print("Vocabulary saved to 'vocabulary.pkl'")

# Convert text to sequences
def text_to_seq(text):
    """Map each character of ``text`` to its vocabulary index.

    Characters that are not keys of ``char_to_idx`` are skipped, so the
    result may be shorter than the input.
    """
    indices = []
    for ch in text:
        idx = char_to_idx.get(ch)
        if idx is not None:
            indices.append(idx)
    return indices

# Longest encrypted string across both columns; every sequence is padded to it.
max_len = max(
    len(text)
    for column in ('encrypted_typo', 'encrypted_correct')
    for text in df[column]
)

# Write the padding length to disk as plain text.
with open('max_len.txt', 'w') as f:
    f.write(f"{max_len}")
print(f"Max length saved: {max_len}")

# Turn both columns into fixed-width integer matrices, zero-padded on the right.
print("Preparing training data...")
X = pad_sequences(
    [text_to_seq(text) for text in df['encrypted_typo']],
    maxlen=max_len,
    padding='post',
)
y = pad_sequences(
    [text_to_seq(text) for text in df['encrypted_correct']],
    maxlen=max_len,
    padding='post',
)

# One extra slot for index 0, which is the padding value.
vocab_size = 1 + len(base64_chars)

# Build model
print("Building model...")
# Sequence-to-sequence tagger: one softmax over the vocabulary per input
# position. Both LSTM layers must return the full sequence; with the final
# LSTM collapsing the time axis the model emitted a single prediction per
# sample, which cannot be paired with the per-character targets and makes
# model.fit fail on mismatched sample counts.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_len),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    Dense(vocab_size, activation='softmax')  # applied to every timestep
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Targets stay shaped (num_samples, max_len): sparse categorical crossentropy
# takes one integer class id per predicted timestep, so no reshape is needed.
# Flattening to (-1, 1) would give max_len times as many target rows as inputs.
# The name is kept so anything referring to y_reshaped keeps working.
y_reshaped = y

# Train model (last 20% of the rows are held out for validation)
print("Training model... This may take some time.")
model.fit(X, y_reshaped, epochs=20, batch_size=256, validation_split=0.2)

# Save model
# NOTE(review): the path has no file extension; some Keras versions require
# '.keras' or '.h5' here -- confirm against the installed TensorFlow/Keras.
model_path = 'nlp_model'
model.save(model_path)
print(f"Model trained and saved to '{model_path}'")

print("Training complete! You can now run the app.py file.")

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Embedding, Input
from tensorflow.keras.preprocessing.sequence import pad_sequences
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad
import base64
from google.colab import drive

# AES parameters shared by the encrypt/decrypt helpers in this script.
# NOTE(review): the key is hard-coded in source.
BLOCK_SIZE = 16  # bytes; handed to pad()/unpad()
KEY = b'Sixteen byte key'  # 16 bytes long, i.e. an AES-128 key

# Mount Google Drive so the dataset path below is reachable.
drive.mount('/content/drive')

# Read the typo/correction pairs from the mounted drive.
dataset_path = '/content/drive/MyDrive/Final_Dataset.csv'
df = pd.read_csv(dataset_path)

def aes_encrypt(plaintext, key):
    """Encrypt ``plaintext`` with AES in ECB mode and return base64 text.

    The text is encoded to bytes, padded to ``BLOCK_SIZE``, encrypted, and the
    ciphertext is base64-encoded and returned as a ``str``. Unlike
    ``safe_aes_encrypt`` this helper lets exceptions propagate.
    """
    encryptor = AES.new(key, AES.MODE_ECB)
    encrypted = encryptor.encrypt(pad(plaintext.encode(), BLOCK_SIZE))
    encoded = base64.b64encode(encrypted)
    return encoded.decode('utf-8')

def aes_decrypt(ciphertext, key):
    """Decrypt a base64-encoded AES-ECB ciphertext back to text.

    Args:
        ciphertext: Base64 text as produced by ``aes_encrypt``.
        key: AES key bytes passed to ``AES.new``.

    Returns:
        The decrypted UTF-8 string, or ``None`` when the input cannot be
        base64-decoded, decrypted, unpadded or decoded as UTF-8.
    """
    try:
        cipher = AES.new(key, AES.MODE_ECB)
        decoded_text = base64.b64decode(ciphertext.encode('utf-8'))
        decrypted = cipher.decrypt(decoded_text)
        return unpad(decrypted, BLOCK_SIZE).decode('utf-8')
    except (ValueError, TypeError, AttributeError):
        # ValueError covers malformed base64 (binascii.Error), invalid UTF-8
        # (UnicodeDecodeError) and, presumably, the cipher/unpad failures on
        # bad length or padding. TypeError/AttributeError cover a non-string
        # ciphertext or wrongly typed key. A bare ``except`` would also hide
        # KeyboardInterrupt and genuine programming errors.
        return None
# 1. Clean the data first: missing cells become '' and every value is a str.
for column in ('typo', 'correct_word'):
    df[column] = df[column].fillna('').astype(str)

# 2. Use a safe encryption function
def safe_aes_encrypt(plaintext, key):
    """Best-effort wrapper around ``aes_encrypt`` that never raises.

    Missing values (as reported by ``pd.isna``) are treated as the empty
    string and everything else is converted with ``str`` before encryption.

    Args:
        plaintext: Value to encrypt.
        key: AES key bytes.

    Returns:
        The base64-encoded ciphertext as a ``str``; on any error the problem
        is printed and ``''`` is returned.
    """
    try:
        plaintext = '' if pd.isna(plaintext) else str(plaintext)
        # aes_encrypt performs the same cipher/pad/encrypt/base64 steps.
        return aes_encrypt(plaintext, key)
    except Exception as e:
        # Deliberately swallow the error so one bad row does not abort the run.
        print(f"Error encrypting '{plaintext}': {str(e)}")
        return ''

# 3. Apply encryption to both text columns; plaintext columns stay in the frame.
df['encrypted_typo'] = df['typo'].apply(safe_aes_encrypt, args=(KEY,))
df['encrypted_correct'] = df['correct_word'].apply(safe_aes_encrypt, args=(KEY,))

# Character vocabulary: the base64 alphabet plus '='. Indices start at 1
# because 0 is the value pad_sequences fills padding positions with.
base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
char_to_idx = {ch: pos for pos, ch in enumerate(base64_chars, start=1)}
idx_to_char = dict(enumerate(base64_chars, start=1))

# Convert text to sequences
def text_to_seq(text):
    """Translate ``text`` into a list of vocabulary indices.

    Any character without an entry in ``char_to_idx`` is dropped.
    """
    seq = []
    for symbol in text:
        try:
            seq.append(char_to_idx[symbol])
        except KeyError:
            # Not part of the vocabulary: skip it.
            continue
    return seq

# Longest encrypted string across both columns; every sequence is padded to it.
max_len = max(
    len(text)
    for column in ('encrypted_typo', 'encrypted_correct')
    for text in df[column]
)

# Turn both columns into fixed-width integer matrices, zero-padded on the right.
X = pad_sequences(
    [text_to_seq(text) for text in df['encrypted_typo']],
    maxlen=max_len,
    padding='post',
)
y = pad_sequences(
    [text_to_seq(text) for text in df['encrypted_correct']],
    maxlen=max_len,
    padding='post',
)

# One extra slot for index 0, which is the padding value.
vocab_size = 1 + len(base64_chars)

# Build model
# Sequence-to-sequence tagger: one softmax over the vocabulary per input
# position. Both LSTM layers must return the full sequence; with the final
# LSTM collapsing the time axis the model emitted a single prediction per
# sample, which cannot be paired with the per-character targets and makes
# model.fit fail on mismatched sample counts.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_len),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    Dense(vocab_size, activation='softmax')  # applied to every timestep
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Targets stay shaped (num_samples, max_len): sparse categorical crossentropy
# takes one integer class id per predicted timestep, so no reshape is needed.
# Flattening to (-1, 1) would give max_len times as many target rows as inputs.
# The name is kept so anything referring to y_reshaped keeps working.
y_reshaped = y

# Train model (last 20% of the rows are held out for validation)
model.fit(X, y_reshaped, epochs=20, batch_size=256, validation_split=0.2)

def neural_match(encrypted_input):
    """Return an encrypted correction for ``encrypted_input``.

    A row of ``df`` whose ``encrypted_typo`` equals the input wins outright
    and its ``encrypted_correct`` value is returned. Otherwise the input is
    encoded, padded to ``max_len`` and fed to ``model``; the arg-max indices
    are mapped back through ``idx_to_char`` (indices with no entry, such as
    the padding value 0, are skipped).

    Returns:
        The correction string, or ``None`` if the model output maps to no
        characters at all.
    """
    # Dataset lookup first: the first exact match short-circuits the model.
    known = df.loc[df['encrypted_typo'] == encrypted_input, 'encrypted_correct']
    if len(known) > 0:
        return known.iloc[0]

    # Encode the query the same way the training data was encoded.
    encoded = pad_sequences(
        np.array([text_to_seq(encrypted_input)]),
        maxlen=max_len,
        padding='post',
    )
    pred = model.predict(encoded, verbose=0)

    if pred.ndim == 3:
        # Per-timestep output: best class at each position of the one sample.
        predicted_indices = np.argmax(pred, axis=-1)[0]
    else:
        # Any other shape: a single arg-max over the whole output.
        predicted_indices = [np.argmax(pred)]

    chars = []
    for idx in predicted_indices:
        if isinstance(idx, (np.ndarray, list)):
            # Nested entry: use its first element, or 0 when it is empty.
            idx = idx[0] if len(idx) > 0 else 0
        if idx in idx_to_char:
            chars.append(idx_to_char[idx])

    return ''.join(chars) or None

def is_correct(encrypted_word):
    """Report whether ``encrypted_word`` appears in ``df['encrypted_correct']``."""
    known_corrections = df['encrypted_correct'].values
    return encrypted_word in known_corrections

# User interface: loop until the user picks option 2. Any input other than
# '1' or '2' simply shows the menu again.
while True:
    print("\n1. Check spelling")
    print("2. Exit")
    choice = input("Choose an option: ").strip()

    if choice == '2':
        break
    if choice != '1':
        continue

    user_word = input("Enter a word to check: ").strip().lower()

    # Encrypt the word so it can be compared with the encrypted columns.
    encrypted_input = aes_encrypt(user_word, KEY)
    print(f"Encrypted input: {encrypted_input}")

    # A word already present among the encrypted corrections needs no fix.
    if is_correct(encrypted_input):
        print("✓ Word is already correctly spelled!")
        continue

    # Dataset lookup first, model prediction as the fallback.
    encrypted_correction = neural_match(encrypted_input)
    if not encrypted_correction:
        print("No suitable correction found")
        continue

    print(f"Encrypted correction: {encrypted_correction}")

    # Decrypt only on request; aes_decrypt yields None when it cannot decrypt.
    decrypt = input("Decrypt the correction? (y/n): ").lower()
    if decrypt != 'y':
        continue

    decrypted = aes_decrypt(encrypted_correction, KEY)
    if decrypted:
        print(f"Decrypted correction: {decrypted}")
    else:
        print("Decryption failed")