### Training using LSTM

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Load and preprocess the dataset.
# Both CSV files are read for a 'text' column (model input) and a 'label'
# column (binary target for the sigmoid output below).
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

max_len = 50  # Every sequence is padded/truncated to this many tokens; adjust as needed
embedding_dim = 50  # Size of each learned word vector; adjust as needed

# Tokenize and pad sequences.
# The vocabulary is fitted on the training text only. An explicit OOV token
# maps words that were never seen during fitting to a dedicated index instead
# of silently dropping them, so test/inference sentences keep their length
# and word positions.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(train_data['text'])
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data['text']), maxlen=max_len, padding='post')
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data['text']), maxlen=max_len, padding='post')

# Labels
y_train = train_data['label']
y_test = test_data['label']

# LSTM model
lstm_model = Sequential()
# Index 0 is reserved for padding, hence the "+ 1" on the vocabulary size.
# mask_zero=True makes the downstream LSTM skip the zeros appended by
# padding='post'; without the mask the final LSTM state is computed after
# running over the padding, which washes out short sentences.
# (`input_length` is intentionally omitted: it is deprecated in Keras 3 and
# the sequence length is inferred from the data on the first call.)
lstm_model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_dim, mask_zero=True))
# Keep the default tanh activation: an unbounded ReLU inside the recurrence
# is prone to exploding activations/NaN loss and also rules out the fused
# cuDNN kernel on GPU.
lstm_model.add(LSTM(64))
lstm_model.add(Dense(64, activation='relu'))
lstm_model.add(Dropout(0.5))
# Single sigmoid unit: the output is a probability for the positive label.
lstm_model.add(Dense(1, activation='sigmoid'))

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the last 20% of the training rows are held out for validation.
lstm_model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate on test set
loss, accuracy = lstm_model.evaluate(X_test, y_test)
print(f'LSTM Test Loss: {loss}, Test Accuracy: {accuracy}')

# Make predictions (one sigmoid probability per test row)
lstm_predictions = lstm_model.predict(X_test)


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

def evaluate_sentence(sentence, model, tokenizer, max_len):
    """Score one sentence with a trained model and return the raw prediction.

    The sentence is converted to token ids with ``tokenizer``, post-padded to
    ``max_len`` and passed to ``model.predict``. Intermediate values are
    printed as a debugging aid.

    Args:
        sentence: The text to score.
        model: Object exposing ``predict``; presumably the trained Keras
            model whose output is one value per input row (confirm against
            the caller).
        tokenizer: Object exposing ``texts_to_sequences``; it should be the
            same tokenizer that produced the model's training inputs.
        max_len: Length the token sequence is padded/truncated to.

    Returns:
        The first element of the first row of the prediction output.
    """
    # The tokenizer works on batches, so wrap the sentence in a one-item list.
    token_ids = tokenizer.texts_to_sequences([sentence])
    model_input = pad_sequences(token_ids, maxlen=max_len, padding='post')

    # Debug output: what the model actually receives.
    print(f'Sequence: {token_ids}')
    print(f'Padded Sequence: {model_input}')

    # Batch of one in, so pick row 0 / column 0 of the prediction.
    scores = model.predict(model_input)
    first_row = scores[0]
    likelihood = first_row[0]

    # Debug output: the raw score.
    print(f'Likelihood: {likelihood}')

    return likelihood

# Example usage:
# Relies on 'lstm_model', 'tokenizer', and 'max_len' already being defined
# by the training cell.
sentence_to_evaluate = "I am swim more than dk."
result = evaluate_sentence(
    sentence_to_evaluate,
    model=lstm_model,
    tokenizer=tokenizer,
    max_len=max_len,
)

# Report the model's score for the sentence
print(f'Likelihood that the sentence is grammatically correct: {result}')

