In [29]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

# Step 1: Load and preprocess the dataset
# Assuming you have a CSV file with two columns: 'text' and 'label'
# 'text' contains the English sentences, and 'label' contains binary values (0 or 1) for correctness.
# You should replace 'your_dataset.csv' with the actual path to your dataset file.

# Load dataset
dataset = pd.read_csv('train.csv')
dataset = dataset.reset_index(drop=True)

# Tokenize and pad the sequences
max_len = 50  # Set a suitable maximum length for your sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(dataset['text'])
sequences = tokenizer.texts_to_sequences(dataset['text'])
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
# print("X_train shape:", X_train.shape)
# print("y_train shape:", y_train.shape)
# print("X_test shape:", X_test.shape)
# print("y_test shape:", y_test.shape)
labels = dataset['label']

# Step 2: Develop a suitable neural net using TensorFlow
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=32, input_length=max_len),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 3: Train the neural net on the training set
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)
X_train_np = np.array(X_train)
y_train_np = np.array(y_train)
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# #####
model.fit(X_train_np, y_train_np, epochs=5, batch_size=32, validation_data=(X_test_np, y_test_np))

# Step 4: Create a Python class Grader
class Grader:
    def __init__(self, model):
        self.model = model

    def evaluate(self, sentence):
        sequence = tokenizer.texts_to_sequences([sentence])
        padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')
        likelihood = self.model.predict(padded_sequence)[0][0]
        return likelihood

# Create an instance of Grader
mygrader = Grader(model)

# Step 5: Evaluate the performance on the provided test set
# Assuming you have a separate CSV file for the test set
test_data = pd.read_csv('test.csv')
test_sequences = tokenizer.texts_to_sequences(test_data['text'])
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_len, padding='post')
predictions = (mygrader.model.predict(padded_test_sequences) > 0.1).astype('int32')

precision = precision_score(test_data['label'], predictions)
recall = recall_score(test_data['label'], predictions)

print(f'Precision: {precision}, Recall: {recall}')

# Step 6: Discuss other ideas for improving the model
# Depending on the performance, you can explore more advanced architectures, hyperparameter tuning, and data augmentation techniques.
# Additionally, you might consider using pre-trained word embeddings, such as GloVe or Word2Vec, for better representation of words.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Precision: 0.6925996204933587, Recall: 1.0


In [32]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

class Grader:
    def __init__(self, model, tokenizer, max_len):
        self.model = model
        self.tokenizer = tokenizer
        self.max_len = max_len

    def evaluate(self, sentence):
        sequence = self.tokenizer.texts_to_sequences([sentence])
        padded_sequence = pad_sequences(sequence, maxlen=self.max_len, padding='post')
        likelihood = self.model.predict(padded_sequence)[0][0]
        return likelihood

# Step 1: Load and preprocess the dataset
dataset = pd.read_csv('train.csv')
# dataset = dataset.reset_index(drop=True)

max_len = 50  
tokenizer = Tokenizer()
tokenizer.fit_on_texts(dataset['text'])
sequences = tokenizer.texts_to_sequences(dataset['text'])
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
labels = dataset['label']

# Step 2: Develop a suitable neural net using TensorFlow
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=32, input_length=max_len),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 3: Train the neural net on the training set
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)


# Step 3: Train the neural net on the training set
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)
X_train_np = np.array(X_train)
y_train_np = np.array(y_train)
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# #####
model.fit(X_train_np, y_train_np, epochs=5, batch_size=32, validation_data=(X_test_np, y_test_np))



# Step 4: Create an instance of Grader
mygrader = Grader(model, tokenizer, max_len)

# Step 5: Evaluate a sample sentence
likelihood = mygrader.evaluate("come here no more He don't anyways you stupid bitch.")
print(f'Likelihood that the sentence is grammatically correct: {likelihood}')

# Step 6: Evaluate the performance on the provided test set
test_data = pd.read_csv('test.csv')
test_sequences = tokenizer.texts_to_sequences(test_data['text'])
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_len, padding='post')
predictions = (model.predict(padded_test_sequences) > 0.5).astype('int32')

precision = precision_score(test_data['label'], predictions)
recall = recall_score(test_data['label'], predictions)

print(f'Precision: {precision}, Recall: {recall}')

# Step 7: Discuss other ideas for improving the model
# Depending on the performance, you can explore more advanced architectures, hyperparameter tuning, and data augmentation techniques.
# Additionally, you might consider using pre-trained word embeddings, such as GloVe or Word2Vec, for better representation of words.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Likelihood that the sentence is grammatically correct: 0.6935041546821594
Precision: 0.6925996204933587, Recall: 1.0


In [34]:
predictions

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
    

In [72]:
import pandas as pd

dataset = pd.read_csv('test.csv')
# dataset = dataset.reset_index(drop=True)
dataset

Unnamed: 0,text,label
0,The sailors rode the breeze clear of the rocks.,1
1,The weights made the rope stretch over the pul...,1
2,The mechanical doll wriggled itself loose.,1
3,"If you had eaten more, you would want less.",1
4,"As you eat the most, you want the least.",0
...,...,...
522,I would like to could swim,0
523,I kicked myself,1
524,The bookcase ran,0
525,I shaved myself.,1


In [74]:
!head test.csv

text,label
The sailors rode the breeze clear of the rocks.,1
The weights made the rope stretch over the pulley.,1
The mechanical doll wriggled itself loose.,1
"If you had eaten more, you would want less.",1
"As you eat the most, you want the least.",0
"The more you would want, the less you would eat.",0
"I demand that the more John eat, the more he pays.",0
"Mary listens to the Grateful Dead, she gets depressed.",1
"The angrier Mary got, the more she looked at pictures.",1


In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from transformers import TFBertModel, TFGPT2Model, BertTokenizer, GPT2Tokenizer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
import json


# Step 1: Load and preprocess the dataset
dataset = pd.read_csv('train.csv')
# dataset = dataset.reset_index(drop=True)
dataset = dataset.head(800)


max_len = 50  
tokenizer = Tokenizer()
tokenizer.fit_on_texts(dataset['text'])
sequences = tokenizer.texts_to_sequences(dataset['text'])
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
labels = dataset['label']

# Step 2: Load BERT and GPT-2 models
bert_model = TFBertModel.from_pretrained('bert-base-uncased', trainable=True)
gpt2_model = TFGPT2Model.from_pretrained('gpt2', trainable=True)  # Set trainable=True for GPT-2

# Step 3: Define the neural net using BERT and GPT-2 models
input_ids = tf.keras.layers.Input(shape=(max_len,), dtype=tf.int32)
bert_output = bert_model(input_ids)[1]  # Using [1] to get pooled output
gpt2_output = gpt2_model(input_ids)[0]  # GPT-2 output

# Repeat BERT output for each time step in the sequence
bert_output_expanded = tf.keras.layers.RepeatVector(max_len)(bert_output)
concatenated_output = tf.keras.layers.Concatenate(axis=-1)([bert_output_expanded, gpt2_output])

# Reshape the concatenated output
reshaped_output = tf.keras.layers.Reshape((max_len, -1))(concatenated_output)

# Additional layers
lstm_layer = tf.keras.layers.LSTM(64)(reshaped_output)  
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')(lstm_layer)

model = tf.keras.Model(inputs=input_ids, outputs=output_layer)



optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-5) 
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the neural net on the training set
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

X_train_np = np.array(X_train)
y_train_np = np.array(y_train)
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# Reshape labels to match the output shape
y_train_np_categorical = y_train_np[:, np.newaxis]  # y_train_np is 1D
y_test_np_categorical = y_test_np[:, np.newaxis]  # y_test_np is 1D

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model.fit(X_train_np, y_train_np_categorical, epochs=1, batch_size=32, validation_data=(X_test_np, y_test_np_categorical), callbacks=[early_stopping])


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task



<keras.src.callbacks.History at 0x2c7ebab10>

In [2]:
# class Grader:
#     def __init__(self, model, tokenizer, max_len):
#         self.model = model
#         self.tokenizer = tokenizer
#         self.max_len = max_len

#     def evaluate(self, sentence):
#         # Tokenize and pad the input sentence
#         sequence = self.tokenizer.texts_to_sequences([sentence])
#         padded_sequence = pad_sequences(sequence, maxlen=self.max_len, padding='post')
        
#         # Predict using the model
#         likelihood = self.model.predict(padded_sequence)[0][0]
#         return likelihood


In [49]:
class Grader:
    def __init__(self, model, tokenizer, max_len):
        self.model = model
        self.tokenizer = tokenizer
        self.max_len = max_len

    def evaluate(self, sentence, threshold=0.5):
        # Tokenize and pad the input sentence
        sequence = self.tokenizer.texts_to_sequences([sentence])
        padded_sequence = pad_sequences(sequence, maxlen=self.max_len, padding='post')
        
        # Predict using the model
        likelihood = self.model.predict(padded_sequence)[0][0]

        # Scale the output value between 0 and 1
        min_value = np.min(likelihood)
        max_value = np.max(likelihood)
        
        # Check if the range is zero
        if max_value - min_value == 0:
            scaled_value = 0.5  # Set a default value or handle as needed
        else:
            scaled_value = (likelihood - min_value) / (max_value - min_value)

        # # Convert to binary classification
        # binary_prediction = 1 if scaled_value > threshold else 0

        return likelihood


In [50]:
# Step 5: Create an instance of Grader
mygrader = Grader(model, tokenizer, max_len)

In [55]:
# Step 6: Evaluate a sample sentence
likelihood = mygrader.evaluate("she swim do she")
print(f'Likelihood that the sentence is grammatically correct: {likelihood}')

Likelihood that the sentence is grammatically correct: 0.70827716588974


In [46]:
# model.save("fine_tuned_model")  # Replace with the desired path
# model.save("fine_tuned_model.h5")  # Save as HDF5

# # Save the tokenizer configuration
# tokenizer_config = tokenizer.to_json()
# with open('tokenizer_config.json', 'w', encoding='utf-8') as f:
#     f.write(json.dumps(tokenizer_config, ensure_ascii=False))

In [30]:
# Step 7: Evaluate the performance on the provided test set
test_data = pd.read_csv('test.csv')
test_data = test_data.reset_index(drop=True)
test_sequences = tokenizer.texts_to_sequences(test_data['text'])
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_len, padding='post')

predictions = model.predict(padded_test_sequences)



In [56]:
predictions

array([[0.6694774 ],
       [0.63453555],
       [0.5127208 ],
       [0.63622004],
       [0.6843329 ],
       [0.63132036],
       [0.4853956 ],
       [0.41443786],
       [0.70929575],
       [0.6081032 ],
       [0.63056993],
       [0.54238784],
       [0.6147578 ],
       [0.69747555],
       [0.61622953],
       [0.49770805],
       [0.5036871 ],
       [0.5813705 ],
       [0.4958618 ],
       [0.4916681 ],
       [0.67539823],
       [0.6843574 ],
       [0.56115746],
       [0.54830366],
       [0.5768231 ],
       [0.6539475 ],
       [0.71196574],
       [0.5291939 ],
       [0.48834005],
       [0.47858652],
       [0.4965896 ],
       [0.4732234 ],
       [0.70041144],
       [0.58699375],
       [0.57793266],
       [0.67583215],
       [0.7415329 ],
       [0.48808947],
       [0.55854195],
       [0.49009028],
       [0.48739198],
       [0.4674394 ],
       [0.48718944],
       [0.57253456],
       [0.48574626],
       [0.4985756 ],
       [0.48875254],
       [0.488

In [37]:
min_value = np.min(predictions)
max_value = np.max(predictions)
scaled_values = (predictions - min_value) / (max_value - min_value)

binary_predictions = np.where(scaled_values > 0.5, 1, 0)
y_true = test_data['label'].astype(int)

# Calculate precision and recall
precision = precision_score(y_true, binary_predictions)
recall = recall_score(y_true, binary_predictions)


# precision = precision_score(test_data['label'], predictions)
# recall = recall_score(test_data['label'], predictions)

print(f'Precision: {precision}, Recall: {recall}')

Precision: 0.7026022304832714, Recall: 0.5178082191780822


array([[1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
    

In [90]:
y_true.shape

(527,)

In [103]:
from transformers.data.processors.glue import ColaProcessor


In [126]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Load and preprocess the dataset
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

max_len = 50  # Adjust as needed
embedding_dim = 50  # Adjust as needed

# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data['text'])
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data['text']), maxlen=max_len, padding='post')
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data['text']), maxlen=max_len, padding='post')

# Labels
y_train = train_data['label']
y_test = test_data['label']

# RNN model
rnn_model = Sequential()
rnn_model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_dim, input_length=max_len))
rnn_model.add(SimpleRNN(64, activation='relu'))
rnn_model.add(Dense(64, activation='relu'))
rnn_model.add(Dropout(0.5))
rnn_model.add(Dense(1, activation='sigmoid'))

# optimizer = Adam(learning_rate=0.001)
# rnn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
rnn_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


# Train the model
rnn_model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate on test set
loss, accuracy = rnn_model.evaluate(X_test, y_test)
print(f'RNN Test Loss: {loss}, Test Accuracy: {accuracy}')

# Make predictions
rnn_predictions = rnn_model.predict(X_test)




Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
RNN Test Loss: 0.6170089840888977, Test Accuracy: 0.6925995945930481


In [130]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

def evaluate_sentence(sentence, model, tokenizer, max_len):
    # Tokenize and pad the input sentence
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')

    # Print for debugging
    print(f'Sequence: {sequence}')
    print(f'Padded Sequence: {padded_sequence}')

    # Make prediction using the trained model
    likelihood = model.predict(padded_sequence)[0][0]

    # Print for debugging
    print(f'Likelihood: {likelihood}')

    return likelihood

# Example usage:
# Assuming 'model', 'tokenizer', and 'max_len' are already defined
sentence_to_evaluate = "she swims no to."
result = evaluate_sentence(sentence_to_evaluate, rnn_model, tokenizer, max_len)

# Print the result
print(f'Likelihood that the sentence is grammatically correct: {result}')



Sequence: [[34, 4468, 90, 2]]
Padded Sequence: [[  34 4468   90    2    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0]]
Likelihood: 0.6912722587585449
Likelihood that the sentence is grammatically correct: 0.6912722587585449


In [132]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Load and preprocess the dataset
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

max_len = 50  # Adjust as needed
embedding_dim = 50  # Adjust as needed

# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_data['text'])
X_train = pad_sequences(tokenizer.texts_to_sequences(train_data['text']), maxlen=max_len, padding='post')
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data['text']), maxlen=max_len, padding='post')

# Labels
y_train = train_data['label']
y_test = test_data['label']

# LSTM model
lstm_model = Sequential()
lstm_model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_dim, input_length=max_len))
lstm_model.add(LSTM(64, activation='relu'))
lstm_model.add(Dense(64, activation='relu'))
lstm_model.add(Dropout(0.5))
lstm_model.add(Dense(1, activation='sigmoid'))

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
lstm_model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Evaluate on test set
loss, accuracy = lstm_model.evaluate(X_test, y_test)
print(f'LSTM Test Loss: {loss}, Test Accuracy: {accuracy}')

# Make predictions
lstm_predictions = lstm_model.predict(X_test)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
LSTM Test Loss: 0.6173422336578369, Test Accuracy: 0.6925995945930481


In [152]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

def evaluate_sentence(sentence, model, tokenizer, max_len):
    # Tokenize and pad the input sentence
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post')

    # Print for debugging
    print(f'Sequence: {sequence}')
    print(f'Padded Sequence: {padded_sequence}')

    # Make prediction using the trained model
    likelihood = model.predict(padded_sequence)[0][0]

    # Print for debugging
    print(f'Likelihood: {likelihood}')

    return likelihood

# Example usage:
# Assuming 'model', 'tokenizer', and 'max_len' are already defined
sentence_to_evaluate = "I am swim more than dk."
result = evaluate_sentence(sentence_to_evaluate, lstm_model, tokenizer, max_len)

# Print the result
print(f'Likelihood that the sentence is grammatically correct: {result}')



Sequence: [[5, 200, 1978, 25, 48]]
Padded Sequence: [[   5  200 1978   25   48    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0]]
Likelihood: 0.704561173915863
Likelihood that the sentence is grammatically correct: 0.704561173915863
