# In [1]:
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np

# load the dataset
# NOTE: `data` must be defined before the code below runs. It is indexed by
# column name ('text' and 'sentiment'), so a pandas DataFrame is presumably
# expected -- confirm against your loader.
#data = ... # load your dataset here

# split the dataset into training and testing sets
train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)

# Extract the labels while the splits still carry the 'sentiment' column.
# pad_sequences below returns plain NumPy arrays, which cannot be indexed
# with ['sentiment'].
# NOTE(review): the sigmoid + binary_crossentropy setup below assumes the
# labels are 0/1 -- confirm against the dataset.
train_labels = np.asarray(train_df['sentiment'])
test_labels = np.asarray(test_df['sentiment'])

# create a tokenizer to convert the text into sequences of integers
# (fit on the training split only, so no test vocabulary leaks into it)
vocab_size = 10000  # shared by the tokenizer and the Embedding layer
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(train_df['text'])

# convert the text to sequences of integers
train_sequences = tokenizer.texts_to_sequences(train_df['text'])
test_sequences = tokenizer.texts_to_sequences(test_df['text'])

# pad the sequences to make them of equal length
max_length = 100
train_data = pad_sequences(train_sequences, maxlen=max_length)
test_data = pad_sequences(test_sequences, maxlen=max_length)

# create the LSTM model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=32, input_length=max_length))
model.add(LSTM(units=64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=1, activation='sigmoid'))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# train the model
# NOTE(review): the test split doubles as the validation set here, so the
# final test metrics are not from fully unseen data if these per-epoch
# numbers are used for tuning -- consider a separate validation split.
model.fit(
    train_data,
    train_labels,
    validation_data=(test_data, test_labels),
    epochs=5,
    batch_size=32,
)

# evaluate the model on the test set
loss, accuracy = model.evaluate(test_data, test_labels, verbose=0)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')


# Cell output: KeyboardInterrupt