In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Load the labelled corpus; the CSV must provide a 'text' and a 'label' column.
df = pd.read_csv('/content/text_data.csv')

texts = df['text'].tolist()
labels = df['label'].tolist()

# Decide the train/test membership BEFORE fitting the tokenizer so that the
# vocabulary (and the frequency ranking used by num_words) is learned from
# training texts only. Splitting row indices with the same test_size and
# random_state keeps the partition itself unchanged.
all_indices = np.arange(len(texts))
train_idx, test_idx = train_test_split(all_indices, test_size=0.2, random_state=42)

max_words = 1000
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts([texts[i] for i in train_idx])

# Words never seen in the training texts are mapped to the "<OOV>" token here.
sequences = tokenizer.texts_to_sequences(texts)

max_sequence_length = 20
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post', truncating='post')

X_train, X_test = padded_sequences[train_idx], padded_sequences[test_idx]
# Labels stay plain Python lists, as they were when split directly.
y_train = [labels[i] for i in train_idx]
y_test = [labels[i] for i in test_idx]

# Cheap sanity check: every row ended up in exactly one of the two splits.
assert len(y_train) + len(y_test) == len(labels)

print("Preprocessing complete.")
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
print(f"Length of y_train: {len(y_train)}")
print(f"Length of y_test: {len(y_test)}")

Preprocessing complete.
Shape of X_train: (40, 20)
Shape of X_test: (10, 20)
Length of y_train: 40
Length of y_test: 10


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, GlobalAveragePooling1D

# Imported here so this cell stays self-contained; `Input` replaces the
# deprecated `input_length=` argument of Embedding.
from tensorflow.keras import Input

# The tokenizer was created with num_words=max_words, so the ids it emits are
# always < max_words. The embedding table therefore never needs more rows than
# that, even when word_index (which records every word seen) is larger.
vocab_size = min(max_words, len(tokenizer.word_index) + 1)

embedding_dim = 16

# Small bag-of-embeddings classifier: embed each token id, average over the
# sequence axis, then a hidden layer and a single sigmoid unit for the binary
# label.
# NOTE(review): padding id 0 is averaged together with real tokens because no
# mask is used — confirm this is intended.
model = Sequential([
    # Declaring the input shape up front builds the model immediately, so
    # model.summary() can report output shapes and parameter counts.
    Input(shape=(max_sequence_length,)),
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.summary()



In [None]:
# Convert the label collections to NumPy arrays before handing them to Keras.
y_train = np.array(y_train)
y_test = np.array(y_test)

# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs; the held-out split is passed as validation data so
# per-epoch val_loss / val_accuracy are logged alongside the training metrics.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Final evaluation on the same held-out split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 261ms/step - accuracy: 0.4667 - loss: 0.6935 - val_accuracy: 0.4000 - val_loss: 0.6933
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.5875 - loss: 0.6923 - val_accuracy: 0.4000 - val_loss: 0.6944
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.5167 - loss: 0.6911 - val_accuracy: 0.4000 - val_loss: 0.6957
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.5271 - loss: 0.6901 - val_accuracy: 0.4000 - val_loss: 0.6966
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.5375 - loss: 0.6888 - val_accuracy: 0.4000 - val_loss: 0.6970
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.5167 - loss: 0.6885 - val_accuracy: 0.4000 - val_loss: 0.6971
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━