In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

In [3]:
data = pd.read_csv('/content/drive/MyDrive/americanExpress/fixed_train.csv')

# Preprocess the data
X = data['prompt'] + " " + data['utterance']
y = data['label']

# Tokenize the text
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(X)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(X)

# Pad sequences to ensure uniform length
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

In [4]:
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, y, test_size=0.2, random_state=42)

In [5]:
model = Sequential([
    Embedding(len(word_index) + 1, 100, input_length=max_sequence_length),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [6]:
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=2)

Epoch 1/10
2105/2105 - 586s - loss: -2.0083e+05 - accuracy: 0.0270 - val_loss: -5.6747e+05 - val_accuracy: 0.0269 - 586s/epoch - 278ms/step
Epoch 2/10
2105/2105 - 559s - loss: -1.2013e+06 - accuracy: 0.0270 - val_loss: -1.9318e+06 - val_accuracy: 0.0269 - 559s/epoch - 265ms/step
Epoch 3/10
2105/2105 - 547s - loss: -2.8805e+06 - accuracy: 0.0270 - val_loss: -3.8934e+06 - val_accuracy: 0.0269 - 547s/epoch - 260ms/step
Epoch 4/10
2105/2105 - 565s - loss: -5.1328e+06 - accuracy: 0.0270 - val_loss: -6.4093e+06 - val_accuracy: 0.0269 - 565s/epoch - 269ms/step
Epoch 5/10
2105/2105 - 548s - loss: -7.9305e+06 - accuracy: 0.0270 - val_loss: -9.4574e+06 - val_accuracy: 0.0269 - 548s/epoch - 260ms/step
Epoch 6/10
2105/2105 - 562s - loss: -1.1263e+07 - accuracy: 0.0270 - val_loss: -1.3038e+07 - val_accuracy: 0.0269 - 562s/epoch - 267ms/step
Epoch 7/10
2105/2105 - 558s - loss: -1.5125e+07 - accuracy: 0.0270 - val_loss: -1.7137e+07 - val_accuracy: 0.0269 - 558s/epoch - 265ms/step
Epoch 8/10
2105/2105

In [7]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.026909824460744858
