In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Wall-clock start; read again at the end of the script to report runtime.
start_time = time.time()

# Load the training and test sets. Both files must provide an 'input' column
# (raw text) and an 'output' column (used as the target), as read below.
train_df = pd.read_csv("totaltr.csv")
test_df = pd.read_csv("totalts.csv")

# Fit the vocabulary on the training texts only; the test texts are encoded
# with that same vocabulary.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_df['input'])

# Convert the texts to sequences of integer word indices.
train_df['input'] = tokenizer.texts_to_sequences(train_df['input'])
test_df['input'] = tokenizer.texts_to_sequences(test_df['input'])

# Hold out 10% of the training rows for validation (fixed seed so the split
# is reproducible).
X_train, X_val, y_train, y_val = train_test_split(
    train_df['input'],
    train_df['output'],
    test_size=0.1,
    random_state=42
)

# Pad/truncate every sequence to a fixed length. The values are word indices
# that feed an Embedding lookup, so they are kept as integers rather than
# floats.
MAX_SEQUENCE_LENGTH = 100
X_train = pad_sequences(X_train, maxlen=MAX_SEQUENCE_LENGTH, dtype='int32')
X_val = pad_sequences(X_val, maxlen=MAX_SEQUENCE_LENGTH, dtype='int32')
X_test = pad_sequences(test_df['input'], maxlen=MAX_SEQUENCE_LENGTH, dtype='int32')

# Targets as float32 column vectors of shape (n_samples, 1).
y_train = np.asarray(y_train).astype('float32').reshape((-1, 1))
y_val = np.asarray(y_val).astype('float32').reshape((-1, 1))
y_test = np.asarray(test_df['output']).astype('float32').reshape((-1, 1))

# Network: embedding -> bidirectional LSTM -> dense head ending in a single
# sigmoid unit. The embedding table has one row more than the number of
# words known to the tokenizer.
model = Sequential([
    Embedding(len(tokenizer.index_word) + 1, output_dim=50, input_length=100),
    Bidirectional(LSTM(100)),
    # NOTE(review): the LSTM is created without return_sequences, so its
    # output is presumably already 2-D and this Flatten changes nothing.
    Flatten(),
    Dense(250, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=["accuracy"],
)

# Stop training once the validation loss has gone 5 epochs without improving.
early_stop = EarlyStopping(monitor="val_loss", patience=5, verbose=True)

history = model.fit(
    X_train,
    y_train,
    epochs=7,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stop],
)

# Persist the trained model to disk.
model.save('lstm_model.keras')

# Model output for the test set: one sigmoid probability per sample, i.e. a
# single-column array, because the network ends in Dense(1, 'sigmoid').
y_pred_prob = model.predict(X_test)

# Turn probabilities into hard 0/1 labels by thresholding at 0.5.
# np.argmax over axis 1 of a single-column array is always 0, which would
# label every sample as class 0 no matter what the model predicted.
y_pred = (y_pred_prob > 0.5).astype('int32').ravel()

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')

print(f'Accuracy: {accuracy:.4f}')
print(f'F1-score: {f1:.4f}')
print(f'Recall: {recall:.4f}')
print(f'Precision: {precision:.4f}')

# Loss and compiled metrics as computed by Keras itself on the test set.
results = model.evaluate(X_test, y_test, batch_size=64)
print("result: ", results)

# Total wall-clock time since start_time was recorded at the top of the script.
end_time = time.time()
total_time = end_time - start_time
print(f"Total runtime of the script: {total_time:.4f} seconds")
