In [None]:
#Importing necessary libraries

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from tensorflow.keras.layers import Input, LSTM, Attention, Flatten, Dense, Bidirectional
from tensorflow.keras.models import Model


def LSTM_Attention_model (X_train, y_train, epochs=10, batch_size=64, verbose=1) :
    inputs = Input(shape=(X_train.shape[1],X_train.shape[-1]))
    lstm_out = LSTM (128, return_sequences=True) (inputs)
    attention_out = Attention () ([lstm_out, lstm_out])
    lstm2 = LSTM(64, return_sequences=True) (attention_out)
    flattened_out = Flatten () (lstm2)
    output2 = Dense (25, activation='sigmoid') (flattened_out)
    output = Dense (1, activation='sigmoid') (output2)

    model = Model (inputs=inputs, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics= ['accuracy'])
    model.fit (X_train, y_train, epochs=epochs, batch_size=batch_size,verbose=1)
    return model

#Import the final dataset

df0=pd.read_csv('finaldata.csv')
df0.fillna(0, inplace=True)
print(df0.shape)

from sklearn.utils import resample


# Divide the majority class and minority class
majority_class = df0[df0['OS_P'] == 1]
minority_class = df0[df0['OS_P'] == 0]

# Randomly sample the data in majority calss  such that the samples in it is 3 times of the ones in minority class
n_samples = len(minority_class)
majority_downsampled = resample(majority_class, replace=False, n_samples=n_samples, random_state=42)

# Combine the two classes od data
df = pd.concat([majority_downsampled, minority_class])

print(df.shape)

# Split the dataset into features (X) and the target variable (y)

X = df.drop('OS_P', axis=1).values
y = df['OS_P'].values

#Adjust the data into the appropriate scale

scaler = MinMaxScaler(feature_range=(0,1))
X_for_scaled = scaler.fit_transform(X)

#Split the dataset into training set and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.005, random_state=42)

#Adjust the data format into the the required one 

X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Construct the LSTM model

model = Sequential()
model.add(LSTM(100, input_shape=(1,X_train.shape[2])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam',metrics=['accuracy'])

md = LSTM_Attention_model (X_train, y_train, epochs=10, batch_size=64, verbose=1)
print(md)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Use the LSTM model to generate features
X_train_features = md.predict(X_train)
X_test_features = md.predict(X_test)

# Define the RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the RandomForestClassifier using the features generated by the LSTM model
rf.fit(X_train_features, y_train)

# Predict the classes
y_pred = rf.predict(X_test_features)



from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Compute the accuracy
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score (y_test, y_pred)
binary_f1 = f1_score (y_test, y_pred)
# Step 5: AUC
auc = roc_auc_score (y_test, y_pred)

print('Accuracy：{:.4f}'.format(accuracy))
print('Precision：{:.4f}'.format(precision))
print('Recall：{:.4f}'.format(recall))
print('F1：{:.4f}'.format(binary_f1))
print ('AUC: {:.4f}'.format (auc))

(839702, 152)
(140196, 152)
Epoch 1/10


  super().__init__(**kwargs)


[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.5406 - loss: 0.6901
Epoch 2/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5501 - loss: 0.6853
Epoch 3/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5475 - loss: 0.6854
Epoch 4/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5422 - loss: 0.6872
Epoch 5/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5255 - loss: 0.6891
Epoch 6/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.5368 - loss: 0.6886
Epoch 7/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5447 - loss: 0.6873
Epoch 8/10
[1m2180/2180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.5329 - loss: 0.6872
Epoch 9/10
[1m2180/2180[0m [32m━



[1m4360/4360[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 430us/step
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 771us/step
Confusion Matrix:
[[ 64 268]
 [ 39 330]]
准确率：0.5621
精确率：0.5518
召回率：0.8943
二分类F1值：0.6825
AUC: 0.5435


In [2]:
def biLSTM_Attention_model (X_train, y_train, epochs=10, batch_size=64, verbose=1) :
    inputs = Input(shape=(X_train.shape[1],X_train.shape[-1]))
    bilstm_out =  Bidirectional(LSTM(128,return_sequences=True))(inputs)
    attention_out = Attention () ([bilstm_out, bilstm_out])
    bilstm2 = Bidirectional(LSTM(64, return_sequences=True))(attention_out)
    flattened_out = Flatten () (bilstm2)
    output2 = Dense (25, activation='sigmoid') (flattened_out)
    output = Dense (1, activation='sigmoid') (output2)

    model = Model (inputs=inputs, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics= ['accuracy'])
    model.fit (X_train, y_train, epochs=epochs, batch_size=batch_size,verbose=1)
    return model