In [None]:
import pandas as pd
import numpy as np
import sklearn
import torch
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional, LeakyReLU, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split

In [None]:
class BLSTM:
    """Bidirectional-LSTM binary classifier trained on tabular features from a CSV.

    Construction loads the CSV, splits it 80/20 into train and test sets and
    builds a compiled Keras model. Call ``train()`` and then ``test()``.

    The CSV must contain a ``type`` column, which is used as the binary target;
    every other column is used as a numeric input feature.
    NOTE(review): presumably the remaining columns are graph2vec embedding
    dimensions -- confirm that the file carries no id/filename column, since
    non-numeric feature columns are rejected.
    """

    def __init__(self, data_path, name="graph2vec_BLSTM", batch_size=64, epochs=20):
        """Load the data at *data_path* and build the model.

        Args:
            data_path: Path (or anything ``pandas.read_csv`` accepts) of the
                feature CSV.
            name: Prefix of the checkpoint file written by ``train()``.
            batch_size: Mini-batch size passed to ``fit``.
            epochs: Maximum number of training epochs.
        """
        self.name = name
        self.batch_size = batch_size
        self.epochs = epochs

        # Load and preprocess data
        self._load_data(data_path)

        # Build and compile model
        self.model = self._build_model()

    @staticmethod
    def _prepare_arrays(data):
        """Split a feature frame into LSTM-ready inputs and binary labels.

        Args:
            data: DataFrame with a ``type`` label column plus feature columns.

        Returns:
            ``(features, labels)`` where *features* is a float32 array of shape
            ``(rows, 1, n_features)`` and *labels* is a float32 array of shape
            ``(rows,)``.

        Raises:
            KeyError: If the ``type`` column is missing.
            ValueError: If there are no feature columns, or a column cannot be
                converted to float32.
        """
        # The label is the 'type' column itself. The previous code used the
        # label values as positional row indices into column 4, which does not
        # yield one label per sample.
        labels = data["type"].to_numpy(dtype="float32")

        features = data.drop(columns="type").to_numpy(dtype="float32")
        if features.shape[1] == 0:
            raise ValueError("No feature columns left after dropping 'type'.")

        # Recurrent layers need (samples, timesteps, features); each row is
        # fed as a sequence of length one.
        features = features.reshape(features.shape[0], 1, features.shape[1])
        return features, labels

    def _load_data(self, data_path):
        """Read the CSV and populate the train/test split attributes.

        Sets ``X_train``, ``X_test``, ``y_train`` and ``y_test``. The split is
        80/20 with a fixed ``random_state`` so it is reproducible.
        """
        data = pd.read_csv(data_path)  # Load graph2vec features
        print("Dataset Preview:")
        print(data.head())

        X, y_labels = self._prepare_arrays(data)

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y_labels, test_size=0.2, random_state=42
        )
        print("Train data shape:", self.X_train.shape)
        print("Test data shape:", self.X_test.shape)

    def _build_model(self):
        """Build and compile the stacked bidirectional-LSTM network.

        The input shape is taken from the 3-D training array, so
        ``_load_data`` must have run first.

        Returns:
            The compiled ``Sequential`` model (binary cross-entropy loss,
            accuracy and AUC metrics).
        """
        model = Sequential()
        model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(self.X_train.shape[1], self.X_train.shape[2])))
        model.add(Dropout(0.5))
        model.add(BatchNormalization())
        model.add(Bidirectional(LSTM(64)))
        model.add(Dropout(0.5))
        model.add(Dense(128, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.3))
        # Single sigmoid unit: outputs the probability of the positive class.
        model.add(Dense(1, activation='sigmoid'))

        model.compile(optimizer=Adam(learning_rate=0.02), loss='binary_crossentropy', metrics=['accuracy', 'AUC'])
        return model

    def train(self):
        """Fit the model, checkpoint the best epoch and plot learning curves.

        NOTE(review): the held-out test split doubles as the validation set
        for early stopping and checkpoint selection, so the figures printed by
        ``test()`` are not from unseen data.
        """
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        model_checkpoint = ModelCheckpoint(self.name + "_best_model.weights.keras", save_best_only=True, monitor='val_loss')

        history = self.model.fit(
            self.X_train, self.y_train,
            validation_data=(self.X_test, self.y_test),
            batch_size=self.batch_size,
            epochs=self.epochs,
            callbacks=[early_stopping, model_checkpoint]
        )

        self._plot_learning_curve(history)

    def _plot_learning_curve(self, history):
        """Show accuracy and loss curves for the train and validation sets.

        Args:
            history: The ``History`` object returned by ``model.fit``.
        """
        # (history key, axis/title label, upper y-limit) for each figure.
        curves = (
            ('accuracy', 'Accuracy', 1),
            ('loss', 'Loss', 2),
        )
        for key, label, y_max in curves:
            plt.figure(figsize=(10, 6))
            plt.plot(history.history[key], label='Train ' + label)
            plt.plot(history.history['val_' + key], label='Validation ' + label)
            plt.ylim(0, y_max)
            plt.title('Model ' + label)
            plt.ylabel(label)
            plt.xlabel('Epoch')
            plt.legend(loc='upper left')
            plt.show()

    def test(self):
        """Load the checkpoint written by ``train()`` and print test metrics.

        Prints loss, accuracy and AUC, in the order the metrics were given to
        ``compile``. Requires that ``train()`` has already written the
        checkpoint file for this ``name``.
        """
        self.model.load_weights(self.name + "_best_model.weights.keras")
        results = self.model.evaluate(self.X_test, self.y_test)
        print("Test loss:", results[0])
        print("Test accuracy:", results[1])
        print("Test AUC:", results[2])

# Usage:
# blstm = BLSTM("graph2vec_features.csv")
# blstm.train()
# blstm.test()