In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import tensorflow as tf
tf.data.experimental.enable_debug_mode()
tf.config.run_functions_eagerly(True)
tf.get_logger().setLevel(tf.compat.v1.logging.DEBUG)
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Embedding, GRU, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import keras.backend as K
from keras.optimizers import Adam

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from keras.models import load_model
from keras.utils import to_categorical
from numpy import dstack



import pickle
from gensim.models import Word2Vec

def load_files(directory):
    file_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".c"):
                file_path = os.path.join(root, file)
                with open(file_path, "r") as file_content:
                    file_text = file_content.read()
                file_list.append(file_text)
    return file_list
# Load training and test files for vulnerable and non-vulnerable classes
def load_data():
    dataset_dir = '/home/shaurya/BTP/Dataset/Code/C'
    vulnerable_dir = os.path.join(dataset_dir, 'Vulnerable')
    non_vulnerable_dir = os.path.join(dataset_dir, 'Non_vulnerable')

    vulnerable_train_files = load_files(os.path.join(vulnerable_dir, 'TRAIN'))
    vulnerable_test_files = load_files(os.path.join(vulnerable_dir, 'TEST'))
    non_vulnerable_train_files = load_files(os.path.join(non_vulnerable_dir, 'TRAIN'))
    non_vulnerable_test_files = load_files(os.path.join(non_vulnerable_dir, 'TEST'))

    train_codes = vulnerable_train_files + non_vulnerable_train_files
    test_codes = vulnerable_test_files + non_vulnerable_test_files

    labels = np.concatenate((np.ones(len(vulnerable_train_files)), np.zeros(len(non_vulnerable_train_files))), axis=0)

    X_train, X_test, y_train, y_test = train_test_split(train_codes, labels, test_size=0.3, random_state=42)

    return X_train, X_test, y_train, y_test, test_codes


def tokenize_sequences(X_train, X_test, test_codes):
    tokenizer = Tokenizer(num_words=10000, split='\n')
    tokenizer.fit_on_texts(X_train)
    X_train_sequences = tokenizer.texts_to_sequences(X_train)
    X_test_sequences = tokenizer.texts_to_sequences(X_test)
    test_sequences = tokenizer.texts_to_sequences(test_codes)

    average_sequence_length = int(np.mean([len(seq) for seq in X_train_sequences]))

    X_train_padded = pad_sequences(X_train_sequences, maxlen=average_sequence_length)
    X_test_padded = pad_sequences(X_test_sequences, maxlen=average_sequence_length)
    test_padded = pad_sequences(test_sequences, maxlen=average_sequence_length)

    return X_train_padded, X_test_padded, test_padded, tokenizer.word_index



2024-01-24 10:21:34.428862: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-24 10:21:34.578578: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 10:21:35.307448: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/shaurya/miniconda3/envs/tf/lib/
2024-01-24 10:21:35.307546: W tensorflow/compile

In [2]:
import os
from keras.models import load_model

def load_all_models():
    all_models = list()
    
    folder_path = '/home/shaurya/BTP/Outputs/Models/LSTM/word2vec'
    model_files = [f for f in os.listdir(folder_path) if f.endswith('.keras')]
    
    for filename in model_files:
        model_path = os.path.join(folder_path, filename)
        model = load_model(model_path)
        all_models.append(model)
        
        print(f'> Loaded {model_path}')
    
    return all_models
def stacked_dataset(members, inputX):
    stackX = None
    for model in members:
        # make prediction
        yhat = model.predict(inputX, verbose=0)
        # stack predictions into [rows, members, probabilities]
        if stackX is None:
            stackX = yhat
        else:
            stackX = dstack((stackX, yhat))
    # flatten predictions to [rows, members x probabilities]
    stackX = stackX.reshape((stackX.shape[0], stackX.shape[1]*stackX.shape[2]))
    return stackX


def fit_stacked_model(members, inputX, inputy):
    # create dataset using ensemble
    stackedX = stacked_dataset(members, inputX)
    # fit standalone model
    model = LogisticRegression()
    model.fit(stackedX, inputy)
    return model

# make a prediction with the stacked model
def stacked_prediction(members, model, inputX):
    # create dataset using ensemble
    stackedX = stacked_dataset(members, inputX)
    # make a prediction
    yhat = model.predict(stackedX)
    return yhat

X_train, X_test, y_train, y_test, test_codes = load_data()

print('Model Accuracy:')
# Tokenize the sequences
X_train_padded, X_test_padded, test_padded, word_index = tokenize_sequences(X_train, X_test, test_codes)

members = load_all_models()
for model in members:
#     testy_enc = to_categorical(y_test)
    _, acc = model.evaluate(X_test_padded, y_test, verbose=0)
    print('Model Accuracy: %.3f' % acc)
# fit stacked model using the ensemble
model = fit_stacked_model(members, X_test_padded, y_test)
# evaluate model on test set
yhat = stacked_prediction(members, model, X_test_padded)
acc = accuracy_score(y_test, yhat)
print('Stacked Test Accuracy: %.3f' % acc)


Model Accuracy:


2024-01-24 09:47:18.428501: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-24 09:47:23.429250: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6748 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:1b:00.0, compute capability: 6.1
2024-01-24 09:47:23.430173: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 3946 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:1d:00.0, compute capability: 6.1
2024-01-24 09:47:23.431024: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:loc

DEBUG:tensorflow:Layer gru_1 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/GRU_w2v.keras
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/BLSTM_w2v.keras
DEBUG:tensorflow:Layer LSTM_W2V_1 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer LSTM_W2V_2 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/LSTM_w2v.keras
DEBUG:tensorflow:Layer lstm will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm will use cuDNN

2024-01-24 09:47:28.021941: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100


Model Accuracy: 0.970
Model Accuracy: 0.964
Model Accuracy: 0.968
Model Accuracy: 0.949
Model Accuracy: 0.949
Model Accuracy: 0.954
Stacked Test Accuracy: 0.975


#Integerated

In [4]:
import os
from keras.models import load_model
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Dense, concatenate
from numpy import argmax
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from numpy import dstack

import random
import string

def generate_random_name():
    return ''.join(random.choices(string.ascii_letters + string.digits, k=10))

def rename_nested_layers(model, filename, model_index):
    for layer in model.layers:
        original_name = layer.name
        random_name = f"{filename}_{model_index}_{generate_random_name()}"
        layer._name = random_name

        if hasattr(layer, 'layers'):  # Check for nested layers
            rename_nested_layers(layer, filename, model_index)  # Recursive call

# The rest of your code remains unchanged

def load_all_models():
    all_models = list()

    folder_path = '/home/shaurya/BTP/Outputs/Models/LSTM/word2vec'
    model_files = [f for f in os.listdir(folder_path) if f.endswith('.keras')]

    for model_index, filename in enumerate(model_files):
        model_path = os.path.join(folder_path, filename)
        model = load_model(model_path)

        # Rename all layers recursively
#         rename_nested_layers(model, filename, model_index)

        all_models.append(model)
        print(f'> Loaded {model_path}')

    return all_models

def define_stacked_model(members):
    # Update all layers in all models to not be trainable
    for i, model in enumerate(members):
        for layer in model.layers:
            # Make not trainable
            layer.trainable = False
            # Rename to avoid 'unique layer name' issue
#             layer._name = 'ensemble_' + str(i + 1) + '_' + layer.name

        # Rename the input layer
#         model._name = 'ensemble_' + str(i + 1)

    # Define multi-headed input with unique names
    ensemble_visible = [model.input for model in members]

    # Concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)

    hidden = Dense(10, activation='relu')(merge)
    output = Dense(2, activation='softmax')(hidden)

    model = Model(inputs=ensemble_visible, outputs=output)

    # Plot graph of ensemble
#     plot_model(model, show_shapes=True, to_file='model_graph.png')

    # Compile
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

def fit_stacked_model(model, inputX, inputy):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    
    # encode output data
    inputy_enc = to_categorical(inputy)
    
    # fit model
    model.fit(X, inputy_enc, epochs=300, verbose=0)

def predict_stacked_model(model, inputX):
    # prepare input data
    X = [inputX for _ in range(len(model.input))]
    
    # make prediction
    return model.predict(X, verbose=0)

X_train, X_test, y_train, y_test, test_codes = load_data()

# Tokenize the sequences
X_train_padded, X_test_padded, test_padded, word_index = tokenize_sequences(X_train, X_test, test_codes)

members = load_all_models()

# for model in members:
#     _, acc = model.evaluate(X_test_padded, y_test, verbose=0)
#     print('Model Accuracy: %.3f' % acc)

# define ensemble model
stacked_model = define_stacked_model(members)

# fit stacked model on test dataset
fit_stacked_model(stacked_model, X_test_padded, y_test)

# make predictions and evaluate
yhat = predict_stacked_model(stacked_model, X_test_padded)
yhat = argmax(yhat, axis=1)
acc = accuracy_score(y_test, yhat)
print('Stacked Test Accuracy: %.3f' % acc)


DEBUG:tensorflow:Layer GRU_W2V will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/GRU_w2v.keras
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/BLSTM_w2v.keras
DEBUG:tensorflow:Layer LSTM_W2V_1 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer LSTM_W2V_2 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/LSTM_w2v.keras
DEBUG:tensorflow:Layer lstm will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm will use cuD

2024-01-24 10:23:13.024889: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8100


Stacked Test Accuracy: 0.975


In [3]:
def load_all_models():
    all_models = list()

    folder_path = '/home/shaurya/BTP/Outputs/Models/LSTM/word2vec'
    model_files = [f for f in os.listdir(folder_path) if f.endswith('.keras')]

    for model_index, filename in enumerate(model_files):
        model_path = os.path.join(folder_path, filename)
        model = load_model(model_path)

        # Rename all layers recursively
#         rename_nested_layers(model, filename, model_index)

        all_models.append(model)
        print(f'> Loaded {model_path}')

    return all_models

def list_models_and_layers(members):
    for i, model in enumerate(members):
        print(f"Model {i + 1}:")
        print_layer_names(model)
#         print(f"  Layers:")
#         for layer in model.layers:
#             print(f"    {layer.name}")
#         print("")

def print_layer_names(model, indent=0):
    for layer in model.layers:
        print(" " * indent + layer.name)
        if hasattr(layer, 'layers') and layer.layers:
            print_layer_names(layer, indent + 4)

# Assuming members is a list of loaded models
members = load_all_models()  # Replace with your actual loading mechanism
list_models_and_layers(members)


2024-01-24 10:22:33.316552: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-24 10:22:39.166754: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6748 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:1b:00.0, compute capability: 6.1
2024-01-24 10:22:39.167709: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 3946 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:1d:00.0, compute capability: 6.1
2024-01-24 10:22:39.168554: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:loc

DEBUG:tensorflow:Layer GRU_W2V will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/GRU_w2v.keras
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_2 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm_3 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/BLSTM_w2v.keras
DEBUG:tensorflow:Layer LSTM_W2V_1 will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer LSTM_W2V_2 will use cuDNN kernels when running on GPU.
> Loaded /home/shaurya/BTP/Outputs/Models/LSTM/word2vec/LSTM_w2v.keras
DEBUG:tensorflow:Layer lstm will use cuDNN kernels when running on GPU.
DEBUG:tensorflow:Layer lstm will use cuD