In [1]:
!pip install lime



In [2]:
import pandas as pd

train_df = pd.read_csv('finaldataset2.csv')


# -1 used as padding for missing values
train_df.fillna(-1, inplace=True)


def hex_to_dec(x):
    x_str = str(x).strip()
    
    if all(c in '0123456789abcdefABCDEF' for c in x_str):
            return int(x_str, 16)
    else:
        return int(float(x_str))  


feature_cols = train_df.columns[:-1]
for col in feature_cols:
    train_df[col] = train_df[col].apply(hex_to_dec)
    
    
    
insert_pos = train_df.shape[1] - 1  
train_df.insert(insert_pos, 'Padding1', -1)
train_df.insert(insert_pos + 1, 'Padding2', -1)



from sklearn.model_selection import train_test_split

X = train_df.drop('label', axis=1).values
y = train_df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


**packets in txt**

In [3]:
import pandas as pd

file_path = 'crash_packets.txt'

with open(file_path, 'r') as file:
    lines = file.readlines()

data = []
for line in lines:
    packets = line.strip().split()
    packet_ints = [int(packet[2:], 16) for packet in packets]
    data.append(packet_ints)

max_length = max(len(packet) for packet in data)

padded_data = [packet + [-1] * (max_length - len(packet)) for packet in data]

df = pd.DataFrame(padded_data)

In [4]:
data = df.to_numpy()

In [5]:
from keras.models import load_model

model = load_model('model.h5')

2024-06-21 12:09:24.825161: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-21 12:09:24.825309: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-21 12:09:24.976771: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [6]:
from lime import lime_tabular

import numpy as np

def predict_fn(x):
    preds = model.predict(x, verbose=0).astype(float)
    return np.hstack((1 - preds, preds))


explainer = lime_tabular.LimeTabularExplainer(X_train, 
    feature_names=[f'byte_{i+1}' for i in range(X_train.shape[1])],
    class_names=['class_0_Normal', 'class_1_UP_Normal'],
    mode='classification')


In [7]:
import numpy as np
from tqdm import tqdm
import pandas as pd

data = data.reshape(data.shape[0], -1)

test_indx_list = np.arange(data.shape[0])
test_dict = {}


for idx in tqdm(test_indx_list):
    try:
        exp = explainer.explain_instance(data[idx], predict_fn=predict_fn, num_features=19, num_samples=2500)
        a = exp.as_list()
        test_dict[idx] = a
    except Exception as e:
        print(f"Failed at index {idx}: {str(e)}")


100%|██████████| 20000/20000 [2:34:00<00:00,  2.16it/s]  


In [8]:
import numpy as np

num_packets = len(test_dict)
num_bytes = 19

lime_scores = np.zeros((num_packets, num_bytes))

for packet_idx, explanations in test_dict.items():
    for explanation in explanations:
        feature, score = explanation
        if 'byte' in feature:
            byte_index = int(feature.split('_')[1].split(' ')[0]) - 1
            lime_scores[packet_idx, byte_index] = score

print("LIME scores for the first packet:")
print(lime_scores[0])


LIME scores for the first packet:
[ 0.03082907  0.18077926  0.          0.          0.          0.
  0.01667288 -0.19063977 -0.03453494  0.54022133  0.51975811  0.120506
  0.          0.          0.          0.          0.          0.
  0.        ]


In [9]:
original_packets = np.copy(data)

In [16]:
def flip_all_bits(byte):
    return byte ^ 0xFF

def mutate_data(packets_with_label_one, lime_scores):
    flipped_packets_hex = []

    for i in range(len(packets_with_label_one)):
        max_lime_score = np.max(lime_scores[i])
        max_indices = np.where(lime_scores[i] == max_lime_score)[0]

        # No mutation for byte that is responsible for size of rest of data bytes
        for idx in max_indices:
            if idx == 5:
                continue
            byte_to_flip = packets_with_label_one[i, idx]
            flipped_byte = flip_all_bits(byte_to_flip)
            packets_with_label_one[i, idx] = flipped_byte
        
        hex_packet = ['0x' + format(byte, '02X') for byte in packets_with_label_one[i]]
        flipped_packets_hex.append(hex_packet)

    return flipped_packets_hex


In [17]:
mutated = mutate_data(data, lime_scores)
mutated_filtered = [['' if item == '0x-1' else item for item in sublist] for sublist in mutated]

df_packets_mutated= pd.DataFrame(mutated_filtered, columns=[f'Byte_{i+1}' for i in range(19)])
df_packets_mutated.to_csv('df_packets_mutated_new_final.csv', index=False, encoding='utf-8', float_format='%.2f')

In [18]:

df_packets_mutated.to_csv('df_packets_mutated_new_final.txt', 
                          sep=' ',       
                          index=False,   
                          header=False,
                          encoding='utf-8', 
                          float_format='%.2f')

print("Data saved to 'df_packets_mutated_new_final.txt'.")


Data saved to 'df_packets_mutated_new_final.txt'.


In [23]:
idx = 20
packet_flat = original_packets[idx].flatten()
flipped_packet_flat = data[idx].flatten()
lime_scores_flat = lime_scores[idx].flatten()

for packet_byte, lime_score, flipped in zip(packet_flat, lime_scores_flat, flipped_packet_flat):
    print(f"{packet_byte:02X}", ' : ', f"{lime_score:.8f}", ' : ', f"{flipped:02X}")


00  :  -0.01951087  :  00
15  :  0.17917474  :  15
00  :  0.00000000  :  00
00  :  0.00000000  :  00
00  :  0.00000000  :  00
06  :  0.00000000  :  06
01  :  0.00000000  :  01
01  :  0.03675229  :  01
00  :  -0.05937493  :  00
1C  :  0.54403118  :  E3
00  :  -0.09135977  :  00
1B  :  -0.52908913  :  1B
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1
-1  :  0.00000000  :  -1


In [None]:
def return_packets_with_one(test_data):

    output_df_prediction_labels= model.predict(test_data.reshape((-1, 17, 1))).astype(int)
    
    output_df_prediction_labels_squeezed = np.squeeze(output_df_prediction_labels)
    output_df_combined = np.column_stack((test_data, output_df_prediction_labels_squeezed))


    one_label_indices = (output_df_combined[:, -1] == 1)
    packets_with_label_one = output_df_combined[one_label_indices]

    packets_with_label_one = packets_with_label_one[:, :-1]
    packets_with_label_one = packets_with_label_one.reshape(packets_with_label_one.shape[0], packets_with_label_one.shape[1], 1)
    
    
    return packets_with_label_one

In [48]:
packets_with_label_one = return_packets_with_one(data)

print("Shape of UpNormal data: ", packets_with_label_one.shape)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Shape of UpNormal data:  (11193, 19, 1)


In [None]:
import numpy as np
from tqdm import tqdm
import pandas as pd

packets_with_label_one = packets_with_label_one.reshape(packets_with_label_one.shape[0], -1)

test_indx_list = np.arange(packets_with_label_one.shape[0])
test_dict = {}

for idx in tqdm(test_indx_list):
    try:
        exp = explainer.explain_instance(packets_with_label_one[idx], predict_fn=predict_fn, num_features=19, num_samples=2500)
        a = exp.as_list()
        test_dict[idx] = a
    except Exception as e:
        print(f"Failed at index {idx}: {str(e)}")


In [None]:
mutated_abnormal = mutate_data(packets_with_label_one, lime_scores)
mutated_filtered = [['' if item == '0x-1' else item for item in sublist] for sublist in mutated_abnormal]

df_packets_mutated_abnormal= pd.DataFrame(mutated_filtered, columns=[f'Byte_{i+1}' for i in range(19)])

df_packets_mutated_abnormal.to_csv('df_packets_mutated_abnormal_new_final.txt', 
                          sep=' ',       
                          index=False,   
                          header=False,
                          encoding='utf-8', 
                          float_format='%.2f')

print("Data saved to 'df_packets_mutated_abnormal_new_final.txt'.")
