In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import LSTM,GRU

data = pd.read_csv('output.csv')

In [3]:
def hash_to_features(hash_str):
    # Convert hex to integer representation, then normalize between 0 and 1
    return [int(char, 16) / 15.0 for char in hash_str]

data['SHA1_features'] = data['SHA1'].apply(hash_to_features)
data['MD5_features'] = data['MD5'].apply(hash_to_features)

X = np.hstack([data['SHA1_features'].to_list(), data['MD5_features'].to_list()])
y = data['Ransomware']

y = to_categorical(y, num_classes=2)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
model = Sequential()
model.add(GRU(128, input_shape=(X_train.shape[1], 1)))  # GRU layer with 128 units
model.add(Dropout(0.5))  # Add dropout to reduce overfitting
model.add(Dense(64, activation='relu'))  # Fully connected layer with 64 units
model.add(Dense(32, activation='relu'))  # Fully connected layer with 32 units
model.add(Dense(2, activation='softmax')) 

In [7]:
# Define LSTM model
model = Sequential()
model.add(LSTM(128, input_shape=(X_train.shape[1], 1)))  # LSTM layer
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))

In [4]:
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5)) 
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(2, activation='softmax'))  

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6069 - loss: 0.6808 - val_accuracy: 0.6426 - val_loss: 0.6582
Epoch 2/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5886 - loss: 0.6796 - val_accuracy: 0.6426 - val_loss: 0.6556
Epoch 3/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6109 - loss: 0.6787 - val_accuracy: 0.6426 - val_loss: 0.6631
Epoch 4/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6097 - loss: 0.6649 - val_accuracy: 0.6426 - val_loss: 0.6605
Epoch 5/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6340 - loss: 0.6600 - val_accuracy: 0.6393 - val_loss: 0.6666
Epoch 6/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6404 - loss: 0.6565 - val_accuracy: 0.6426 - val_loss: 0.6540
Epoch 7/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x27a63ae7da0>

In [18]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 37ms/step - accuracy: 0.5811 - loss: 0.6845 - val_accuracy: 0.6426 - val_loss: 0.6548
Epoch 2/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.5955 - loss: 0.6681 - val_accuracy: 0.6426 - val_loss: 0.6454
Epoch 3/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.6242 - loss: 0.6540 - val_accuracy: 0.6426 - val_loss: 0.6510
Epoch 4/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.6196 - loss: 0.6503 - val_accuracy: 0.6426 - val_loss: 0.6424
Epoch 5/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.6085 - loss: 0.6564 - val_accuracy: 0.6426 - val_loss: 0.6380
Epoch 6/20
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.6376 - loss: 0.6401 - val_accuracy: 0.7049 - val_loss: 0.6409
Epoch 7/20
[1m39/39[0m [32m━━━━

<keras.src.callbacks.history.History at 0x27a63895c40>

In [19]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy}')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6967 - loss: 0.6184 
Test accuracy: 0.6950819492340088
