In [None]:
from google.colab import drive
import pandas as pd

# Make Google Drive visible to the Colab runtime under /content/drive/.
drive.mount('/content/drive/')

# Load the dataset CSV from the mounted drive into a DataFrame.
ransomware_csv_path = '/content/drive/MyDrive/RansomwareData.csv'
df = pd.read_csv(ransomware_csv_path)


Mounted at /content/drive/


In [None]:
# The first three CSV columns are excluded from the feature matrix.
# Column 1 is used as the binary target and column 2 as the specific
# (multi-class) target; column 0 is simply dropped.
first_col, binary_col, specific_col = df.columns[:3]

X = df.drop(columns=[first_col, binary_col, specific_col])
y = df[specific_col]

y_binary = df[binary_col]

# Create group labels
def convert_to_group(label):
    """Map a specific class label to a coarse group id.

    Labels in the inclusive ranges 1-3, 4-6, 7-9 and 10-12 map to groups
    1, 2, 3 and 4 respectively. Any label outside every range maps to 0.

    Args:
        label: Numeric class label.

    Returns:
        int: Group id in {0, 1, 2, 3, 4}.
    """
    # (lowest label, highest label, group id) -- bounds are inclusive.
    group_ranges = ((1, 3, 1), (4, 6, 2), (7, 9, 3), (10, 12, 4))
    for lowest, highest, group in group_ranges:
        if lowest <= label <= highest:
            return group
    return 0  # Assuming 0 is for goodware

# Derive the coarse group id for every label in y (element-wise).
y_group = y.apply(convert_to_group)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows. Features and all three label series are passed in
# one call so that every array is split on the same rows.
(
    X_train, X_test,
    y_train, y_test,
    y_train_binary, y_test_binary,
    y_train_group, y_test_group,
) = train_test_split(
    X, y, y_binary, y_group,
    test_size=0.2,
    random_state=42,
)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# Define the TransformerBlock class
class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention block followed by a two-layer feed-forward network.

    Each sub-layer's output goes through dropout, is added back to that
    sub-layer's input, and is then layer-normalised.

    Args:
        embed_dim: Output width of the feed-forward network. It must equal the
            last dimension of the inputs, because the result is added to them.
            It is also passed to MultiHeadAttention as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        rate: Dropout rate applied after attention and after the feed-forward
            network.
        **kwargs: Standard Keras ``Layer`` arguments (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward base-layer kwargs so the layer can be named and can be
        # rebuilt from its config when a model is saved, loaded or cloned.
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        """Apply attention and feed-forward sub-layers to ``inputs``.

        Args:
            inputs: Tensor used as both query and value for self-attention.
            training: Forwarded to the dropout layers and sub-layers.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

# Each sample is fed as a sequence of length n_features with a single channel.
input_layer = Input(shape=(X_train.shape[1], 1))

# Shared trunk: two Conv1D + MaxPooling1D stages (256 then 128 filters) ...
x = input_layer
for n_filters in (256, 128):
    x = Conv1D(filters=n_filters, kernel_size=2, activation='relu')(x)
    x = MaxPooling1D(pool_size=2)(x)

# ... one transformer block whose embed_dim matches the 128 conv channels ...
x = TransformerBlock(embed_dim=128, num_heads=4, ff_dim=256)(x)
x = Flatten()(x)

# ... and three fully connected layers.
for n_units in (256, 100, 64):
    x = Dense(n_units, activation='relu')(x)

# Binary Classification Head (Goodware vs Malicious): one sigmoid unit.
binary_output = Dense(1, activation='sigmoid', name='binary_output')(x)

# Specific Classification Head: softmax over 12 classes.
specific_output = Dense(12, activation='softmax', name='specific_output')(x)

# Two-output model; the order of `outputs` is the order of model.predict().
model = Model(inputs=input_layer, outputs=[binary_output, specific_output])

from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
from tensorflow.keras import backend as K

def custom_precision(y_true, y_pred):
    """Precision computed on the tensors passed in, with scores rounded to 0/1.

    Predictions are rounded, so an entry counts as a predicted positive only
    when its score is above 0.5. Counts are summed over every element of the
    tensors, not per class.

    Args:
        y_true: Ground-truth targets (0/1 or one-hot).
        y_pred: Predicted scores, same number of elements as ``y_true``.

    Returns:
        Scalar tensor: true positives / (predicted positives + epsilon).
    """
    y_pred = K.cast(y_pred, 'float32')
    # Give y_true the same shape as y_pred. If a binary target arrives as
    # (batch,) while the prediction is (batch, 1), the product below would
    # otherwise broadcast to (batch, batch) and inflate the counts.
    y_true = K.reshape(K.cast(y_true, 'float32'), K.shape(y_pred))
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # epsilon avoids division by zero when nothing is predicted positive.
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

# Custom recall metric for multi-class classification
def custom_recall(y_true, y_pred):
    """Recall computed on the tensors passed in, with scores rounded to 0/1.

    Predictions are rounded, so an entry counts as a predicted positive only
    when its score is above 0.5. Counts are summed over every element of the
    tensors, not per class.

    Args:
        y_true: Ground-truth targets (0/1 or one-hot).
        y_pred: Predicted scores, same number of elements as ``y_true``.

    Returns:
        Scalar tensor: true positives / (actual positives + epsilon).
    """
    y_pred = K.cast(y_pred, 'float32')
    # Give y_true the same shape as y_pred. If a binary target arrives as
    # (batch,) while the prediction is (batch, 1), the product below would
    # otherwise broadcast to (batch, batch) and inflate the counts.
    y_true = K.reshape(K.cast(y_true, 'float32'), K.shape(y_pred))
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon avoids division by zero when there are no positive targets.
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

# Custom F1 score metric for multi-class classification
def custom_f1(y_true, y_pred):
    """Harmonic mean of ``custom_precision`` and ``custom_recall``.

    Args:
        y_true: Ground-truth targets.
        y_pred: Predicted scores.

    Returns:
        Scalar tensor 2*P*R / (P + R + epsilon); epsilon guards the case
        where both precision and recall are zero.
    """
    p = custom_precision(y_true, y_pred)
    r = custom_recall(y_true, y_pred)
    return 2 * (p * r) / (p + r + K.epsilon())

# One loss per head, keyed by the output layer's name.
head_losses = {
    'binary_output': 'binary_crossentropy',
    'specific_output': 'categorical_crossentropy',
}

# Metrics per head: built-in Keras metrics plus the custom functions above.
head_metrics = {
    'binary_output': ['accuracy', Precision(), Recall(), custom_f1],
    'specific_output': [CategoricalAccuracy(), custom_precision, custom_recall, custom_f1],
}

model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

# Print the layer-by-layer summary.
model.summary()

# Stop when val_loss has not improved for 10 epochs and roll back to the
# best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Group id -> original labels belonging to it.
# Assuming original labels are in the range [0, 11].
group_members = {
    0: [0],                # Goodware
    1: [1, 2, 3],          # Critroni, CryptLocker, CryptoWall
    2: [4, 5, 6],          # KOLLAH, Kovter, Locker
    3: [7, 8, 9, 10, 11],  # MATSNU, PGPCODER, Reveton, TeslaCrypt, Trojan-Ransom
}

# Invert into the per-label lookup {original label: group id}.
label_to_group = {
    label: group
    for group, labels in group_members.items()
    for label in labels
}

# NOTE(review): this overwrites the y_train_group / y_test_group produced by
# train_test_split, and the mapping differs from convert_to_group(), which
# sends labels 10-12 to group 4 rather than 3. Confirm which is intended.
y_train_group = y_train.map(label_to_group)
y_test_group = y_test.map(label_to_group)

# One-hot encode the specific labels (12 classes) for the softmax head.
y_train_categorical = to_categorical(y_train, num_classes=12)
y_test_categorical = to_categorical(y_test, num_classes=12)

# Targets are keyed by output layer name so each head gets its own labels.
train_targets = {
    'binary_output': y_train_binary,
    'specific_output': y_train_categorical,
}
val_targets = {
    'binary_output': y_test_binary,
    'specific_output': y_test_categorical,
}

# Train the model. Note that the held-out test split is also used as the
# validation data that drives early stopping.
history = model.fit(
    X_train,
    train_targets,
    epochs=15,
    batch_size=4,
    validation_data=(X_test, val_targets),
    callbacks=[early_stopping],
)


Epoch 1/15
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3845s[0m 13s/step - binary_output_accuracy: 0.5349 - binary_output_custom_f1: 0.2136 - binary_output_precision_1: 0.3852 - binary_output_recall_1: 0.4065 - loss: 159.4024 - specific_output_categorical_accuracy: 0.3799 - specific_output_custom_f1: 0.3804 - specific_output_custom_precision: 0.3821 - specific_output_custom_recall: 0.3799 - val_binary_output_accuracy: 0.5082 - val_binary_output_custom_f1: 0.5521 - val_binary_output_precision_1: 0.4382 - val_binary_output_recall_1: 1.0000 - val_loss: 13.2575 - val_specific_output_categorical_accuracy: 0.4262 - val_specific_output_custom_f1: 0.4538 - val_specific_output_custom_precision: 0.4946 - val_specific_output_custom_recall: 0.4253
Epoch 2/15
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3795s[0m 12s/step - binary_output_accuracy: 0.7403 - binary_output_custom_f1: 0.4768 - binary_output_precision_1: 0.6496 - binary_output_recall_1: 0.6793 - loss: 8.087

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# model.predict returns one array per head, in the order the outputs were
# given to Model(): index 0 is the binary head, index 1 the specific head.
predictions = model.predict(X_test)

# Binary head: threshold the sigmoid score at 0.5, then score against the
# binary test labels.
binary_predictions = (predictions[0] > 0.5).astype(int)  # Convert probabilities to 0 or 1
binary_accuracy, binary_precision, binary_recall, binary_f1 = (
    score(y_test_binary, binary_predictions)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Specific head: take the most probable class; precision, recall and F1 are
# macro-averaged over the classes.
specific_predictions = np.argmax(predictions[1], axis=1)
specific_accuracy = accuracy_score(y_test, specific_predictions)
specific_precision, specific_recall, specific_f1 = (
    score(y_test, specific_predictions, average='macro')
    for score in (precision_score, recall_score, f1_score)
)

# Report both heads with four decimal places.
print(f"Binary Classification Metrics:\n"
      f"Accuracy: {binary_accuracy:.4f}\n"
      f"Precision: {binary_precision:.4f}\n"
      f"Recall: {binary_recall:.4f}\n"
      f"F1 Score: {binary_f1:.4f}")

print(f"\nSpecific Classification Metrics:\n"
      f"Accuracy: {specific_accuracy:.4f}\n"
      f"Precision: {specific_precision:.4f}\n"
      f"Recall: {specific_recall:.4f}\n"
      f"F1 Score: {specific_f1:.4f}")

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# Define the TransformerBlock class
class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention block followed by a two-layer feed-forward network.

    Each sub-layer's output goes through dropout, is added back to that
    sub-layer's input, and is then layer-normalised.

    Args:
        embed_dim: Output width of the feed-forward network. It must equal the
            last dimension of the inputs, because the result is added to them.
            It is also passed to MultiHeadAttention as ``key_dim``.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        rate: Dropout rate applied after attention and after the feed-forward
            network.
        **kwargs: Standard Keras ``Layer`` arguments (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward base-layer kwargs so the layer can be named and can be
        # rebuilt from its config when a model is saved, loaded or cloned.
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential(
            [Dense(ff_dim, activation="relu"), Dense(embed_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        """Apply attention and feed-forward sub-layers to ``inputs``.

        Args:
            inputs: Tensor used as both query and value for self-attention.
            training: Forwarded to the dropout layers and sub-layers.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1, training=training)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

# Each sample is fed as a sequence of length n_features with a single channel.
input_layer = Input(shape=(X_train.shape[1], 1))

# Shared trunk, stage 1: Conv1D + MaxPooling1D twice (256 then 128 filters).
x = input_layer
for conv_filters in (256, 128):
    x = Conv1D(filters=conv_filters, kernel_size=2, activation='relu')(x)
    x = MaxPooling1D(pool_size=2)(x)

# Stage 2: one transformer block (embed_dim equals the 128 conv channels),
# flattened for the dense layers.
x = TransformerBlock(embed_dim=128, num_heads=4, ff_dim=256)(x)
x = Flatten()(x)

# Stage 3: three fully connected layers.
for dense_units in (256, 100, 64):
    x = Dense(dense_units, activation='relu')(x)

# Binary Classification Head (Goodware vs Malicious): one sigmoid unit.
binary_output = Dense(1, activation='sigmoid', name='binary_output')(x)

# Specific Classification Head: softmax over 12 classes.
specific_output = Dense(12, activation='softmax', name='specific_output')(x)

# Build a fresh two-output model; this rebinds the name `model`.
model = Model(inputs=input_layer, outputs=[binary_output, specific_output])

# One loss per head, keyed by the output layer's name; plain accuracy is
# tracked for both heads.
head_losses = {
    'binary_output': 'binary_crossentropy',
    'specific_output': 'categorical_crossentropy',
}
head_metrics = {
    'binary_output': 'accuracy',
    'specific_output': 'accuracy',
}

model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

# Print the layer-by-layer summary.
model.summary()

# Stop when val_loss has not improved for 10 epochs and roll back to the
# best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Group id -> original labels belonging to it.
# Assuming original labels are in the range [0, 11].
group_members = {
    0: [0],                # Goodware
    1: [1, 2, 3],          # Critroni, CryptLocker, CryptoWall
    2: [4, 5, 6],          # KOLLAH, Kovter, Locker
    3: [7, 8, 9, 10, 11],  # MATSNU, PGPCODER, Reveton, TeslaCrypt, Trojan-Ransom
}

# Invert into the per-label lookup {original label: group id}.
label_to_group = {
    label: group
    for group, labels in group_members.items()
    for label in labels
}

# NOTE(review): this overwrites the y_train_group / y_test_group produced by
# train_test_split, and the mapping differs from convert_to_group(), which
# sends labels 10-12 to group 4 rather than 3. Confirm which is intended.
y_train_group = y_train.map(label_to_group)
y_test_group = y_test.map(label_to_group)

# One-hot encode the specific labels (12 classes) for the softmax head.
y_train_categorical = to_categorical(y_train, num_classes=12)
y_test_categorical = to_categorical(y_test, num_classes=12)

# Targets are keyed by output layer name so each head gets its own labels.
train_targets = {
    'binary_output': y_train_binary,
    'specific_output': y_train_categorical,
}
val_targets = {
    'binary_output': y_test_binary,
    'specific_output': y_test_categorical,
}

# Train for up to 50 epochs. Note that the held-out test split is also used
# as the validation data that drives early stopping.
history = model.fit(
    X_train,
    train_targets,
    epochs=50,
    batch_size=4,
    validation_data=(X_test, val_targets),
    callbacks=[early_stopping],
)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# model.predict returns one array per head, in the order the outputs were
# given to Model(): index 0 is the binary head, index 1 the specific head.
predictions = model.predict(X_test)

# Binary head: threshold the sigmoid score at 0.5, then score against the
# binary test labels.
binary_predictions = (predictions[0] > 0.5).astype(int)  # Convert probabilities to 0 or 1
binary_accuracy, binary_precision, binary_recall, binary_f1 = (
    metric_fn(y_test_binary, binary_predictions)
    for metric_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Specific head: take the most probable class; precision, recall and F1 are
# macro-averaged over the classes.
specific_predictions = np.argmax(predictions[1], axis=1)
specific_accuracy = accuracy_score(y_test, specific_predictions)
specific_precision, specific_recall, specific_f1 = (
    metric_fn(y_test, specific_predictions, average='macro')
    for metric_fn in (precision_score, recall_score, f1_score)
)

# Report both heads with four decimal places.
print(f"Binary Classification Metrics:\n"
      f"Accuracy: {binary_accuracy:.4f}\n"
      f"Precision: {binary_precision:.4f}\n"
      f"Recall: {binary_recall:.4f}\n"
      f"F1 Score: {binary_f1:.4f}")

print(f"\nSpecific Classification Metrics:\n"
      f"Accuracy: {specific_accuracy:.4f}\n"
      f"Precision: {specific_precision:.4f}\n"
      f"Recall: {specific_recall:.4f}\n"
      f"F1 Score: {specific_f1:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Extract the history for each output.
# Not every Keras version logs per-output losses for a multi-output model;
# some log only the combined `loss` / `val_loss`. Use .get() so a missing
# series becomes None instead of raising KeyError.
logged = history.history

binary_loss = logged.get('binary_output_loss')
val_binary_loss = logged.get('val_binary_output_loss')
specific_loss = logged.get('specific_output_loss')
val_specific_loss = logged.get('val_specific_output_loss')

binary_acc = logged.get('binary_output_accuracy')
val_binary_acc = logged.get('val_binary_output_accuracy')
specific_acc = logged.get('specific_output_accuracy')
val_specific_acc = logged.get('val_specific_output_accuracy')


def _plot_curves(ax, curves, title, ylabel):
    """Plot every (series, label, linestyle) curve whose series was logged."""
    for series, label, linestyle in curves:
        if series is not None:
            ax.plot(series, linestyle=linestyle, label=label)
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(ylabel)
    ax.legend()


# Training curves are dashed, validation curves solid.
loss_curves = [
    (binary_loss, 'Binary Output Loss', '--'),
    (val_binary_loss, 'Val Binary Output Loss', '-'),
    (specific_loss, 'Specific Output Loss', '--'),
    (val_specific_loss, 'Val Specific Output Loss', '-'),
]
if all(series is None for series, _, _ in loss_curves):
    # No per-output losses were logged: fall back to the combined loss.
    loss_curves = [
        (logged.get('loss'), 'Total Loss', '--'),
        (logged.get('val_loss'), 'Val Total Loss', '-'),
    ]

acc_curves = [
    (binary_acc, 'Binary Output Accuracy', '--'),
    (val_binary_acc, 'Val Binary Output Accuracy', '-'),
    (specific_acc, 'Specific Output Accuracy', '--'),
    (val_specific_acc, 'Val Specific Output Accuracy', '-'),
]

# Loss on the top panel, accuracy on the bottom panel.
fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(12, 8))
_plot_curves(loss_ax, loss_curves, 'Training and Validation Loss', 'Loss')
_plot_curves(acc_ax, acc_curves, 'Training and Validation Accuracy', 'Accuracy')

fig.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Predict on the test set. The model has exactly two outputs:
# predictions[0] is the binary head, predictions[1] the specific head.
predictions = model.predict(X_test)

# Binary classification confusion matrix.
# ravel() turns the (n, 1) sigmoid output into a flat label vector, and
# labels=[0, 1] keeps the matrix 2x2 even if one class is missing.
binary_preds = np.round(predictions[0]).astype(int).ravel()
binary_cm = confusion_matrix(y_test_binary, binary_preds, labels=[0, 1])

# Specific classification confusion matrix.
# The specific head is at index 1; index 2 does not exist for this model.
# labels=range(12) keeps the matrix 12x12 so it lines up with the 12 class
# names used as tick labels, even when a class is absent from the test split.
specific_preds = np.argmax(predictions[1], axis=1)
specific_cm = confusion_matrix(y_test, specific_preds, labels=list(range(12)))

# Plot confusion matrices
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Show a confusion matrix as an annotated heatmap.

    Args:
        cm: Square matrix of integer counts (cells are formatted with 'd').
        classes: Tick labels used for both axes.
        title: Figure title.
        cmap: Matplotlib colormap for the heatmap.
    """
    _, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap=cmap,
        xticklabels=classes,
        yticklabels=classes,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    plt.show()

# Binary head: 2x2 matrix.
binary_labels = ['Goodware', 'Malicious']
plot_confusion_matrix(
    binary_cm,
    classes=binary_labels,
    title='Binary Classification Confusion Matrix',
)

# Specific head: one tick label per class index 0..11.
specific_labels = ['Goodware', 'Critroni', 'CryptLocker', 'CryptoWall', 'KOLLAH', 'Kovter', 'Locker', 'MATSNU', 'PGPCODER', 'Reveton', 'TeslaCrypt', 'Trojan-Ransom']
plot_confusion_matrix(
    specific_cm,
    classes=specific_labels,
    title='Specific Classification Confusion Matrix',
)


In [None]:

from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
# Render the model graph to 'cnt_model.png', including tensor shapes and
# layer names in the diagram.
plot_model(model, to_file='cnt_model.png', show_shapes=True, show_layer_names=True)


import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Read the rendered diagram back from disk and show it on a 10x10 inch
# figure with the axes hidden.
img = mpimg.imread('cnt_model.png')
plt.figure(figsize=(10, 10))
image_ax = plt.gca()
image_ax.imshow(img)
image_ax.axis('off')
plt.show()
