In [None]:
'''SVM model with class label is Benign'''
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_benign, x_test, y_train_benign, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_benign" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_benign, y_train_benign

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "Benign" test samples predicted as "Benign" (per-class recall);
    # reported as 0 when the test split contains no "Benign" sample.
    benign_indices = np.where(y_test == "Benign")[0]
    accuracy_benign = np.sum(y_pred[benign_indices] == "Benign") / len(benign_indices) if len(benign_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'Benign' class only:", accuracy_benign)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'Benign' class only: 0.6470588235294118
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'Benign' class only: 0.5294117647058824
Numb

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_cerber, x_test, y_train_cerber, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_cerber" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_cerber, y_train_cerber

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "Cerber" test samples predicted as "Cerber" (per-class recall);
    # reported as 0 when the test split contains no "Cerber" sample.
    cerber_indices = np.where(y_test == "Cerber")[0]
    accuracy_cerber = np.sum(y_pred[cerber_indices] == "Cerber") / len(cerber_indices) if len(cerber_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'Cerber' class only:", accuracy_cerber)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'Cerber' class only: 0.9444444444444444
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'Cerber' class only: 1.0
Number of Testing R

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_locky, x_test, y_train_locky, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_locky" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_locky, y_train_locky

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "Locky" test samples predicted as "Locky" (per-class recall);
    # reported as 0 when the test split contains no "Locky" sample.
    locky_indices = np.where(y_test == "Locky")[0]
    accuracy_locky = np.sum(y_pred[locky_indices] == "Locky") / len(locky_indices) if len(locky_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'Locky' class only:", accuracy_locky)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'Locky' class only: 0.918918918918919
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'Locky' class only: 0.972972972972973
Number o

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_reveton, x_test, y_train_reveton, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_reveton" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_reveton, y_train_reveton

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "Reveton" test samples predicted as "Reveton" (per-class recall);
    # reported as 0 when the test split contains no "Reveton" sample.
    reveton_indices = np.where(y_test == "Reveton")[0]
    accuracy_reveton = np.sum(y_pred[reveton_indices] == "Reveton") / len(reveton_indices) if len(reveton_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'Reveton' class only:", accuracy_reveton)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'Reveton' class only: 0.8648648648648649
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'Reveton' class only: 0.918918918918919
Num

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_teslacrypt, x_test, y_train_teslacrypt, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_teslacrypt" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_teslacrypt, y_train_teslacrypt

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "teslacrypt" test samples predicted as "teslacrypt" (per-class recall);
    # reported as 0 when the test split contains no "teslacrypt" sample.
    teslacrypt_indices = np.where(y_test == "teslacrypt")[0]
    accuracy_teslacrypt = np.sum(y_pred[teslacrypt_indices] == "teslacrypt") / len(teslacrypt_indices) if len(teslacrypt_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'teslacrypt' class only:", accuracy_teslacrypt)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'teslacrypt' class only: 0.9166666666666666
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'teslacrypt' class only: 0.9166666666666

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC  # Importing SVM classifier
import pandas as pd
import numpy as np

# Load the dataset: every column except the last is a feature, the last is the label.
df = pd.read_csv("/content/Dataset.txt", header=None)
data = df.values
x = data[:, :-1].astype(float)  # Convert features to numeric values
y = data[:, -1].astype(str)

# Class labels to report on. Only the keys are used below; the values are
# presumably the per-class sample counts of the full dataset (not checked here).
class_labels = {
    "Benign": 438,
    "Reveton": 948,
    "Cerber": 897,
    "teslacrypt": 914,
    "Locky": 944,
    "Yakes": 925
}

# The test set always holds this many records, whatever the ratio below is.
num_testing_records = 200

# NOTE: test_ratio only changes how many records are drawn for training;
# the size of the test set stays fixed at num_testing_records.
for test_ratio in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Training records: the (1 - test_ratio) share of the data minus the test records
    num_training_records = int((1 - test_ratio) * len(y)) - num_testing_records

    # Stratified split so both parts keep the class proportions of y
    x_train_yakes, x_test, y_train_yakes, y_test = train_test_split(
        x, y, train_size=num_training_records, test_size=num_testing_records, random_state=42, stratify=y
    )

    # NOTE: despite the "_yakes" names, no per-class filtering is applied here;
    # the classifier is trained on every class present in the training split.
    x_train, y_train = x_train_yakes, y_train_yakes

    # Multi-class SVM with scikit-learn's default settings
    svm_classifier = SVC()
    svm_classifier.fit(x_train, y_train)
    y_pred = svm_classifier.predict(x_test)

    # One-vs-rest confusion counts for each class label
    results = {}
    for label in class_labels:
        is_label = y_test == label        # samples that truly belong to label
        predicted_label = y_pred == label  # samples predicted as label
        results[label] = {
            "TP": np.sum(is_label & predicted_label),
            "TN": np.sum(~is_label & ~predicted_label),
            "FP": np.sum(~is_label & predicted_label),
            "FN": np.sum(is_label & ~predicted_label),
        }

    # Number of records of each family used for testing
    family_records_testing = {label: int(np.sum(y_test == label)) for label in class_labels}

    # Overall accuracy across all classes
    accuracy = accuracy_score(y_test, y_pred)

    # Share of true "Yakes" test samples predicted as "Yakes" (per-class recall);
    # reported as 0 when the test split contains no "Yakes" sample.
    yakes_indices = np.where(y_test == "Yakes")[0]
    accuracy_yakes = np.sum(y_pred[yakes_indices] == "Yakes") / len(yakes_indices) if len(yakes_indices) > 0 else 0

    # Round instead of truncating: int((1 - 0.8) * 100) is 19 because of float
    # error, which made the label read "80:19" and "90:9".
    test_percent = round(test_ratio * 100)
    print(f"Test Ratio: {test_percent}:{100 - test_percent}")
    print("Number of Records of Each Family Used for Testing:")
    for label, count in family_records_testing.items():
        print(label + ":", count)

    # Print accuracies and TN, TP, FP, FN for each class label
    print("Overall Accuracy:", accuracy)
    print("Accuracy for 'Yakes' class only:", accuracy_yakes)
    print("Number of Testing Records:", num_testing_records)
    for label, metrics in results.items():
        print(label + ":")
        print("\tTrue Positives:", metrics["TP"])
        print("\tTrue Negatives:", metrics["TN"])
        print("\tFalse Positives:", metrics["FP"])
        print("\tFalse Negatives:", metrics["FN"])


Test Ratio: 10:90
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.865
Accuracy for 'Yakes' class only: 0.7837837837837838
Number of Testing Records: 200
Benign:
	True Positives: 11
	True Negatives: 181
	False Positives: 2
	False Negatives: 6
Reveton:
	True Positives: 32
	True Negatives: 159
	False Positives: 4
	False Negatives: 5
Cerber:
	True Positives: 34
	True Negatives: 163
	False Positives: 1
	False Negatives: 2
teslacrypt:
	True Positives: 33
	True Negatives: 161
	False Positives: 3
	False Negatives: 3
Locky:
	True Positives: 34
	True Negatives: 154
	False Positives: 9
	False Negatives: 3
Yakes:
	True Positives: 29
	True Negatives: 155
	False Positives: 8
	False Negatives: 8
Test Ratio: 20:80
Number of Records of Each Family Used for Testing:
Benign: 17
Reveton: 37
Cerber: 36
teslacrypt: 36
Locky: 37
Yakes: 37
Overall Accuracy: 0.9
Accuracy for 'Yakes' class only: 0.8648648648648649
Number