In [1]:
import os
import numpy as np
from PIL import Image


In [2]:
def binary_file_to_image(filepath, size=(64, 64)):
    with open(filepath, 'rb') as fname:
        byte_arr = np.frombuffer(fname.read(), dtype=np.uint8)
    
    # Flatten or pad the array to fit the target image size
    desired_size = size[0] * size[1]
    if len(byte_arr) > desired_size:
        byte_arr = byte_arr[:desired_size]
    else:
        byte_arr = np.pad(byte_arr, (0, desired_size - len(byte_arr)), 'constant', constant_values=0)

    image = byte_arr.reshape(size)
    return image

In [6]:
def load_dataset(folder_path, size=(64, 64)):
    X, y = [], []
    for label, subfolder in enumerate(['ben', 'mal']):
        class_folder = os.path.join(folder_path, subfolder)
        for file_name in os.listdir(class_folder):
            file_path = os.path.join(class_folder, file_name)
            try:
                img = binary_file_to_image(file_path, size)
                X.append(img)
                y.append(label)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return np.array(X), np.array(y)

In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

# Load datasets with 10
train_X, train_y = load_dataset('dataset/train/')
val_X, val_y = load_dataset('dataset/valid/')
test_X, test_y = load_dataset('dataset/test/')

# Load datasets with 500
#train_X, train_y = load_dataset('dataset1/train/')
#val_X, val_y = load_dataset('dataset1/valid/')
#test_X, test_y = load_dataset('dataset1/test/')

# Normalize and reshape for CNN
train_X = train_X / 255.0
val_X = val_X / 255.0
test_X = test_X / 255.0

train_X = train_X[..., np.newaxis]
val_X = val_X[..., np.newaxis]
test_X = test_X[..., np.newaxis]

# CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=train_X.shape[1:]),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_X, train_y, validation_data=(val_X, val_y), epochs=10, batch_size=32)

# Evaluate
test_loss, test_acc = model.evaluate(test_X, test_y)
print(f'Test accuracy: {test_acc:.4f}')


Error processing dataset/valid/ben\0f55c132ec68d90ceb0fa6a7016e598d7223186a8b152c9f694f64954e1ace28: [Errno 22] Invalid argument: 'dataset/valid/ben\\0f55c132ec68d90ceb0fa6a7016e598d7223186a8b152c9f694f64954e1ace28'
Error processing dataset/valid/ben\e81adf93f61a5484671cb5fa8a58fcb10eb2b7fbcb0bba98ccfd7a0c8c29417f: [Errno 22] Invalid argument: 'dataset/valid/ben\\e81adf93f61a5484671cb5fa8a58fcb10eb2b7fbcb0bba98ccfd7a0c8c29417f'
Error processing dataset/test/mal\75f17f504ef17a7cac90e4e0f314a1a1a4835603a51d8a95d06f55bab7ba3c0d: [Errno 22] Invalid argument: 'dataset/test/mal\\75f17f504ef17a7cac90e4e0f314a1a1a4835603a51d8a95d06f55bab7ba3c0d'


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.8000 - loss: 0.6518 - val_accuracy: 0.7273 - val_loss: 0.9059
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 654ms/step - accuracy: 1.0000 - loss: 0.2255 - val_accuracy: 0.7273 - val_loss: 1.8077
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 260ms/step - accuracy: 1.0000 - loss: 0.1063 - val_accuracy: 0.7273 - val_loss: 2.9796
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 203ms/step - accuracy: 1.0000 - loss: 0.0439 - val_accuracy: 0.7273 - val_loss: 4.3896
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step - accuracy: 1.0000 - loss: 0.0139 - val_accuracy: 0.7273 - val_loss: 5.9913
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 1.0000 - loss: 0.0036 - val_accuracy: 0.7273 - val_loss: 7.7096
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━