In [3]:
pip install GPUtil scikit-learn torch torchvision

Collecting torch
  Downloading torch-2.6.0-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading torchvision-0.21.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.1 kB)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting networkx (from torch)
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 

In [28]:
import os
import time
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import psutil
import GPUtil
import numpy as np
from torch.utils.data import DataLoader, random_split, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [39]:
# Define dataset class (assuming images are already extracted into folders)
class MalwareDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.image_paths = []
        self.labels = []
        
        for label, class_dir in enumerate(self.classes):
            class_path = os.path.join(root_dir, class_dir)
            for img_file in os.listdir(class_path):
                self.image_paths.append(os.path.join(class_path, img_file))
                self.labels.append(label)
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
    
        if not isinstance(image, torch.Tensor):  # Ensure it's not already a tensor
            image = transforms.ToTensor()(image)
    
        if self.transform:
            image = self.transform(image)
    
        return image, label

In [40]:
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Ensure 1 channel
    transforms.Resize((224, 224)),  # Resize to match ResNet/DenseNet input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust for single-channel images
])

In [41]:
# Load dataset
dataset_path = "./malimg_paper_dataset_imgs/"
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Split into train/validation sets
from torch.utils.data import DataLoader, random_split

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 32  # Adjust as needed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [42]:
# Define function to track resource usage
def get_system_metrics():
    return {
        "cpu_usage": psutil.cpu_percent(),
        "memory_usage": psutil.virtual_memory().percent,
        "disk_usage": psutil.disk_usage('/').percent,
        "gpu_usage": GPUtil.getGPUs()[0].load if torch.cuda.is_available() else None
    }

In [44]:
from tqdm import tqdm  # Import tqdm for progress bar

def train_model(model, train_loader, val_loader, epochs=10, lr=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        start_time = time.time()
        
        # Wrap DataLoader with tqdm for progress bar
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            
            # Update tqdm progress bar description with current loss
            progress_bar.set_postfix(loss=loss.item())

        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds, average='macro')
        recall = recall_score(all_labels, all_preds, average='macro')
        system_metrics = get_system_metrics()
        
        print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss/len(train_loader):.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, Time: {time.time() - start_time:.2f}s")
        print(f"System Metrics: {system_metrics}")

# For ResNet
resnet = models.resnet18(pretrained=True)
resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
resnet.fc = nn.Linear(resnet.fc.in_features, len(dataset.classes))

# For DenseNet
densenet = models.densenet121(pretrained=True)
densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
densenet.classifier = nn.Linear(densenet.classifier.in_features, len(dataset.classes))

print("Training ResNet:")
train_model(resnet, train_loader, val_loader, epochs=10)

print("Training DenseNet:")
train_model(densenet, train_loader, val_loader, epochs=10)



Training ResNet:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/10 - Loss: 0.2325, Accuracy: 0.4829, Precision: 0.5577, Recall: 0.4806, Time: 35.68s
System Metrics: {'cpu_usage': 42.7, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.04}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/10 - Loss: 0.1171, Accuracy: 0.9652, Precision: 0.8879, Recall: 0.8850, Time: 36.54s
System Metrics: {'cpu_usage': 54.4, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.04}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 3/10 - Loss: 0.1055, Accuracy: 0.9722, Precision: 0.8831, Recall: 0.9040, Time: 35.79s
System Metrics: {'cpu_usage': 53.8, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.05}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 4/10 - Loss: 0.0806, Accuracy: 0.9818, Precision: 0.9345, Recall: 0.9377, Time: 35.61s
System Metrics: {'cpu_usage': 53.6, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.02}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 5/10 - Loss: 0.0783, Accuracy: 0.9791, Precision: 0.9272, Recall: 0.9309, Time: 35.45s
System Metrics: {'cpu_usage': 53.5, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.05}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 6/10 - Loss: 0.0728, Accuracy: 0.9754, Precision: 0.9147, Recall: 0.9188, Time: 36.22s
System Metrics: {'cpu_usage': 53.5, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.02}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 7/10 - Loss: 0.0664, Accuracy: 0.9791, Precision: 0.9274, Recall: 0.9362, Time: 35.57s
System Metrics: {'cpu_usage': 53.2, 'memory_usage': 1.6, 'disk_usage': 40.3, 'gpu_usage': 0.04}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 8/10 - Loss: 0.0592, Accuracy: 0.9797, Precision: 0.9279, Recall: 0.9348, Time: 35.86s
System Metrics: {'cpu_usage': 53.4, 'memory_usage': 1.7, 'disk_usage': 40.3, 'gpu_usage': 0.04}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 9/10 - Loss: 0.0557, Accuracy: 0.9695, Precision: 0.9107, Recall: 0.9101, Time: 35.36s
System Metrics: {'cpu_usage': 53.3, 'memory_usage': 1.7, 'disk_usage': 40.3, 'gpu_usage': 0.03}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 10/10 - Loss: 0.0696, Accuracy: 0.9513, Precision: 0.9151, Recall: 0.8696, Time: 35.59s
System Metrics: {'cpu_usage': 53.6, 'memory_usage': 1.7, 'disk_usage': 40.3, 'gpu_usage': 0.05}
Training DenseNet:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/10 - Loss: 0.2415, Accuracy: 0.9036, Precision: 0.8995, Recall: 0.8058, Time: 51.21s
System Metrics: {'cpu_usage': 40.8, 'memory_usage': 1.7, 'disk_usage': 40.3, 'gpu_usage': 0.15}


                                                                                                                                                                                       

KeyboardInterrupt: 