# Install dependencies if not on system already (pip freeze)

In [1]:
pip install GPUtil scikit-learn torch torchvision tensorboard

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import time
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import psutil
import GPUtil
import numpy as np
from torch.utils.data import DataLoader, random_split, Dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

%load_ext tensorboard

In [3]:
# Define dataset class (assuming images are already extracted into folders)
class MalwareDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.image_paths = []
        self.labels = []
        
        for label, class_dir in enumerate(self.classes):
            class_path = os.path.join(root_dir, class_dir)
            for img_file in os.listdir(class_path):
                self.image_paths.append(os.path.join(class_path, img_file))
                self.labels.append(label)
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
    
        if not isinstance(image, torch.Tensor):  # Ensure it's not already a tensor
            image = transforms.ToTensor()(image)
    
        if self.transform:
            image = self.transform(image)
    
        return image, label

In [4]:
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Ensure 1 channel
    transforms.Resize((224, 224)),  # Resize to match ResNet/DenseNet input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Adjust for single-channel images
])

In [5]:
# Load dataset
dataset_path = "./malimg_paper_dataset_imgs/"
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Split into train/validation sets
from torch.utils.data import DataLoader, random_split

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders
batch_size = 32  # Adjust as needed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [6]:
# Define function to track resource usage
def get_system_metrics():
    return {
        "cpu_usage": psutil.cpu_percent(),
        "memory_usage": psutil.virtual_memory().percent,
        "disk_usage": psutil.disk_usage('/').percent,
        "gpu_usage": GPUtil.getGPUs()[0].load if torch.cuda.is_available() else None
    }

In [None]:
import torch
import time
import psutil
import GPUtil
from torch import nn, optim
from torch.utils.tensorboard import SummaryWriter
from torchvision.models import resnet18, ResNet18_Weights
from torchvision.models import densenet121, DenseNet121_Weights
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def get_system_metrics():
    """Collect CPU, memory, and GPU metrics."""
    cpu_usage = psutil.cpu_percent()
    memory_usage = psutil.virtual_memory().percent

    if torch.cuda.is_available():
        gpu_memory = torch.cuda.memory_allocated() / (1024 ** 3)  # GB
        gpus = GPUtil.getGPUs()
        if gpus:
            gpu = gpus[0]  # First GPU
            gpu_utilization = gpu.memoryUtil * 100  # Memory utilization %
            gpu_load = gpu.load * 100  # Compute load %
        else:
            gpu_utilization = gpu_load = 0.0
        return cpu_usage, memory_usage, gpu_utilization, gpu_memory, gpu_load
    else:
        return cpu_usage, memory_usage, 0.0, 0.0, 0.0

def train_model(model, train_loader, val_loader, epochs=10, lr=0.001, log_dir='runs/experiment'):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    writer = SummaryWriter(log_dir=log_dir)
    global_step = 0

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        epoch_start_time = time.time()

        # Training loop with progress bar
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)
        for batch_idx, (images, labels) in enumerate(progress_bar):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            global_step += 1

            # Log training loss every batch
            writer.add_scalar("Loss/Train", loss.item(), global_step)

            # Log system metrics every 10 batches (adjustable)
            if batch_idx % 10 == 0:
                cpu, mem, gpu_util, gpu_mem, gpu_load = get_system_metrics()
                writer.add_scalar("System/CPU_Usage", cpu, global_step)
                writer.add_scalar("System/Memory_Usage", mem, global_step)
                if torch.cuda.is_available():
                    writer.add_scalar("GPU/Utilization", gpu_util, global_step)
                    writer.add_scalar("GPU/Memory_GB", gpu_mem, global_step)
                    writer.add_scalar("GPU/Load", gpu_load, global_step)

            progress_bar.set_postfix(loss=loss.item())

        # Validation phase
        model.eval()
        val_loss = 0.0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Collect predictions for metrics
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Compute metrics
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = accuracy_score(all_labels, all_preds)
        val_precision = precision_score(all_labels, all_preds, average='macro')
        val_recall = recall_score(all_labels, all_preds, average='macro')
        val_f1 = f1_score(all_labels, all_preds, average='macro')

        # Log epoch-level metrics
        writer.add_scalar("Loss/Validation", avg_val_loss, epoch)
        writer.add_scalar("Metrics/Accuracy", val_accuracy, epoch)
        writer.add_scalar("Metrics/Precision", val_precision, epoch)
        writer.add_scalar("Metrics/Recall", val_recall, epoch)
        writer.add_scalar("Metrics/F1", val_f1, epoch)

        # Print epoch summary
        epoch_time = time.time() - epoch_start_time
        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | "
              f"Accuracy: {val_accuracy:.4f} | "
              f"Time: {epoch_time:.2f}s")

    writer.close()

# Example usage with ResNet/DenseNet (modify dataset.classes as needed)
if __name__ == "__main__":
    # Example dataset setup (replace with your actual dataset)
    # dataset = YourDataset(...)
    # train_loader = DataLoader(...)
    # val_loader = DataLoader(...)

    # ResNet18 (grayscale adaptation)
    resnet = resnet18(weights=ResNet18_Weights.DEFAULT)
    resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    resnet.fc = nn.Linear(resnet.fc.in_features, len(dataset.classes))  # Replace dataset.classes

    # DenseNet121 (grayscale adaptation)
    densenet = densenet121(weights=DenseNet121_Weights.DEFAULT)
    densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
    densenet.classifier = nn.Linear(densenet.classifier.in_features, len(dataset.classes))  # Replace dataset.classes

    print("Training ResNet18...")
    train_model(resnet, train_loader, val_loader, epochs=10, log_dir='runs/resnet')

    print("Training DenseNet121...")
    train_model(densenet, train_loader, val_loader, epochs=10, log_dir='runs/densenet')

Training ResNet18...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1/10 | Train Loss: 0.2294 | Val Loss: 0.3740 | Accuracy: 0.8925 | Time: 39.19s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 2/10 | Train Loss: 0.1182 | Val Loss: 0.1325 | Accuracy: 0.9599 | Time: 40.61s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 3/10 | Train Loss: 0.1014 | Val Loss: 0.0715 | Accuracy: 0.9791 | Time: 48.16s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 4/10 | Train Loss: 0.1118 | Val Loss: 0.1091 | Accuracy: 0.9658 | Time: 46.06s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 5/10 | Train Loss: 0.0749 | Val Loss: 0.0679 | Accuracy: 0.9781 | Time: 38.41s


Epoch 6/10:  12%|████████████▊                                                                                              | 28/234 [00:03<00:29,  7.08it/s, loss=0.0691]

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs