## Helios Preliminaries Benchmark
CIFAR-100 was chosen as the benchmark. The dataset is classified with a ResNet-34 implementation. Training time, GPU power draw, and accuracy are measured.

In [1]:
import os
import csv
import torch
import torchvision
from torchvision.datasets import CIFAR100
from torch.utils.data import random_split
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import subprocess
import time
import threading
import re
import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [3]:
# Augmentation pipeline: random resized crop back to 32x32, then conversion to a tensor.
# InterpolationMode.BILINEAR is the enum equivalent of the legacy integer code 2.
transform1 = transforms.Compose([
    transforms.RandomResizedCrop(
        size=32,
        scale=(0.08, 1.0),
        ratio=(0.75, 1.33),
        interpolation=transforms.InterpolationMode.BILINEAR,
    ),
    ToTensor(),
])

# Plain training set (images only converted to tensors).
dataset = CIFAR100(root='data/', download=True, transform=ToTensor())
# Second view of the same training images, passed through the random-crop
# augmentation; it is concatenated with `dataset` later to enlarge the training data.
datasetplus = CIFAR100(root='data/', download=True, transform=transform1)

# Held-out test split, no augmentation.
test_dataset = CIFAR100(root='data/', train=False, transform=ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Combined number of samples in the plain and augmented training sets.
dataset_size = len(dataset) + len(datasetplus)
test_dataset_size = len(test_dataset)
# Display both sizes together with the shape of the first image tensor.
dataset_size, test_dataset_size, dataset[0][0].shape

(100000, 10000, torch.Size([3, 32, 32]))

# Class names exposed by the dataset object and how many there are.
classes = dataset.classes
num_classes = len(classes)
# Print the count first, then the full list of names, one per line.
print(num_classes, classes, sep='\n')

In [6]:
random_seed = 42
torch.manual_seed(random_seed);

val_size = 10000
train_size = dataset_size - val_size

# `dataset` and `datasetplus` wrap the same CIFAR100 training images (plain and
# augmented). Randomly splitting their concatenation would place one view of an
# image in the training set and the other view in the validation set, leaking
# validation images into training. Split the base image indices instead and
# take BOTH views of each index on the same side of the split.
num_base_images = len(dataset)
num_val_images = val_size // 2          # each base image contributes two samples
shuffled_indices = torch.randperm(num_base_images).tolist()
val_indices = shuffled_indices[:num_val_images]
train_indices = shuffled_indices[num_val_images:]

# Dataset.__add__ concatenates the two subsets.
train_ds = (torch.utils.data.Subset(dataset, train_indices)
            + torch.utils.data.Subset(datasetplus, train_indices))
val_ds = (torch.utils.data.Subset(dataset, val_indices)
          + torch.utils.data.Subset(datasetplus, val_indices))

# The split sizes must match the sizes computed above.
assert (len(train_ds), len(val_ds)) == (train_size, val_size)
len(train_ds), len(val_ds)

(90000, 10000)

In [7]:
batch_size = 64

# Training batches are reshuffled on every pass; validation uses batches twice as large.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                      num_workers=4, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size=2 * batch_size,
                    num_workers=4, pin_memory=True)

In [8]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        outputs: 2-D tensor of scores; the maximum is taken along dim 1.
        labels: tensor of target indices, compared element-wise with the predictions.

    Returns:
        A 0-dim tensor holding (number of matches) / (number of predictions).
    """
    predicted = torch.max(outputs, dim=1).indices
    num_correct = (predicted == labels).sum().item()
    return torch.tensor(num_correct / len(predicted))

class ImageClassificationBase(nn.Module):
    """Base module adding per-batch and per-epoch helpers for classification.

    Subclasses supply `forward`; every step method calls `self(images)` on the
    first element of the batch and uses the second element as the labels.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss of the model on one (images, labels) batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Return the detached loss and the accuracy for one validation batch."""
        inputs, targets = batch
        logits = self(inputs)
        return {
            'val_loss': F.cross_entropy(logits, targets).detach(),
            'val_acc': accuracy(logits, targets),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation metrics into plain Python floats."""
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def test_step(self, batch):
        """Return the detached loss and the accuracy for one test batch."""
        inputs, targets = batch
        logits = self(inputs)
        return {
            'test_loss': F.cross_entropy(logits, targets).detach(),
            'test_acc': accuracy(logits, targets),
        }

    def test_epoch_end(self, outputs):
        """Average the per-batch test metrics into plain Python floats."""
        mean_loss = torch.stack([step['test_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['test_acc'] for step in outputs]).mean()
        return {'test_loss': mean_loss.item(), 'test_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's training and validation metrics."""
        summary = "Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc'])
        print(summary)

In [9]:
# Test to get the initial power draw of the GPU: run the nvidia-smi power query once.
# The completed process is kept in `result`; its stdout is not parsed in this cell.
command= "nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits"
result= subprocess.run(command,capture_output=True, text=True, shell=True)

In [11]:
class CIFAR100Model(ImageClassificationBase):
    """ResNet-34 from torchvision with its final layer replaced for this task.

    Args:
        num_classes: number of outputs of the replacement final linear layer.
            Defaults to 100, the value previously hard-coded here.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        # resnet34() is called with no arguments, i.e. with torchvision's defaults.
        self.network = torchvision.models.resnet34()
        # Swap the stock classifier head for one with `num_classes` outputs,
        # keeping the same number of input features.
        num_ftrs = self.network.fc.in_features
        self.network.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, xb):
        """Return the wrapped network's output for the batch `xb`."""
        return self.network(xb)

model = CIFAR100Model()
model

CIFAR100Model(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [12]:
# Sanity check: push a single training batch through the model and inspect shapes.
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = model(images)  # forward pass only; no loss or backward step here
    print('out.shape:', out.shape)
    print('out[0]:', out[0])  # model output for the first sample of the batch
    break  # only the first batch is inspected

images.shape: torch.Size([64, 3, 32, 32])
out.shape: torch.Size([64, 100])
out[0]: tensor([ 1.8112, -0.2146, -0.8967, -0.6784, -0.4871,  0.0668,  0.9443, -0.4087,
        -0.5206, -1.0655, -0.3351, -0.4734,  1.2425, -0.3854, -0.2971, -1.1744,
         0.2245, -0.8259,  0.9947, -1.0448,  0.3576,  0.7000,  0.8041,  0.6753,
        -0.2387,  1.9195,  0.3182,  0.7623,  0.4288, -1.1722, -0.6673, -0.2042,
         0.7197, -0.4425,  1.3796,  0.1309,  0.2097,  0.3618, -0.6044,  0.5886,
        -1.0910, -1.1205,  0.4699,  0.5436,  1.0770,  0.1230,  1.2365,  0.0356,
        -0.7345,  0.3674, -1.0584, -0.4703, -0.0423,  1.5577,  1.0864, -0.1525,
         0.2287, -1.0355,  0.5037,  0.6677, -0.0932,  0.5188,  0.1218,  0.4515,
        -0.2174,  0.9724,  0.1336,  0.4303, -0.1276,  0.7841,  1.3627,  1.6611,
         1.4098,  1.3904,  0.9581, -0.2808, -0.5457, -0.5696, -1.2677, -0.3882,
         0.5250,  0.5674,  0.0378,  0.6583, -0.8223, -0.6401, -0.8914, -0.3036,
         0.3326,  0.5455,  1.0180, -1

In [13]:
def get_default_device():
    """Return the CUDA device, failing loudly when no GPU is usable.

    There is deliberately no CPU fallback: if CUDA is unavailable the function
    raises instead of returning a CPU device.

    Returns:
        torch.device('cuda')

    Raises:
        RuntimeError: if `torch.cuda.is_available()` is False.
    """
    if not torch.cuda.is_available():
        raise RuntimeError("No GPU available. Make sure you have a GPU and appropriate drivers installed.")
    return torch.device('cuda')
    
def to_device(data, device):
    """Move `data` to `device`, recursing into lists and tuples.

    A list or tuple is returned as a new list whose items have each been moved;
    anything else is moved with `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

class DeviceDataLoader():
    """Iterable wrapper that moves every batch of an inner loader to a device."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __iter__(self):
        """Lazily yield each batch of the wrapped loader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Return the length of the wrapped loader."""
        return len(self.dl)

# Resolve the compute device once; get_default_device() raises when CUDA is unavailable.
device = get_default_device()
device

device(type='cuda')

In [14]:

# Wrap the loaders so every batch is moved to `device` as it is yielded.
# The isinstance guards keep this cell idempotent: re-running it must not wrap
# an already wrapped loader a second time.
if not isinstance(train_dl, DeviceDataLoader):
    train_dl = DeviceDataLoader(train_dl, device)
if not isinstance(val_dl, DeviceDataLoader):
    val_dl = DeviceDataLoader(val_dl, device)
to_device(model, device);

@torch.no_grad()
def evaluate(model, val_loader):
    """Run `model.validation_step` on every batch and reduce the results.

    The model is put in eval mode first; gradients are disabled by the decorator.
    Returns whatever `model.validation_epoch_end` returns for the collected steps.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

def fit(epochs, lr, model, train_loader, val_loader,
        weight_decay=0, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs, validating after each one.

    Args:
        epochs: number of passes over `train_loader`.
        lr: learning rate, passed positionally to `opt_func`.
        model: object providing `training_step`, `epoch_end`, `parameters`,
            `train`, plus whatever `evaluate` needs.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, handed to `evaluate`.
        weight_decay: forwarded to `opt_func` as a keyword argument.
        opt_func: optimizer constructor, called as
            `opt_func(model.parameters(), lr, weight_decay=weight_decay)`.

    Returns:
        A list with one result dict per epoch: the dict returned by `evaluate`
        plus a 'train_loss' entry holding the mean training loss of that epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr, weight_decay=weight_decay)
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            # Keep only the value for logging: storing the attached tensor would
            # hold a reference to its autograd history for the whole epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [15]:
# Build a fresh CIFAR100Model on `device` (rebinding `model`) and evaluate it on the
# validation loader before any training, as a baseline.
model = to_device(CIFAR100Model(), device)
evaluate(model, val_dl)

{'val_loss': 4.8306097984313965, 'val_acc': 0.009889240376651287}

In [16]:
# Use the Adam optimizer for this model; `opt_func` is later passed to fit(),
# which calls it to construct the optimizer.
opt_func = torch.optim.Adam

In [17]:
def get_gpu_power():
    '''Return the GPU power draw reported by nvidia-smi as a float.

    Runs `nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits` and
    parses its stdout as one number (nvidia-smi reports power.draw in watts).

    Raises:
        ValueError: if nvidia-smi cannot be started, exits with a non-zero
            status, or prints something that is not a single number. The
            message includes the tool's stderr/stdout so the cause is visible.
    '''
    command = ["nvidia-smi", "--query-gpu=power.draw", "--format=csv,noheader,nounits"]
    try:
        # Argument list, no shell: nothing here needs shell interpretation.
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        raise ValueError(f"Could not run nvidia-smi: {exc}") from exc

    power_output = result.stdout.strip()
    if result.returncode != 0:
        details = result.stderr.strip() or power_output
        raise ValueError(f"nvidia-smi failed (exit code {result.returncode}): {details}")
    try:
        power_watts = float(power_output)
    except ValueError:
        raise ValueError(f"Unexpected nvidia-smi power.draw output: {power_output!r}") from None
    return power_watts

def resetclocks():
    '''Run `nvidia-smi -rgc` to reset the GPU clocks and return the completed process.'''
    return subprocess.run("nvidia-smi -rgc", capture_output=True, text=True, shell=True)

def _parse_max_clocks(clock_report):
    '''Return {name: MHz} for the first "Max Clocks" section of an nvidia-smi clock report.'''
    pattern = re.compile(r'(?P<name>\w+)\s+:\s+(?P<value>\d+)\sMHz')
    clocks = {}
    in_section = False
    for line in clock_report.splitlines():
        if not in_section:
            in_section = line.strip() == 'Max Clocks'
            continue
        if ':' not in line:
            # A line without "name : value" is the next section header (or a blank line).
            break
        match = pattern.search(line)
        if match:
            clocks[match.group('name')] = int(match.group('value'))
    return clocks


def setclocks_80(throttle):
    '''Lock the GPU graphics clock to `throttle` times the reported maximum.

    Reads `nvidia-smi -q -d CLOCK`, prints the report, takes the "Graphics"
    entry of its "Max Clocks" section, multiplies it by `throttle`
    (truncating to an int) and passes the result to `nvidia-smi -lgc`.
    The section is located by its header rather than by fixed character
    offsets, so changes in report length do not break parsing.

    Args:
        throttle: multiplier applied to the maximum graphics clock (e.g. 0.8).

    Returns:
        The completed process of the `nvidia-smi -lgc` call.

    Raises:
        RuntimeError: if the report contains no "Max Clocks" graphics entry.
    '''
    output = subprocess.check_output(['nvidia-smi', '-q', '-d', 'CLOCK']).decode('utf-8')
    print(output)
    clock_speeds = _parse_max_clocks(output)
    if 'Graphics' not in clock_speeds:
        raise RuntimeError("Could not find the 'Max Clocks' graphics entry in the nvidia-smi report.")

    newggraphics_clocks = int(clock_speeds['Graphics'] * throttle)
    set_clocks = subprocess.run(['nvidia-smi', '-lgc', str(newggraphics_clocks)],
                                capture_output=True, text=True)
    if set_clocks.returncode != 0:
        # Do not abort the run, but make a failed lock visible instead of
        # silently benchmarking at unchanged clocks.
        details = (set_clocks.stderr or set_clocks.stdout or '').strip()
        print(f"WARNING: nvidia-smi -lgc exited with code {set_clocks.returncode}: {details}")
    print(newggraphics_clocks)
    return set_clocks



In [20]:
# Reset the GPU clocks, then print the current nvidia-smi clock report.
resetclocks()
print(subprocess.check_output(['nvidia-smi', '-q', '-d', 'CLOCK']).decode('utf-8'))



Timestamp                                 : Sun Jan 28 17:19:20 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1920 MHz
        SM                                : 1920 MHz
        Memory                            : 6801 MHz
        Video                             : 1785 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [21]:
# 1st
def train_with_powerdraw(epochs, clocks, sample_interval=0.0):
    """Train under a throttled GPU clock while sampling GPU power draw.

    The graphics clock is set through `setclocks_80(clocks)`; training then runs
    in one thread while a second thread repeatedly calls `get_gpu_power()`.
    Uses the notebook-level `model`, `train_dl`, `val_dl` and `opt_func`.

    NOTE(review): the same global `model` is trained on every call, so successive
    calls continue from the previous weights. Confirm this is intended when
    comparing accuracy across clock settings.

    Args:
        epochs: number of epochs passed to `fit`.
        clocks: throttle factor forwarded to `setclocks_80`.
        sample_interval: seconds to wait between power samples. The default of
            0.0 samples back-to-back, as before.

    Returns:
        dict with 'epochs', 'clocks', 'samples', 'average_power_w',
        'duration_s', 'watt_hours' and 'history' (the list returned by `fit`,
        or None if training raised).
    """
    setclocks_80(clocks)
    print(f"set clocks to: {subprocess.check_output(['nvidia-smi', '-q', '-d', 'CLOCK']).decode('utf-8')}")
    stop_flag = threading.Event()
    power_measurement = []
    fit_output = {'history': None}

    def measure_power_consumption1():
        # Sample until training signals completion; wait() returns early once the flag is set.
        while not stop_flag.is_set():
            power_measurement.append(get_gpu_power())
            stop_flag.wait(sample_interval)
        return power_measurement

    def model_fit():
        try:
            fit_output['history'] = fit(epochs, 1e-3, model, train_dl, val_dl, 5e-4, opt_func)
        finally:
            # Stop the sampler even if training raised.
            stop_flag.set()

    thread_one = threading.Thread(target=model_fit)
    thread_two = threading.Thread(target=measure_power_consumption1)

    start = datetime.datetime.now()
    thread_two.start()
    thread_one.start()

    thread_one.join()

    stop_flag.set()
    finish = datetime.datetime.now()
    thread_two.join()

    print("length:"+str(len(power_measurement)))
    if power_measurement:
        average_power_draw = sum(power_measurement)/len(power_measurement)
    else:
        # No sample was collected (e.g. the sampler failed); avoid dividing by zero.
        average_power_draw = float('nan')

    # total_seconds() includes hours and days, which parsing "H:MM:SS" by hand dropped.
    duration = (finish - start).total_seconds()
    print("duration:"+ str(duration))
    total_watt_hours = (average_power_draw * duration)/3600
    print("watthours:"+ str(total_watt_hours))

    return {
        'epochs': epochs,
        'clocks': clocks,
        'samples': len(power_measurement),
        'average_power_w': average_power_draw,
        'duration_s': duration,
        'watt_hours': total_watt_hours,
        'history': fit_output['history'],
    }

In [22]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.50.
train_with_powerdraw(5, 0.50)



Timestamp                                 : Sun Jan 28 17:20:26 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 420 MHz
        SM                                : 420 MHz
        Memory                            : 810 MHz
        Video                             : 540 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950 MHz

In [23]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.55.
train_with_powerdraw(5, 0.55)



Timestamp                                 : Sun Jan 28 17:31:55 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1065 MHz
        SM                                : 1065 MHz
        Memory                            : 810 MHz
        Video                             : 990 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950 M

In [30]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.60.
train_with_powerdraw(5, 0.60)



Timestamp                                 : Sun Jan 28 17:49:40 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1935 MHz
        SM                                : 1935 MHz
        Memory                            : 810 MHz
        Video                             : 1785 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950 

In [31]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.65.
train_with_powerdraw(5, 0.65)



Timestamp                                 : Sun Jan 28 17:56:23 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1275 MHz
        SM                                : 1275 MHz
        Memory                            : 6801 MHz
        Video                             : 1185 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [32]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.70.
train_with_powerdraw(5, 0.70)



Timestamp                                 : Sun Jan 28 18:02:59 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1380 MHz
        SM                                : 1380 MHz
        Memory                            : 6801 MHz
        Video                             : 1275 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [33]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.75.
train_with_powerdraw(5, 0.75)



Timestamp                                 : Sun Jan 28 18:09:28 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1485 MHz
        SM                                : 1485 MHz
        Memory                            : 6801 MHz
        Video                             : 1380 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [42]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.80.
# NOTE(review): this cell's execution count (In [42]) is out of sequence with its
# neighbours, so it appears to have been re-run after the later cells.
train_with_powerdraw(5, 0.80)



Timestamp                                 : Sun Jan 28 18:42:54 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1920 MHz
        SM                                : 1920 MHz
        Memory                            : 810 MHz
        Video                             : 1785 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950 

In [35]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.85.
train_with_powerdraw(5, 0.85)



Timestamp                                 : Sun Jan 28 18:22:29 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1695 MHz
        SM                                : 1695 MHz
        Memory                            : 6801 MHz
        Video                             : 1575 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [36]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.90.
train_with_powerdraw(5, 0.90)



Timestamp                                 : Sun Jan 28 18:29:10 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1800 MHz
        SM                                : 1800 MHz
        Memory                            : 6801 MHz
        Video                             : 1665 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950

In [37]:
# Benchmark run: 5 epochs with a clock throttle factor of 0.95.
train_with_powerdraw(5, 0.95)



Timestamp                                 : Sun Jan 28 18:35:31 2024
Driver Version                            : 545.92
CUDA Version                              : 12.3

Attached GPUs                             : 1
GPU 00000000:02:00.0
    Clocks
        Graphics                          : 1905 MHz
        SM                                : 1905 MHz
        Memory                            : 6801 MHz
        Video                             : 1770 MHz
    Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Default Applications Clocks
        Graphics                          : N/A
        Memory                            : N/A
    Deferred Clocks
        Memory                            : N/A
    Max Clocks
        Graphics                          : 2115 MHz
        SM                                : 2115 MHz
        Memory                            : 7001 MHz
        Video                             : 1950