Modifications to support Habana Gaudi HPUs
Ubuntu committed Oct 28, 2023
1 parent 5ef669d commit 385309d
Showing 7 changed files with 204 additions and 40 deletions.
6 changes: 6 additions & 0 deletions env.txt
@@ -0,0 +1,6 @@
export LOG_LEVEL_PT_FALLBACK=1
export ENABLE_CONSOLE=true
export LOG_LEVEL_ALL=4
export GPU_MIGRATION_LOG_LEVEL=3
export PT_HPU_ENABLE_LAZY_COLLECTIVES=true
#export PT_HPU_LAZY_MODE=1
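
These flags configure logging and lazy-mode behavior in the Habana software stack, so they must be in the environment before habana_frameworks initializes (for example, source env.txt in the shell that launches the webui). Below is a minimal sketch of the same setup done from Python instead, under the assumption that the stack reads these variables when habana_frameworks is first imported:

import os

# Assumption: the Habana stack picks these up at first import of habana_frameworks.
os.environ.setdefault("LOG_LEVEL_PT_FALLBACK", "1")
os.environ.setdefault("ENABLE_CONSOLE", "true")
os.environ.setdefault("LOG_LEVEL_ALL", "4")
os.environ.setdefault("GPU_MIGRATION_LOG_LEVEL", "3")
os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true")

import habana_frameworks.torch.core  # noqa: E402  # import only after the flags are set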
14 changes: 10 additions & 4 deletions modules/devices.py
@@ -20,7 +20,8 @@ def get_cuda_device_string():
    if shared.cmd_opts.device_id is not None:
        return f"cuda:{shared.cmd_opts.device_id}"

-    return "cuda"
+    # return "cuda"
+    return "hpu"


def get_optimal_device_name():
@@ -30,7 +31,8 @@ def get_optimal_device_name():
    if has_mps():
        return "mps"

-    return "cpu"
+    # return "cpu"
+    return "hpu"


def get_optimal_device():
@@ -99,11 +101,14 @@ def autocast(disable=False):
    if dtype == torch.float32 or shared.cmd_opts.precision == "full":
        return contextlib.nullcontext()

-    return torch.autocast("cuda")
+    # return torch.autocast("cuda")
+    # return torch.autocast("hpu", dtype=torch.bfloat16, enabled=True)
+    return torch.autocast("hpu", enabled=True)


def without_autocast(disable=False):
-    return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    # return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    return torch.autocast("hpu", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()


class NansException(Exception):
@@ -114,6 +119,7 @@ def test_for_nans(x, where):
    if shared.cmd_opts.disable_nan_check:
        return

+    # print(x)
    if not torch.all(torch.isnan(x)).item():
        return
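
A note on the autocast change above: on Gaudi the autocast dtype defaults to bfloat16, which is presumably why the variant passing dtype=torch.bfloat16 explicitly is left commented out. A minimal sketch of the behavior, assuming habana_frameworks is installed and an HPU is visible:

import torch
import habana_frameworks.torch.core  # registers the "hpu" device

x = torch.ones(8, 8, device="hpu")
with torch.autocast("hpu", enabled=True):
    y = x @ x              # matmul runs in the autocast dtype
print(y.dtype)             # expected: torch.bfloat16, the HPU autocast default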

45 changes: 45 additions & 0 deletions modules/habana.py
@@ -0,0 +1,45 @@
import torch
import habana_frameworks.torch.hpu.random as htrandom

class HPUGenerator:
    def __init__(self):
        self.state = htrandom.get_rng_state()
        self.initial_seed_value = htrandom.initial_seed()

    def get_state(self):
        # PyTorch's Generator.get_state returns a tensor, same as htrandom.get_rng_state
        return htrandom.get_rng_state()

    def set_state(self, state):
        htrandom.set_rng_state(state)
        self.state = state

    def manual_seed(self, seed):
        htrandom.manual_seed(seed)
        self.initial_seed_value = seed
        self.state = htrandom.get_rng_state()
        return self

    def seed(self):
        # Assuming htrandom.seed generates a new seed internally and sets it
        htrandom.seed()
        self.state = htrandom.get_rng_state()
        self.initial_seed_value = htrandom.initial_seed()  # update initial_seed based on the new state
        return self.initial_seed_value  # torch.Generator.seed() returns the new seed, so mirror that here

    def initial_seed(self):
        return self.initial_seed_value

# Usage Example
#generator = HPUGenerator()
#state = generator.get_state()
#print(f"Initial State: {state}")
#
#generator.set_state(state)
#print("State is set back to its initial value.")
#
#generator.manual_seed(42)
#print(f"Manual Seed: {generator.initial_seed()}")
#
#generator.seed()
#print(f"Seed is set to a new value. New Initial Seed: {generator.initial_seed()}")

25 changes: 19 additions & 6 deletions modules/memmon.py
@@ -1,6 +1,7 @@
import threading
import time
from collections import defaultdict
+import habana_frameworks.torch as htorch

import torch

@@ -78,12 +79,24 @@ def read(self):
self.data["free"] = free
self.data["total"] = total

torch_stats = torch.cuda.memory_stats(self.device)
self.data["active"] = torch_stats["active.all.current"]
self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
self.data["system_peak"] = total - self.data["min_free"]
#torch_stats = torch.cuda.memory_stats(self.device)
torch_stats = htorch.hpu.memory_stats()
# {'Limit': 32374775808, 'InUse': 12222592, 'MaxInUse': 12222592, 'NumAllocs': 2, 'NumFrees': 0, 'ActiveAllocs': 2, 'MaxAllocSize': 4014080, 'TotalSystemAllocs': 3, 'TotalSystemFrees': 0, 'TotalActiveAllocs': 3}
self.data["active"] = torch_stats["InUse"]
self.data["active_peak"] = torch_stats["MaxInUse"]
self.data["reserved"] = torch_stats["InUse"]
self.data["reserved_peak"] = torch_stats["MaxInUse"]
self.data["system_peak"] = total - self.data["Limit"]
#self.data["active"] = 0
#self.data["active_peak"] = 0
#self.data["reserved"] = 0
#self.data["reserved_peak"] = 0
#self.data["system_peak"] = 0
#self.data["active"] = torch_stats["active.all.current"]
#self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
#self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
#self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
#self.data["system_peak"] = total - self.data["min_free"]

return self.data
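
Unlike torch.cuda.memory_stats, the HPU allocator reports flat keys (Limit, InUse, MaxInUse, ...) and exposes no separate reserved pool, which is why "reserved" is mapped to InUse above. The same mapping as a standalone helper; a sketch, with hpu_memory_fields being a hypothetical name rather than part of this commit:

import habana_frameworks.torch as htorch

def hpu_memory_fields():
    # Assumption: memory_stats() returns the flat keys shown in the comment above.
    stats = htorch.hpu.memory_stats()
    return {
        "active": stats["InUse"],
        "active_peak": stats["MaxInUse"],
        "reserved": stats["InUse"],        # no distinct reserved pool is reported
        "reserved_peak": stats["MaxInUse"],
        "total": stats["Limit"],
    }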

147 changes: 118 additions & 29 deletions modules/rng.py
@@ -1,84 +1,173 @@
import torch
+import habana_frameworks.torch.hpu.random as htrandom

-from modules import devices, rng_philox, shared
+from modules import devices, rng_philox, shared, habana
+#from habana import HPUGenerator

def randn(seed, shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
-
+    """Generate a tensor with random numbers from a normal distribution using seed."""
    manual_seed(seed)

    if shared.opts.randn_source == "NV":
        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)

-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+#        return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)

    return torch.randn(shape, device=devices.device, generator=generator)


def randn_local(seed, shape):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
-
+    """Generate a tensor with random numbers from a normal distribution using seed."""
+    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device

    if shared.opts.randn_source == "NV":
        rng = rng_philox.Generator(seed)
        return torch.asarray(rng.randn(shape), device=devices.device)

-    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+    elif shared.opts.randn_source == "HPU":
+        # torch.randn rejects non-torch generators, so draw from the Philox generator here, matching manual_seed's HPU path
+        rng = rng_philox.Generator(int(seed))
+        return torch.asarray(rng.randn(shape), device=devices.device)
+#        local_generator = habana.HPUGenerator().manual_seed(seed)
+#        return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
+
    local_generator = torch.Generator(local_device).manual_seed(int(seed))
    return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)


def randn_like(x):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
-    Use either randn() or manual_seed() to initialize the generator."""
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator."""
    if shared.opts.randn_source == "NV":
        return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)

-    if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        # torch.randn_like has no generator argument, so draw from the Philox generator as the NV branch does
+        return torch.asarray(hpu_rng.randn(x.shape), device=x.device, dtype=x.dtype)
+#        return torch.randn_like(x, generator=hpu_rng).to(x.device)
+
+    elif shared.opts.randn_source == "CPU" or x.device.type == 'mps':
        return torch.randn_like(x, device=devices.cpu).to(x.device)

    return torch.randn_like(x)


def randn_without_seed(shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator.
    Use either randn() or manual_seed() to initialize the generator."""

    if shared.opts.randn_source == "NV":
        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)

-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+#        return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)

    return torch.randn(shape, device=devices.device, generator=generator)


def manual_seed(seed):
    """Set up a global random number generator using the specified seed."""
+    seed = int(seed)
+
    if shared.opts.randn_source == "NV":
        global nv_rng
        nv_rng = rng_philox.Generator(seed)
        return

-    torch.manual_seed(seed)
+    elif shared.opts.randn_source == "HPU":
+        global hpu_rng
+        hpu_rng = rng_philox.Generator(seed)
+#        hpu_rng = habana.HPUGenerator().manual_seed(seed)
+    else:
+        torch.manual_seed(seed)

def create_generator(seed):
+    seed = int(seed)
+
    if shared.opts.randn_source == "NV":
        return rng_philox.Generator(seed)

-    device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
-    generator = torch.Generator(device).manual_seed(int(seed))
-    return generator
+    elif shared.opts.randn_source == "HPU":
+        generator = habana.HPUGenerator()
+        return generator.manual_seed(seed)
+    else:
+        device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+        generator = torch.Generator(device=device).manual_seed(seed)
+        return generator
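
Note that hpu_rng above is the CPU-side Philox generator (rng_philox.Generator), the same implementation the "NV" source uses, so HPU-seeded noise should match NV-seeded noise numerically; only the destination device differs. A standalone sketch of that code path, assuming an HPU is visible:

import torch
from modules import rng_philox

hpu_rng = rng_philox.Generator(1234)
noise = torch.asarray(hpu_rng.randn((1, 4, 64, 64)), device="hpu")  # latent-shaped noise placed on the HPU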

#def randn(seed, shape, generator=None):
# """Generate a tensor with random numbers from a normal distribution using seed.
#
# Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
#
# manual_seed(seed)
#
# if shared.opts.randn_source == "NV":
# return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
#
# if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
# return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
#
# return torch.randn(shape, device=devices.device, generator=generator)
#
#
#def randn_local(seed, shape):
# """Generate a tensor with random numbers from a normal distribution using seed.
#
# Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
#
# if shared.opts.randn_source == "NV":
# rng = rng_philox.Generator(seed)
# return torch.asarray(rng.randn(shape), device=devices.device)
#
# local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
# local_generator = torch.Generator(local_device).manual_seed(int(seed))
# return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
#
#
#def randn_like(x):
# """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
#
# Use either randn() or manual_seed() to initialize the generator."""
#
# if shared.opts.randn_source == "NV":
# return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)
#
# if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
# return torch.randn_like(x, device=devices.cpu).to(x.device)
#
# return torch.randn_like(x)
#
#
#def randn_without_seed(shape, generator=None):
# """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
#
# Use either randn() or manual_seed() to initialize the generator."""
#
# if shared.opts.randn_source == "NV":
# return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
#
# if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
# return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
#
# return torch.randn(shape, device=devices.device, generator=generator)
#
#def manual_seed(seed):
# """Set up a global random number generator using the specified seed."""
#
# if shared.opts.randn_source == "NV":
# global nv_rng
# nv_rng = rng_philox.Generator(seed)
# return
#
# torch.manual_seed(seed)
#
#
#def create_generator(seed):
# if shared.opts.randn_source == "NV":
# return rng_philox.Generator(seed)
#
# device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
# generator = torch.Generator(device).manual_seed(int(seed))
# return generator


# from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
2 changes: 1 addition & 1 deletion modules/shared_options.py
@@ -144,7 +144,7 @@
    "comma_padding_backtrack": OptionInfo(20, "Prompt word wrap length limit", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1}).info("in tokens - for texts shorter than specified, if they don't fit into 75 token limit, move them to the next 75 token chunk"),
    "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}, infotext="Clip skip").link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"),
    "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
-    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
+    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV", "HPU"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards; use HPU to produce same picture as on Habana Processing Units"),
    "tiling": OptionInfo(False, "Tiling", infotext='Tiling').info("produce a tileable picture"),
    "hires_fix_refiner_pass": OptionInfo("second pass", "Hires fix: which pass to enable refiner for", gr.Radio, {"choices": ["first pass", "second pass", "both passes"]}, infotext="Hires refiner"),
}))
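
With "HPU" registered as a choice, the source can also be switched programmatically through the options API; a sketch, assuming a running webui process with options loaded:

from modules import shared

shared.opts.set("randn_source", "HPU")  # same effect as picking HPU in the settings UI
print(shared.opts.randn_source)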
5 changes: 5 additions & 0 deletions webui.py
@@ -6,6 +6,11 @@
from modules import timer
from modules import initialize_util
from modules import initialize
+import habana_frameworks.torch.gpu_migration
+import habana_frameworks.torch.core as htcore
+#import habana_frameworks.torch as htorch
+#import torch.nn as nn
+#import torch.nn.functional as F

startup_timer = timer.startup_timer
startup_timer.record("launcher")
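
The gpu_migration import works purely through side effects: it has to run before anything touches torch.cuda so that CUDA API calls throughout the webui are redirected to the HPU backend, which is why it sits at the top of webui.py. A minimal sketch of the effect, assuming a Gaudi host:

import habana_frameworks.torch.gpu_migration  # noqa: F401  # side effects only; must come first
import torch

x = torch.zeros(4, device="cuda")  # under migration this allocation is expected to land on the HPU
print(x.device)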
Expand Down
