diff --git a/env.txt b/env.txt
new file mode 100644
index 00000000..8f4ffaff
--- /dev/null
+++ b/env.txt
@@ -0,0 +1,6 @@
+export LOG_LEVEL_PT_FALLBACK=1
+export ENABLE_CONSOLE=true
+export LOG_LEVEL_ALL=4
+export GPU_MIGRATION_LOG_LEVEL=3
+export PT_HPU_ENABLE_LAZY_COLLECTIVES=true
+#export PT_HPU_LAZY_MODE=1
diff --git a/modules/devices.py b/modules/devices.py
index c01f0602..b98e1d6e 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -20,7 +20,8 @@ def get_cuda_device_string():
     if shared.cmd_opts.device_id is not None:
         return f"cuda:{shared.cmd_opts.device_id}"
 
-    return "cuda"
+    # return "cuda"
+    return "hpu"
 
 
 def get_optimal_device_name():
@@ -30,7 +31,8 @@ def get_optimal_device_name():
     if has_mps():
         return "mps"
 
-    return "cpu"
+    # return "cpu"
+    return "hpu"
 
 
 def get_optimal_device():
@@ -99,11 +101,14 @@ def autocast(disable=False):
     if dtype == torch.float32 or shared.cmd_opts.precision == "full":
         return contextlib.nullcontext()
 
-    return torch.autocast("cuda")
+    # return torch.autocast("cuda")
+    # return torch.autocast("hpu", dtype=torch.bfloat16, enabled=True)
+    return torch.autocast("hpu", enabled=True)
 
 
 def without_autocast(disable=False):
-    return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    # return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    return torch.autocast("hpu", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
 
 
 class NansException(Exception):
@@ -114,6 +119,7 @@ def test_for_nans(x, where):
     if shared.cmd_opts.disable_nan_check:
         return
 
+    # print(x)
     if not torch.all(torch.isnan(x)).item():
         return
 
diff --git a/modules/habana.py b/modules/habana.py
new file mode 100644
index 00000000..2a514661
--- /dev/null
+++ b/modules/habana.py
@@ -0,0 +1,45 @@
+import torch
+import habana_frameworks.torch.hpu.random as htrandom
+
+
+class HPUGenerator:
+    def __init__(self):
+        self.state = htrandom.get_rng_state()
+        self.initial_seed_value = htrandom.initial_seed()
+
+    def get_state(self):
+        # PyTorch's Generator.get_state returns a tensor, same as htrandom.get_rng_state
+        return htrandom.get_rng_state()
+
+    def set_state(self, state):
+        htrandom.set_rng_state(state)
+        self.state = state
+
+    def manual_seed(self, seed):
+        htrandom.manual_seed(seed)
+        self.initial_seed_value = seed
+        self.state = htrandom.get_rng_state()
+        return self
+
+    def seed(self):
+        # Assuming htrandom.seed generates a new seed internally and sets it
+        htrandom.seed()
+        self.state = htrandom.get_rng_state()
+        self.initial_seed_value = htrandom.initial_seed()  # update initial_seed based on new state
+
+    def initial_seed(self):
+        return self.initial_seed_value
+
+
+# Usage Example
+#generator = HPUGenerator()
+#state = generator.get_state()
+#print(f"Initial State: {state}")
+#
+#generator.set_state(state)
+#print("State is set back to its initial value.")
+#
+#generator.manual_seed(42)
+#print(f"Manual Seed: {generator.initial_seed()}")
+#
+#generator.seed()
+#print(f"Seed is set to a new value. New Initial Seed: {generator.initial_seed()}")
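
A short sketch of how the wrapper is expected to behave, assuming habana_frameworks is installed and that torch.randn on the hpu device consumes the global RNG state that htrandom manages; the rewind behavior shown here is an assumption of this sketch, not something the patch itself verifies:

    import habana_frameworks.torch.core  # registers the hpu device with PyTorch
    import torch
    from modules.habana import HPUGenerator

    gen = HPUGenerator().manual_seed(42)
    state = gen.get_state()               # snapshot right after seeding

    a = torch.randn(4, device="hpu")      # consumes the seeded HPU stream
    gen.set_state(state)                  # rewind the global HPU RNG state
    b = torch.randn(4, device="hpu")
    print(torch.equal(a.cpu(), b.cpu()))  # expected: True (same stream replayed)
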
diff --git a/modules/memmon.py b/modules/memmon.py
index 4018edcc..90a1ffef 100644
--- a/modules/memmon.py
+++ b/modules/memmon.py
@@ -1,6 +1,7 @@
 import threading
 import time
 from collections import defaultdict
+import habana_frameworks.torch as htorch
 
 import torch
 
@@ -78,12 +79,24 @@ def read(self):
         self.data["free"] = free
         self.data["total"] = total
 
-        torch_stats = torch.cuda.memory_stats(self.device)
-        self.data["active"] = torch_stats["active.all.current"]
-        self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
-        self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
-        self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
-        self.data["system_peak"] = total - self.data["min_free"]
+        #torch_stats = torch.cuda.memory_stats(self.device)
+        torch_stats = htorch.hpu.memory_stats()
+        # {'Limit': 32374775808, 'InUse': 12222592, 'MaxInUse': 12222592, 'NumAllocs': 2, 'NumFrees': 0, 'ActiveAllocs': 2, 'MaxAllocSize': 4014080, 'TotalSystemAllocs': 3, 'TotalSystemFrees': 0, 'TotalActiveAllocs': 3}
+        self.data["active"] = torch_stats["InUse"]
+        self.data["active_peak"] = torch_stats["MaxInUse"]
+        self.data["reserved"] = torch_stats["InUse"]
+        self.data["reserved_peak"] = torch_stats["MaxInUse"]
+        self.data["system_peak"] = total - self.data["min_free"]
+        #self.data["active"] = 0
+        #self.data["active_peak"] = 0
+        #self.data["reserved"] = 0
+        #self.data["reserved_peak"] = 0
+        #self.data["system_peak"] = 0
+        #self.data["active"] = torch_stats["active.all.current"]
+        #self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
+        #self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
+        #self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
+        #self.data["system_peak"] = total - self.data["min_free"]
 
         return self.data
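
The HPU allocator exposes much coarser counters than torch.cuda.memory_stats, which is why InUse and MaxInUse stand in for both the active and reserved figures above. A quick way to eyeball them on a live process, using only the keys shown in the sample dict (the unit scaling is mine):

    import habana_frameworks.torch as htorch

    stats = htorch.hpu.memory_stats()
    print(f"in use: {stats['InUse'] / 2**20:.1f} MiB")
    print(f"peak:   {stats['MaxInUse'] / 2**20:.1f} MiB")
    print(f"limit:  {stats['Limit'] / 2**30:.2f} GiB")
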
diff --git a/modules/rng.py b/modules/rng.py
index 9e8ba2ee..4b1ef820 100644
--- a/modules/rng.py
+++ b/modules/rng.py
@@ -1,84 +1,173 @@
 import torch
+import habana_frameworks.torch.hpu.random as htrandom
 
-from modules import devices, rng_philox, shared
-
+from modules import devices, rng_philox, shared, habana
+#from habana import HPUGenerator
 
 def randn(seed, shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
-
+    """Generate a tensor with random numbers from a normal distribution using seed."""
     manual_seed(seed)
 
     if shared.opts.randn_source == "NV":
         return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
 
-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+        # return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
         return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
 
     return torch.randn(shape, device=devices.device, generator=generator)
 
 
 def randn_local(seed, shape):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
+    """Generate a tensor with random numbers from a normal distribution using seed."""
+    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
 
     if shared.opts.randn_source == "NV":
         rng = rng_philox.Generator(seed)
         return torch.asarray(rng.randn(shape), device=devices.device)
 
-    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+    elif shared.opts.randn_source == "HPU":
+        # HPUGenerator.manual_seed seeds the global HPU RNG state; torch.randn cannot
+        # accept a non-torch.Generator object, so no generator kwarg is passed here
+        habana.HPUGenerator().manual_seed(seed)
+        return torch.randn(shape, device=local_device).to(devices.device)
+
     local_generator = torch.Generator(local_device).manual_seed(int(seed))
     return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
 
 
 def randn_like(x):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
-    Use either randn() or manual_seed() to initialize the generator."""
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator."""
    if shared.opts.randn_source == "NV":
         return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)
 
-    if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        # torch.randn_like has no generator argument; draw from hpu_rng like the NV path
+        return torch.asarray(hpu_rng.randn(x.shape), device=x.device, dtype=x.dtype)
+
+    elif shared.opts.randn_source == "CPU" or x.device.type == 'mps':
         return torch.randn_like(x, device=devices.cpu).to(x.device)
 
     return torch.randn_like(x)
 
-
 def randn_without_seed(shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator.
     Use either randn() or manual_seed() to initialize the generator."""
 
     if shared.opts.randn_source == "NV":
         return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
 
-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+        # return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
         return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
 
     return torch.randn(shape, device=devices.device, generator=generator)
 
-
 def manual_seed(seed):
     """Set up a global random number generator using the specified seed."""
+    seed = int(seed)
 
     if shared.opts.randn_source == "NV":
         global nv_rng
         nv_rng = rng_philox.Generator(seed)
-        return
-
-    torch.manual_seed(seed)
-
+    elif shared.opts.randn_source == "HPU":
+        global hpu_rng
+        hpu_rng = rng_philox.Generator(seed)
+        # hpu_rng = habana.HPUGenerator().manual_seed(seed)
+    else:
+        torch.manual_seed(seed)
 
 def create_generator(seed):
+    seed = int(seed)
+
     if shared.opts.randn_source == "NV":
         return rng_philox.Generator(seed)
-
-    device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
-    generator = torch.Generator(device).manual_seed(int(seed))
-    return generator
+    elif shared.opts.randn_source == "HPU":
+        generator = habana.HPUGenerator()
+        return generator.manual_seed(seed)
+    else:
+        device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+        generator = torch.Generator(device=device).manual_seed(seed)
+        return generator
+
+#def randn(seed, shape, generator=None):
+#    """Generate a tensor with random numbers from a normal distribution using seed.
+#
+#    Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
+#
+#    manual_seed(seed)
+#
+#    if shared.opts.randn_source == "NV":
+#        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
+#
+#    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+#        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
+#
+#    return torch.randn(shape, device=devices.device, generator=generator)
+#
+#
+#def randn_local(seed, shape):
+#    """Generate a tensor with random numbers from a normal distribution using seed.
+#
+#    Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
+#
+#    if shared.opts.randn_source == "NV":
+#        rng = rng_philox.Generator(seed)
+#        return torch.asarray(rng.randn(shape), device=devices.device)
+#
+#    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+#    local_generator = torch.Generator(local_device).manual_seed(int(seed))
+#    return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
+#
+#
+#def randn_like(x):
+#    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
+#
+#    Use either randn() or manual_seed() to initialize the generator."""
+#
+#    if shared.opts.randn_source == "NV":
+#        return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)
+#
+#    if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
+#        return torch.randn_like(x, device=devices.cpu).to(x.device)
+#
+#    return torch.randn_like(x)
+#
+#
+#def randn_without_seed(shape, generator=None):
+#    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
+#
+#    Use either randn() or manual_seed() to initialize the generator."""
+#
+#    if shared.opts.randn_source == "NV":
+#        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
+#
+#    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+#        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
+#
+#    return torch.randn(shape, device=devices.device, generator=generator)
+#
+#def manual_seed(seed):
+#    """Set up a global random number generator using the specified seed."""
+#
+#    if shared.opts.randn_source == "NV":
+#        global nv_rng
+#        nv_rng = rng_philox.Generator(seed)
+#        return
+#
+#    torch.manual_seed(seed)
+#
+#
+#def create_generator(seed):
+#    if shared.opts.randn_source == "NV":
+#        return rng_philox.Generator(seed)
+#
+#    device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+#    generator = torch.Generator(device).manual_seed(int(seed))
+#    return generator
 
 
 # from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
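
Note that manual_seed assigns hpu_rng = rng_philox.Generator(seed), so the "HPU" source draws its base noise from the same CPU-side Philox implementation as the "NV" source rather than from htrandom. A small sanity check of the determinism this relies on, assuming rng_philox.Generator.randn returns a NumPy array as it does upstream:

    import numpy as np
    from modules import rng_philox

    a = rng_philox.Generator(1234).randn((2, 3))
    b = rng_philox.Generator(1234).randn((2, 3))
    print(np.array_equal(a, b))  # expected: True (same seed, same noise)
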
diff --git a/modules/shared_options.py b/modules/shared_options.py
index 00b273fa..b4191c0a 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -144,7 +144,7 @@
     "comma_padding_backtrack": OptionInfo(20, "Prompt word wrap length limit", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1}).info("in tokens - for texts shorter than specified, if they don't fit into 75 token limit, move them to the next 75 token chunk"),
     "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}, infotext="Clip skip").link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"),
     "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
-    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
+    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV", "HPU"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards; use HPU to produce same picture as on Habana Processing Units"),
     "tiling": OptionInfo(False, "Tiling", infotext='Tiling').info("produce a tileable picture"),
     "hires_fix_refiner_pass": OptionInfo("second pass", "Hires fix: which pass to enable refiner for", gr.Radio, {"choices": ["first pass", "second pass", "both passes"]}, infotext="Hires refiner"),
 }))
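
A hypothetical console session to confirm the new Radio choice is wired through to modules/rng.py; the hpu:0 device string assumes devices.device resolved to the HPU via the devices.py changes above:

    from modules import shared, rng

    shared.opts.randn_source = "HPU"   # what selecting the new Radio choice sets
    noise = rng.randn(1234, (1, 4, 64, 64))
    print(noise.shape, noise.device)   # expected: torch.Size([1, 4, 64, 64]) hpu:0
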
diff --git a/webui.py b/webui.py
index 12328423..3470ff54 100644
--- a/webui.py
+++ b/webui.py
@@ -6,6 +6,11 @@
 from modules import timer
 from modules import initialize_util
 from modules import initialize
+import habana_frameworks.torch.gpu_migration
+import habana_frameworks.torch.core as htcore
+#import habana_frameworks.torch as htorch
+#import torch.nn as nn
+#import torch.nn.functional as F
 
 startup_timer = timer.startup_timer
 startup_timer.record("launcher")
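
A minimal smoke test for this import order, assuming a Gaudi host with habana_frameworks installed; mark_step only matters in lazy mode, which the PT_HPU_LAZY_MODE line in env.txt toggles:

    import habana_frameworks.torch.gpu_migration  # noqa: F401  (redirects torch.cuda.* calls to the HPU)
    import habana_frameworks.torch.core as htcore
    import torch

    x = torch.ones(2, 2, device="hpu")
    y = x + x
    htcore.mark_step()   # flush the accumulated lazy-mode graph
    print(y.cpu())       # expected: a 2x2 tensor of 2s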