Modifications to support Habana Gaudi HPUs
Ubuntu committed Oct 28, 2023
1 parent 5ef669d commit 385309d
Showing 7 changed files with 204 additions and 40 deletions.
6 changes: 6 additions & 0 deletions env.txt
@@ -0,0 +1,6 @@
export LOG_LEVEL_PT_FALLBACK=1
export ENABLE_CONSOLE=true
export LOG_LEVEL_ALL=4
export GPU_MIGRATION_LOG_LEVEL=3
export PT_HPU_ENABLE_LAZY_COLLECTIVES=true
#export PT_HPU_LAZY_MODE=1
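
These flags configure logging and lazy-mode behavior in the Habana software stack, so they must be in the environment before habana_frameworks initializes (for example, source env.txt in the shell that launches the webui). Below is a minimal sketch of the same setup done from Python instead, under the assumption that the stack reads these variables when habana_frameworks is first imported:

import os

# Assumption: the Habana stack picks these up at first import of habana_frameworks.
os.environ.setdefault("LOG_LEVEL_PT_FALLBACK", "1")
os.environ.setdefault("ENABLE_CONSOLE", "true")
os.environ.setdefault("LOG_LEVEL_ALL", "4")
os.environ.setdefault("GPU_MIGRATION_LOG_LEVEL", "3")
os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true")

import habana_frameworks.torch.core  # noqa: E402  # import only after the flags are set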
14 changes: 10 additions & 4 deletions modules/devices.py
@@ -20,7 +20,8 @@ def get_cuda_device_string():
    if shared.cmd_opts.device_id is not None:
        return f"cuda:{shared.cmd_opts.device_id}"

-    return "cuda"
+    # return "cuda"
+    return "hpu"


def get_optimal_device_name():
@@ -30,7 +31,8 @@ def get_optimal_device_name():
    if has_mps():
        return "mps"

-    return "cpu"
+    # return "cpu"
+    return "hpu"


def get_optimal_device():
@@ -99,11 +101,14 @@ def autocast(disable=False):
    if dtype == torch.float32 or shared.cmd_opts.precision == "full":
        return contextlib.nullcontext()

-    return torch.autocast("cuda")
+    # return torch.autocast("cuda")
+    # return torch.autocast("hpu", dtype=torch.bfloat16, enabled=True)
+    return torch.autocast("hpu", enabled=True)


def without_autocast(disable=False):
-    return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    # return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    return torch.autocast("hpu", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()


class NansException(Exception):
@@ -114,6 +119,7 @@ def test_for_nans(x, where):
    if shared.cmd_opts.disable_nan_check:
        return

+    # print(x)
    if not torch.all(torch.isnan(x)).item():
        return
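
A note on the autocast change above: on Gaudi the autocast dtype defaults to bfloat16, which is presumably why the variant passing dtype=torch.bfloat16 explicitly is left commented out. A minimal sketch of the behavior, assuming habana_frameworks is installed and an HPU is visible:

import torch
import habana_frameworks.torch.core  # registers the "hpu" device

x = torch.ones(8, 8, device="hpu")
with torch.autocast("hpu", enabled=True):
    y = x @ x              # matmul runs in the autocast dtype
print(y.dtype)             # expected: torch.bfloat16, the HPU autocast default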

45 changes: 45 additions & 0 deletions modules/habana.py
@@ -0,0 +1,45 @@
import torch
import habana_frameworks.torch.hpu.random as htrandom

class HPUGenerator:
    def __init__(self):
        self.state = htrandom.get_rng_state()
        self.initial_seed_value = htrandom.initial_seed()

    def get_state(self):
        # PyTorch's Generator.get_state returns a tensor, same as htrandom.get_rng_state
        return htrandom.get_rng_state()

    def set_state(self, state):
        htrandom.set_rng_state(state)
        self.state = state

    def manual_seed(self, seed):
        htrandom.manual_seed(seed)
        self.initial_seed_value = seed
        self.state = htrandom.get_rng_state()
        return self

    def seed(self):
        # Assuming htrandom.seed generates a new seed internally and sets it
        htrandom.seed()
        self.state = htrandom.get_rng_state()
        self.initial_seed_value = htrandom.initial_seed()  # update initial_seed based on the new state
        return self.initial_seed_value  # torch.Generator.seed() returns the new seed, so mirror that here

    def initial_seed(self):
        return self.initial_seed_value

# Usage Example
#generator = HPUGenerator()
#state = generator.get_state()
#print(f"Initial State: {state}")
#
#generator.set_state(state)
#print("State is set back to its initial value.")
#
#generator.manual_seed(42)
#print(f"Manual Seed: {generator.initial_seed()}")
#
#generator.seed()
#print(f"Seed is set to a new value. New Initial Seed: {generator.initial_seed()}")

25 changes: 19 additions & 6 deletions modules/memmon.py
@@ -1,6 +1,7 @@
import threading
import time
from collections import defaultdict
+import habana_frameworks.torch as htorch

import torch

@@ -78,12 +79,24 @@ def read(self):
self.data["free"] = free
self.data["total"] = total

torch_stats = torch.cuda.memory_stats(self.device)
self.data["active"] = torch_stats["active.all.current"]
self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
self.data["system_peak"] = total - self.data["min_free"]
#torch_stats = torch.cuda.memory_stats(self.device)
torch_stats = htorch.hpu.memory_stats()
# {'Limit': 32374775808, 'InUse': 12222592, 'MaxInUse': 12222592, 'NumAllocs': 2, 'NumFrees': 0, 'ActiveAllocs': 2, 'MaxAllocSize': 4014080, 'TotalSystemAllocs': 3, 'TotalSystemFrees': 0, 'TotalActiveAllocs': 3}
self.data["active"] = torch_stats["InUse"]
self.data["active_peak"] = torch_stats["MaxInUse"]
self.data["reserved"] = torch_stats["InUse"]
self.data["reserved_peak"] = torch_stats["MaxInUse"]
self.data["system_peak"] = total - self.data["Limit"]
#self.data["active"] = 0
#self.data["active_peak"] = 0
#self.data["reserved"] = 0
#self.data["reserved_peak"] = 0
#self.data["system_peak"] = 0
#self.data["active"] = torch_stats["active.all.current"]
#self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
#self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
#self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
#self.data["system_peak"] = total - self.data["min_free"]

return self.data
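
Unlike torch.cuda.memory_stats, the HPU allocator reports flat keys (Limit, InUse, MaxInUse, ...) and exposes no separate reserved pool, which is why "reserved" is mapped to InUse above. The same mapping as a standalone helper; a sketch, with hpu_memory_fields being a hypothetical name rather than part of this commit:

import habana_frameworks.torch as htorch

def hpu_memory_fields():
    # Assumption: memory_stats() returns the flat keys shown in the comment above.
    stats = htorch.hpu.memory_stats()
    return {
        "active": stats["InUse"],
        "active_peak": stats["MaxInUse"],
        "reserved": stats["InUse"],        # no distinct reserved pool is reported
        "reserved_peak": stats["MaxInUse"],
        "total": stats["Limit"],
    }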

147 changes: 118 additions & 29 deletions modules/rng.py
@@ -1,84 +1,173 @@
import torch
+import habana_frameworks.torch.hpu.random as htrandom

-from modules import devices, rng_philox, shared
+from modules import devices, rng_philox, shared, habana
+#from habana import HPUGenerator

def randn(seed, shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
-
+    """Generate a tensor with random numbers from a normal distribution using seed."""
    manual_seed(seed)

    if shared.opts.randn_source == "NV":
        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)

-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+#        return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)

    return torch.randn(shape, device=devices.device, generator=generator)


def randn_local(seed, shape):
-    """Generate a tensor with random numbers from a normal distribution using seed.
-
-    Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
-
+    """Generate a tensor with random numbers from a normal distribution using seed."""
+    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device

    if shared.opts.randn_source == "NV":
        rng = rng_philox.Generator(seed)
        return torch.asarray(rng.randn(shape), device=devices.device)

-    local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+    elif shared.opts.randn_source == "HPU":
+        # torch.randn rejects non-torch generators, so draw from the Philox generator here, matching manual_seed's HPU path
+        rng = rng_philox.Generator(int(seed))
+        return torch.asarray(rng.randn(shape), device=devices.device)
+#        local_generator = habana.HPUGenerator().manual_seed(seed)
+#        return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
+
    local_generator = torch.Generator(local_device).manual_seed(int(seed))
    return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)


def randn_like(x):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
-    Use either randn() or manual_seed() to initialize the generator."""
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator."""
    if shared.opts.randn_source == "NV":
        return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)

-    if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        # torch.randn_like has no generator argument, so draw from the Philox generator as the NV branch does
+        return torch.asarray(hpu_rng.randn(x.shape), device=x.device, dtype=x.dtype)
+#        return torch.randn_like(x, generator=hpu_rng).to(x.device)
+
+    elif shared.opts.randn_source == "CPU" or x.device.type == 'mps':
        return torch.randn_like(x, device=devices.cpu).to(x.device)

    return torch.randn_like(x)


def randn_without_seed(shape, generator=None):
-    """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
-
+    """Generate a tensor with random numbers from a normal distribution using the previously initialized generator.
    Use either randn() or manual_seed() to initialize the generator."""

    if shared.opts.randn_source == "NV":
        return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)

-    if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
+    elif shared.opts.randn_source == "HPU":
+        return torch.asarray(hpu_rng.randn(shape), device=devices.device)
+#        return torch.randn(shape, device=devices.device, generator=(generator or hpu_rng))
+
+    elif shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
        return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)

    return torch.randn(shape, device=devices.device, generator=generator)


def manual_seed(seed):
    """Set up a global random number generator using the specified seed."""
+    seed = int(seed)
+
    if shared.opts.randn_source == "NV":
        global nv_rng
        nv_rng = rng_philox.Generator(seed)
        return

-    torch.manual_seed(seed)
+    elif shared.opts.randn_source == "HPU":
+        global hpu_rng
+        hpu_rng = rng_philox.Generator(seed)
+#        hpu_rng = habana.HPUGenerator().manual_seed(seed)
+    else:
+        torch.manual_seed(seed)

def create_generator(seed):
+    seed = int(seed)
+
    if shared.opts.randn_source == "NV":
        return rng_philox.Generator(seed)

-    device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
-    generator = torch.Generator(device).manual_seed(int(seed))
-    return generator
+    elif shared.opts.randn_source == "HPU":
+        generator = habana.HPUGenerator()
+        return generator.manual_seed(seed)
+    else:
+        device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
+        generator = torch.Generator(device=device).manual_seed(seed)
+        return generator
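
Note that hpu_rng above is the CPU-side Philox generator (rng_philox.Generator), the same implementation the "NV" source uses, so HPU-seeded noise should match NV-seeded noise numerically; only the destination device differs. A standalone sketch of that code path, assuming an HPU is visible:

import torch
from modules import rng_philox

hpu_rng = rng_philox.Generator(1234)
noise = torch.asarray(hpu_rng.randn((1, 4, 64, 64)), device="hpu")  # latent-shaped noise placed on the HPU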

#def randn(seed, shape, generator=None):
# """Generate a tensor with random numbers from a normal distribution using seed.
#
# Uses the seed parameter to set the global torch seed; to generate more with that seed, use randn_like/randn_without_seed."""
#
# manual_seed(seed)
#
# if shared.opts.randn_source == "NV":
# return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
#
# if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
# return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
#
# return torch.randn(shape, device=devices.device, generator=generator)
#
#
#def randn_local(seed, shape):
# """Generate a tensor with random numbers from a normal distribution using seed.
#
# Does not change the global random number generator. You can only generate the seed's first tensor using this function."""
#
# if shared.opts.randn_source == "NV":
# rng = rng_philox.Generator(seed)
# return torch.asarray(rng.randn(shape), device=devices.device)
#
# local_device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
# local_generator = torch.Generator(local_device).manual_seed(int(seed))
# return torch.randn(shape, device=local_device, generator=local_generator).to(devices.device)
#
#
#def randn_like(x):
# """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
#
# Use either randn() or manual_seed() to initialize the generator."""
#
# if shared.opts.randn_source == "NV":
# return torch.asarray(nv_rng.randn(x.shape), device=x.device, dtype=x.dtype)
#
# if shared.opts.randn_source == "CPU" or x.device.type == 'mps':
# return torch.randn_like(x, device=devices.cpu).to(x.device)
#
# return torch.randn_like(x)
#
#
#def randn_without_seed(shape, generator=None):
# """Generate a tensor with random numbers from a normal distribution using the previously initialized genrator.
#
# Use either randn() or manual_seed() to initialize the generator."""
#
# if shared.opts.randn_source == "NV":
# return torch.asarray((generator or nv_rng).randn(shape), device=devices.device)
#
# if shared.opts.randn_source == "CPU" or devices.device.type == 'mps':
# return torch.randn(shape, device=devices.cpu, generator=generator).to(devices.device)
#
# return torch.randn(shape, device=devices.device, generator=generator)
#
#def manual_seed(seed):
# """Set up a global random number generator using the specified seed."""
#
# if shared.opts.randn_source == "NV":
# global nv_rng
# nv_rng = rng_philox.Generator(seed)
# return
#
# torch.manual_seed(seed)
#
#
#def create_generator(seed):
# if shared.opts.randn_source == "NV":
# return rng_philox.Generator(seed)
#
# device = devices.cpu if shared.opts.randn_source == "CPU" or devices.device.type == 'mps' else devices.device
# generator = torch.Generator(device).manual_seed(int(seed))
# return generator


# from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
2 changes: 1 addition & 1 deletion modules/shared_options.py
@@ -144,7 +144,7 @@
    "comma_padding_backtrack": OptionInfo(20, "Prompt word wrap length limit", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1}).info("in tokens - for texts shorter than specified, if they don't fit into 75 token limit, move them to the next 75 token chunk"),
    "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}, infotext="Clip skip").link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer"),
    "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
-    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
+    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV", "HPU"]}, infotext="RNG").info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards; use HPU to produce same picture as on Habana Processing Units"),
    "tiling": OptionInfo(False, "Tiling", infotext='Tiling').info("produce a tileable picture"),
    "hires_fix_refiner_pass": OptionInfo("second pass", "Hires fix: which pass to enable refiner for", gr.Radio, {"choices": ["first pass", "second pass", "both passes"]}, infotext="Hires refiner"),
}))
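
With "HPU" registered as a choice, the source can also be switched programmatically through the options API; a sketch, assuming a running webui process with options loaded:

from modules import shared

shared.opts.set("randn_source", "HPU")  # same effect as picking HPU in the settings UI
print(shared.opts.randn_source)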
5 changes: 5 additions & 0 deletions webui.py
@@ -6,6 +6,11 @@
from modules import timer
from modules import initialize_util
from modules import initialize
+import habana_frameworks.torch.gpu_migration
+import habana_frameworks.torch.core as htcore
+#import habana_frameworks.torch as htorch
+#import torch.nn as nn
+#import torch.nn.functional as F

startup_timer = timer.startup_timer
startup_timer.record("launcher")
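
The gpu_migration import works purely through side effects: it has to run before anything touches torch.cuda so that CUDA API calls throughout the webui are redirected to the HPU backend, which is why it sits at the top of webui.py. A minimal sketch of the effect, assuming a Gaudi host:

import habana_frameworks.torch.gpu_migration  # noqa: F401  # side effects only; must come first
import torch

x = torch.zeros(4, device="cuda")  # under migration this allocation is expected to land on the HPU
print(x.device)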
Expand Down
