In [1]:
import gc

import torch

# Drop unreachable Python objects first, so tensors that are only kept alive
# by reference cycles are handed back to PyTorch before the cache is emptied.
gc.collect()

# torch.cuda.ipc_collect() initialises CUDA and raises when no usable GPU is
# present, so guard the CUDA-specific cleanup instead of letting the first
# cell of the notebook fail on a CPU-only machine.
if torch.cuda.is_available():
    # Release the unoccupied memory blocks cached by PyTorch's allocator
    torch.cuda.empty_cache()

    # Reclaim GPU memory that was shared through CUDA IPC and since released
    torch.cuda.ipc_collect()


In [3]:
import torch
import triton
import triton.language as tl

@triton.jit
def add_kernel(x_ptr, y_ptr, output_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    # Element-wise sum over flat buffers: output[i] = x[i] + y[i].
    # Each program instance along axis 0 handles one run of BLOCK_SIZE
    # consecutive indices.
    block_id = tl.program_id(axis=0)
    idx = block_id * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)

    # Indices at or beyond n_elements are masked out of the loads and the store
    in_range = idx < n_elements

    lhs = tl.load(x_ptr + idx, mask=in_range)
    rhs = tl.load(y_ptr + idx, mask=in_range)
    tl.store(output_ptr + idx, lhs + rhs, mask=in_range)

def add(x: torch.Tensor, y: torch.Tensor):
    """Return the element-wise sum ``x + y`` computed by ``add_kernel``.

    The kernel addresses its operands as flat buffers (pointer + offset), so
    both inputs must have the same shape and device. They are made contiguous
    before the launch so that strided views are paired element by element.

    Args:
        x: First operand.
        y: Second operand; must match ``x`` in shape and device.

    Returns:
        A new tensor created with ``torch.empty_like`` from the contiguous
        ``x`` and filled with ``x + y``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in shape or device.
    """
    if x.shape != y.shape:
        raise ValueError(
            f"add() expects operands of equal shape, got {tuple(x.shape)} and {tuple(y.shape)}"
        )
    if x.device != y.device:
        raise ValueError(
            f"add() expects operands on the same device, got {x.device} and {y.device}"
        )

    # The kernel walks memory linearly; without this a transposed or sliced
    # view would be read in storage order rather than logical order.
    x = x.contiguous()
    y = y.contiguous()

    output = torch.empty_like(x)
    n_elements = output.numel()
    if n_elements == 0:
        # Nothing to compute: skip the kernel launch entirely.
        return output

    # One program instance per BLOCK_SIZE-sized run of elements (rounded up)
    grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
    add_kernel[grid](x, y, output, n_elements, BLOCK_SIZE=1024)
    return output

# Smoke test: the Triton kernel must reproduce PyTorch's own a + a.
a = torch.rand(3, device="cuda")
b = a + a
b_compiled = add(a, a)
print(b_compiled - b)
print("If you see tensor([0., 0., 0.], device='cuda:0'), then it works")

# Fail loudly instead of relying on someone reading the printed difference.
assert torch.equal(b_compiled, b), "Triton add_kernel result differs from a + a"

tensor([0., 0., 0.], device='cuda:0')
If you see tensor([0., 0., 0.], device='cuda:0'), then it works


In [6]:
import torch

# Print the CUDA version reported by torch.version.cuda and whether CUDA is
# usable, then list every visible device by index and name.
print(torch.version.cuda)
print(torch.cuda.is_available())

if not torch.cuda.is_available():
    print("CUDA is not available")
else:
    print("CUDA is available")
    device_total = torch.cuda.device_count()
    print(f"Number of devices: {device_total}")
    for index in range(device_total):
        print(f"{index}: {torch.cuda.get_device_name(index)}")

12.8
True
CUDA is available
Number of devices: 2
0: NVIDIA GeForce RTX 5090
1: NVIDIA GeForce RTX 5070 Ti


In [7]:
# Summarise the PyTorch/CUDA environment in one place.
print("PyTorch CUDA available:", torch.cuda.is_available())
print("CUDA Version:", torch.version.cuda)
print("Torch Version:", torch.__version__)

# Device queries raise when no CUDA device is usable, so only ask for the
# name and compute capability of device 0 when CUDA is actually available.
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Compute Capability:", torch.cuda.get_device_capability(0))
else:
    print("GPU Name: n/a (no CUDA device available)")


PyTorch CUDA available: True
CUDA Version: 12.8
Torch Version: 2.8.0.dev20250405+cu128
GPU Name: NVIDIA GeForce RTX 5090
Compute Capability: (12, 0)


In [8]:
#pip install --upgrade pip

In [4]:
#pip uninstall torch torchvision torchaudio -y

Found existing installation: torch 2.6.0
Uninstalling torch-2.6.0:
  Successfully uninstalled torch-2.6.0
Found existing installation: torchvision 0.21.0
Uninstalling torchvision-0.21.0:
  Successfully uninstalled torchvision-0.21.0
Found existing installation: torchaudio 2.6.0
Uninstalling torchaudio-2.6.0:
  Successfully uninstalled torchaudio-2.6.0
Note: you may need to restart the kernel to use updated packages.


You can safely remove it manually.


In [11]:
#pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128


In [None]:
#pip install triton-windows

In [13]:
pip list

Package                 Version
----------------------- ------------------------
aiohappyeyeballs        2.4.6
aiohttp                 3.11.12
aiosignal               1.3.2
asttokens               3.0.0
astunparse              1.6.3
attrs                   25.1.0
av                      14.2.0
certifi                 2025.1.31
cffi                    1.17.1
chardet                 5.2.0
charset-normalizer      3.4.1
click                   8.1.8
cmake                   3.31.6
color-matcher           0.5.0
colorama                0.4.6
comm                    0.2.2
contourpy               1.3.1
cryptography            44.0.0
cycler                  0.12.1
ddt                     1.7.2
debugpy                 1.8.13
decorator               5.2.1
Deprecated              1.2.18
docutils                0.21.2
einops                  0.8.0
executing               2.2.0
expecttest              0.3.0
filelock                3.17.0
fonttools               4.56.0
frozenlist              1.5.0
fs

In [10]:
#pip install -U --pre triton-windows

In [None]:
# ComfyUI setup with SageAttention and Triton, step by step.
# NOTE(review): these look like terminal commands to run one at a time, not a
# runnable notebook cell (the cell has never been executed) - confirm.

# 1. Create and activate a dedicated conda environment (Python 3.12.9).
conda create -n comfyui python=3.12.9 -y

conda activate comfyui

# 2. Fetch ComfyUI and install its Python requirements.
git clone https://github.com/comfyanonymous/ComfyUI.git
cd ComfyUI

pip install -r requirements.txt

# 3. Replace whatever torch build is installed with the nightly cu128 wheels.
pip uninstall torch torchvision torchaudio -y

pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128

# 4. Install SageAttention and a pinned triton-windows release.
pip install sageattention

pip install triton-windows==3.3.0.post19

# SageAttention source: https://github.com/thu-ml/SageAttention
# There is a newer version for the RTX 4000 and 5000 series, but it has to be
# compiled manually:
# git clone https://github.com/thu-ml/SageAttention.git
# cd SageAttention
# python setup.py install  # or pip install -e .

# 5. Install ComfyUI-Manager as a custom node.
cd custom_nodes
git clone https://github.com/ltdrdata/ComfyUI-Manager.git
cd ComfyUI-Manager

# Go back up two levels, from custom_nodes/ComfyUI-Manager to the ComfyUI root.
cd ..
cd ..

# 6. Start the ComfyUI server with the --use-sage-attention flag.
python main.py --use-sage-attention

# TODO: add screenshots of starting the server, opening the web browser, and
# installing nodes via the manager.

# TODO: add a note that when importing custom nodes, the dependencies of each
# custom node must be installed as well, e.g. from inside the node's folder:
#pip install -r requirements.txt