In [1]:
import numpy as np
import cupy as cp
import time
import sys, os
path_current = '/home/huzuntao/PycharmProjects/MPP_Powersystem/'
path_ = os.getcwd()
if path_current not in sys.path:
    sys.path.insert(1, '/home/huzuntao/PycharmProjects/MPP_Powersystem/')
elif path_ not in sys.path:
    sys.path.insert(1, path_)
import pandapower as pp
from PPOPT_main.PPOPT_main.src.ppopt.mpQCQP_program import MPQCQP_Program

In [20]:
import torch
from torch import tensor, ones, arange

def sparse(*args, **kwargs):
    if len(args) == 1: # sparse((values, crow_ccol))
        values, crow_ccol = args[0]
    elif len(args) == 2: # sparse(values, crow_ccol)
        if len(args[0]) != 2 and (not isinstance(args[1][0], int)):
            values, crow_ccol = args[0], args[1]
        else:
            return torch.sparse_coo_tensor(args[0][1],
                                           values=args[0][0], size=args[1]
                                           ).to_sparse(layout=torch.sparse_csr)
    else:
        raise ValueError("sparse() takes 1 or 2 positional arguments but {} were given".format(len(args)))

    # crow = crow_ccol[0]
    # ccol = crow_ccol[1]

    return torch.sparse_coo_tensor(crow_ccol, values, (max(crow_ccol[0])+1, max(crow_ccol[1])+1)).to_sparse(layout=torch.sparse_csr)




gen = tensor([[500.-0.j,  40.-0.j]], dtype=torch.complex128)
nb = 6
ngon = 2
gbus = [0,2]
## connection matrix, element i, j is 1 if gen on(j) at bus i is ON
Cg = sparse((ones(ngon), (gbus, arange(ngon))), (nb, ngon))

Cg = torch.sparse_coo_tensor([gbus, list(range(ngon))], values=ones(ngon), size=(nb, ngon)).to_sparse(layout=torch.sparse_csr)
Cg.to_dense()

tensor([[1., 0.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])

In [12]:
from scipy.sparse import csr_matrix

csr_matrix((ones(ngon), (gbus, arange(ngon))), (nb, ngon)).toarray()

array([[1., 0.],
       [0., 0.],
       [0., 1.],
       [0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)

In [2]:
# 创建一个5000*5000的随机矩阵
size = 5000
A_np = np.random.rand(size, size)

# 使用numpy求解矩阵的逆
start_time = time.time()
A_inv_np = np.linalg.inv(A_np)
numpy_time = time.time() - start_time
print("Numpy time: {:.5f} seconds".format(numpy_time))

# 创建一个在GPU上的相同的矩阵
A_cp = cp.array(A_np)

# 使用cupy求解矩阵的逆
start_time = time.time()
A_inv_cp = cp.linalg.inv(A_cp)
cupy_time = time.time() - start_time
print("CuPy time: {:.5f} seconds".format(cupy_time))

Numpy time: 1.04764 seconds
CuPy time: 7.38341 seconds


In [3]:
import cupy as cp
from concurrent.futures import ThreadPoolExecutor

def compute(array):
    # 进行某些计算，例如，计算数组的平均值
    return cp.mean(array)
start_time = time.time()
# 创建随机数组列表
arrays = [cp.random.rand(10000, 10000) for _ in range(10)]

# 使用线程池并发计算数组
with ThreadPoolExecutor(max_workers=4) as executor:
    results = executor.map(compute, arrays)

# 输出结果
for i, result in enumerate(results):
    print(f"Array {i}: {result}")
cupy_time = time.time() - start_time
print("CuPy time: {:.5f} seconds".format(cupy_time))

Array 0: 0.5000407201853567
Array 1: 0.49999926493497654
Array 2: 0.5000011522116486
Array 3: 0.5000027345254558
Array 4: 0.49999722897403165
Array 5: 0.49999356236711995
Array 6: 0.4999626574009674
Array 7: 0.4999982962141725
Array 8: 0.5000430950377951
Array 9: 0.5000216948573987
CuPy time: 1.25657 seconds


In [4]:
import numpy as np
from concurrent.futures import ThreadPoolExecutor

def compute(array):
    # 进行某些计算，例如，计算数组的平均值
    return np.mean(array)

start_time = time.time()
# 创建随机数组列表
arrays = [np.random.rand(10000, 10000) for _ in range(10)]

# 使用线程池并发计算数组
with ThreadPoolExecutor(max_workers=4) as executor:
    results = executor.map(compute, arrays)

# 输出结果
for i, result in enumerate(results):
    print(f"Array {i}: {result}")
numpy_time = time.time() - start_time
print("NumPy time: {:.5f} seconds".format(numpy_time))

Array 0: 0.5000347623585583
Array 1: 0.49999509936487035
Array 2: 0.5000061808148327
Array 3: 0.5000198488576899
Array 4: 0.49998476501677364
Array 5: 0.4999975425440861
Array 6: 0.499949577621757
Array 7: 0.49997838469925576
Array 8: 0.4999486261512285
Array 9: 0.5000139524684339
NumPy time: 8.14746 seconds


In [5]:
net = pp.create_empty_network()

# create buses
bus1 = pp.create_bus(net, vn_kv=110.)
bus2 = pp.create_bus(net, vn_kv=110.)
bus3 = pp.create_bus(net, vn_kv=110.)
bus4 = pp.create_bus(net, vn_kv=110.)
bus5 = pp.create_bus(net, vn_kv=110.)
bus6 = pp.create_bus(net, vn_kv=110.)

# create 110 kV lines
pp.create_line(net, bus4, bus5, length_km=90., std_type='149-AL1/24-ST1A 110.0')
pp.create_line(net, bus3, bus4, length_km=90., std_type='149-AL1/24-ST1A 110.0')
pp.create_line(net, bus2, bus3, length_km=90., std_type='149-AL1/24-ST1A 110.0')
pp.create_line(net, bus1, bus2, length_km=70., std_type='149-AL1/24-ST1A 110.0')
pp.create_line(net, bus6, bus3, length_km=70., std_type='149-AL1/24-ST1A 110.0')

# create loads
p_load_1 = 10
p_load_2 = 30
pp.create_load(net, bus2, p_mw=p_load_1, controllable=False)
pp.create_load(net, bus4, p_mw=p_load_2/2, controllable=False)
pp.create_load(net, bus5, p_mw=p_load_2/2, controllable=False)
pp.create_load(net, bus6, p_mw=p_load_2/2, controllable=False)
# create generators
eg = pp.create_ext_grid(net, bus1, min_p_mw=0, max_p_mw=1000, vm_pu=1.05)
g0 = pp.create_gen(net, bus3, p_mw=80, min_p_mw=0, max_p_mw=80, vm_pu=1.00, controllable=True)

costeg = pp.create_poly_cost(net, 0, 'ext_grid', cp1_eur_per_mw=20)
costgen1 = pp.create_poly_cost(net, 0, 'gen', cp1_eur_per_mw=10)
costgen2 = pp.create_poly_cost(net, 1, 'gen', cp1_eur_per_mw=10)

net.bus["min_vm_pu"] = 0.96
net.bus["max_vm_pu"] = 1.04
net.line["max_loading_percent"] = 100
om, ppopt, raw = pp.runopp(net, delta=1e-16, RETURN_RAW_DER=1)

  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):
  for item, dtype in list(dtypes.iteritems()):


In [6]:
import time
import numpy as np
import cupy as cp
from scipy.sparse import coo_matrix

# 随机生成稀疏矩阵
n = 10000   # 矩阵维数
density = 0.0001   # 矩阵稀疏度
data = np.random.rand(int(n**2 * density))   # 随机矩阵元素值
row = np.random.randint(0, n, data.size)   # 随机矩阵元素所在的行
col = np.random.randint(0, n, data.size)   # 随机矩阵元素所在的列

# 使用numpy创建COO稀疏矩阵，并记录时间
print("Start creating COO sparse matrix with numpy...")
start = time.time()
coo_np = coo_matrix((data, (row, col)), shape=(n,n)).tocsr()
print(f"Time for creating COO sparse matrix with numpy: {time.time() - start:.5f}s")

# 使用cupy创建COO稀疏矩阵，并记录时间
print("Start creating COO sparse matrix with cupy...")

data_gpu = cp.asarray(data)
row_gpu = cp.asarray(row)
col_gpu = cp.asarray(col)
start = time.time()
coo_cp = cp.sparse.coo_matrix((data_gpu, (row_gpu, col_gpu)), shape=(n,n)).tocsr()
cp.cuda.Stream.null.synchronize()
print(f"Time for creating COO sparse matrix with cupy: {time.time() - start:.5f}s")

# 对COO稀疏矩阵进行乘法操作，记录时间
print("Start matrix multiplication with numpy...")
start = time.time()
ans_np = coo_np @ coo_np
numpy_time = time.time() - start
print(f"Time for computing operations on sparse matrix with numpy: {numpy_time:.5f}s")

print("Start matrix multiplication with cupy...")
start = time.time()
ans_cp = coo_cp @ coo_cp
cp.cuda.Stream.null.synchronize()  # 必须等待cupy结果，因为它是异步计算的
cupy_time = time.time() - start
print(f"Time for computing operations on sparse matrix with cupy: {cupy_time:.5f}s")

# 比较numpy和cupy实现的乘法操作的时间
print(f"Ratio of computation time(numpy/cupy): {numpy_time/cupy_time:.5f}")



Start creating COO sparse matrix with numpy...
Time for creating COO sparse matrix with numpy: 0.00128s
Start creating COO sparse matrix with cupy...
Time for creating COO sparse matrix with cupy: 1.77718s
Start matrix multiplication with numpy...
Time for computing operations on sparse matrix with numpy: 0.00212s
Start matrix multiplication with cupy...
Time for computing operations on sparse matrix with cupy: 0.00721s
Ratio of computation time(numpy/cupy): 0.29358


In [7]:
### Numpy and CPU
s = time.time()
x_cpu = np.ones((1000,1000,1000))
e = time.time()
print(f'GPU:{e - s}')
s = time.time()
x_gpu = cp.ones((1000,1000,1000))
e = time.time()
print(f'GPU:{e - s}')

GPU:2.3604259490966797
GPU:0.021862506866455078


In [8]:
import numpy as np
import torch
from scipy.sparse import random as sp_random

# 创建稀疏矩阵的参数
n = 1000  # 矩阵维度
density = 0.01  # 非零元素的密度

# 使用NumPy创建稀疏矩阵
def create_sparse_matrix_numpy():
    sparse_matrix = sp_random(n, n, density=density, format='csr')
    return sparse_matrix

# 使用PyTorch创建稀疏矩阵
def create_sparse_matrix_pytorch():
    values = torch.randn(int(n * n * density))
    indices = torch.randint(n, (2, int(n * n * density)))
    sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(n, n))
    return sparse_matrix

# 测试NumPy创建稀疏矩阵的速度
numpy_start_time = time.time()
sparse_matrix_numpy = create_sparse_matrix_numpy()
numpy_end_time = time.time()
numpy_execution_time = numpy_end_time - numpy_start_time

# 测试PyTorch创建稀疏矩阵的速度
torch_start_time = time.time()
sparse_matrix_pytorch = create_sparse_matrix_pytorch()
torch_end_time = time.time()
torch_execution_time = torch_end_time - torch_start_time

# 输出结果
print("NumPy创建稀疏矩阵的时间：", numpy_execution_time)
print("PyTorch创建稀疏矩阵的时间：", torch_execution_time)


NumPy创建稀疏矩阵的时间： 0.017729997634887695
PyTorch创建稀疏矩阵的时间： 0.03204011917114258


In [9]:
import time
import numpy as np
import torch
import cupy as cp
from scipy.sparse import random as sp_random

# 创建稀疏矩阵的参数
n = 1000  # 矩阵维度
density = 0.01  # 非零元素的密度

# 使用NumPy创建稀疏矩阵
def create_sparse_matrix_numpy():
    sparse_matrix = sp_random(n, n, density=density, format='csr')
    return sparse_matrix

# 使用PyTorch创建稀疏矩阵
def create_sparse_matrix_pytorch():
    values = torch.randn(int(n * n * density))
    indices = torch.randint(n, (2, int(n * n * density)))
    sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(n, n))
    return sparse_matrix

# 使用CuPy创建稀疏矩阵
def create_sparse_matrix_cupy():
    values = cp.random.randn(int(n * n * density))
    indices = cp.random.randint(n, size=(2, int(n * n * density)))
    sparse_matrix = cp.sparse.coo_matrix((values, indices), shape=(n, n))
    return sparse_matrix

# 测试NumPy创建稀疏矩阵的速度
numpy_start_time = time.time()
sparse_matrix_numpy = create_sparse_matrix_numpy()
numpy_end_time = time.time()
numpy_execution_time = numpy_end_time - numpy_start_time

# 测试PyTorch创建稀疏矩阵的速度
torch_start_time = time.time()
sparse_matrix_pytorch = create_sparse_matrix_pytorch()
torch_end_time = time.time()
torch_execution_time = torch_end_time - torch_start_time

# 测试CuPy创建稀疏矩阵的速度
cupy_start_time = time.time()
sparse_matrix_cupy = create_sparse_matrix_cupy()
cupy_end_time = time.time()
cupy_execution_time = cupy_end_time - cupy_start_time

# 输出结果
print("NumPy创建稀疏矩阵的时间：", numpy_execution_time)
print("PyTorch创建稀疏矩阵的时间：", torch_execution_time)
print("CuPy创建稀疏矩阵的时间：", cupy_execution_time)


NumPy创建稀疏矩阵的时间： 0.02418231964111328
PyTorch创建稀疏矩阵的时间： 0.00040841102600097656
CuPy创建稀疏矩阵的时间： 6.891504526138306


In [16]:
import numpy as np
import torch
from scipy.sparse import random as sp_random
import time

# 创建稀疏矩阵的参数
n = 1000  # 矩阵维度
density = 0.01  # 非零元素的密度

# 使用NumPy创建稀疏矩阵
def create_sparse_matrix_numpy():
    sparse_matrix = sp_random(n, n, density=density, format='csr')
    return sparse_matrix

# 使用PyTorch创建稀疏矩阵
def create_sparse_matrix_pytorch():
    values = torch.randn(int(n * n * density))
    indices = torch.randint(n, (2, int(n * n * density)))
    sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(n, n))
    return sparse_matrix

# 使用PyTorch在GPU上创建稀疏矩阵
def create_sparse_matrix_pytorch_gpu():
    values = torch.randn(int(n * n * density)).cuda()
    indices = torch.randint(n, (2, int(n * n * density))).cuda()
    sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(n, n))
    return sparse_matrix

def create_sparse_matrix_cupy():
    values = cp.random.randn(int(n * n * density))
    indices = cp.random.randint(n, size=(2, int(n * n * density)))
    sparse_matrix = cp.sparse.coo_matrix((values, indices), shape=(n, n))
    return sparse_matrix

# 测试NumPy创建稀疏矩阵的速度
numpy_start_time = time.time()
sparse_matrix_numpy = create_sparse_matrix_numpy()
numpy_end_time = time.time()
numpy_execution_time = numpy_end_time - numpy_start_time

# 测试CuPy创建稀疏矩阵的速度
cupy_start_time = time.time()
sparse_matrix_pytorch = create_sparse_matrix_cupy()
cupy_end_time = time.time()
cupy_execution_time = cupy_end_time - cupy_start_time

# 测试PyTorch创建稀疏矩阵的速度
torch_start_time = time.time()
sparse_matrix_pytorch = create_sparse_matrix_pytorch()
torch_end_time = time.time()
torch_execution_time = torch_end_time - torch_start_time

# 测试PyTorch在GPU上创建稀疏矩阵的速度
torch_gpu_start_time = time.time()
sparse_matrix_pytorch_gpu = create_sparse_matrix_pytorch_gpu()
torch_gpu_end_time = time.time()
torch_gpu_execution_time = torch_gpu_end_time - torch_gpu_start_time

# 输出结果
print("NumPy创建稀疏矩阵的时间：", numpy_execution_time)
print("CuPy创建稀疏矩阵的时间：", cupy_execution_time)
print("PyTorch创建稀疏矩阵的时间：", torch_execution_time)
print("PyTorch在GPU上创建稀疏矩阵的时间：", torch_gpu_execution_time)


NumPy创建稀疏矩阵的时间： 0.02886819839477539
CuPy创建稀疏矩阵的时间： 0.0017554759979248047
PyTorch创建稀疏矩阵的时间： 0.000438690185546875
PyTorch在GPU上创建稀疏矩阵的时间： 0.00054168701171875


In [11]:
import time
import numpy as np
import torch
import cupy as cp
# import jax
# import jax.numpy as jnp
# from jax.scipy.sparse import coo_matrix as jax_coo_matrix

# 创建稀疏矩阵的参数
n = 1000  # 矩阵维度
density = 0.01  # 非零元素的密度

# 使用NumPy创建稀疏矩阵
def create_sparse_matrix_numpy():
    sparse_matrix = sp_random(n, n, density=density, format='csr')
    return sparse_matrix

# 使用PyTorch创建稀疏矩阵
def create_sparse_matrix_pytorch():
    values = torch.randn(int(n * n * density))
    indices = torch.randint(n, (2, int(n * n * density)))
    sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(n, n))
    return sparse_matrix

# 使用CuPy创建稀疏矩阵
def create_sparse_matrix_cupy():
    values = cp.random.randn(int(n * n * density))
    indices = cp.random.randint(n, size=(2, int(n * n * density)))
    sparse_matrix = cp.sparse.coo_matrix((values, indices), shape=(n, n))
    return sparse_matrix

# 使用JAX创建稀疏矩阵
# def create_sparse_matrix_jax():
#     values = jax.random.randn(int(n * n * density))
#     indices = jax.random.randint(jax.device_get(jax.random.PRNGKey(0)), n, shape=(2, int(n * n * density)))
#     sparse_matrix = jax_random(indices, values, shape=(n, n))
#     return sparse_matrix

# 测试NumPy创建稀疏矩阵的速度
numpy_start_time = time.time()
sparse_matrix_numpy = create_sparse_matrix_numpy()
numpy_end_time = time.time()
numpy_execution_time = numpy_end_time - numpy_start_time

# 测试PyTorch创建稀疏矩阵的速度
torch_start_time = time.time()
sparse_matrix_pytorch = create_sparse_matrix_pytorch()
torch_end_time = time.time()
torch_execution_time = torch_end_time - torch_start_time

# 测试CuPy创建稀疏矩阵的速度
cupy_start_time = time.time()
sparse_matrix_cupy = create_sparse_matrix_cupy()
cupy_end_time = time.time()
cupy_execution_time = cupy_end_time - cupy_start_time

# # 测试JAX创建稀疏矩阵的速度
# jax_start_time = time.time()
# sparse_matrix_jax = create_sparse_matrix_jax()
# jax_end_time = time.time()
# jax_execution_time = jax_end_time - jax_start_time

# 输出结果
print("NumPy创建稀疏矩阵的时间：", numpy_execution_time)
print("PyTorch创建稀疏矩阵的时间：", torch_execution_time)
print("CuPy创建稀疏矩阵的时间：", cupy_execution_time)
# print("JAX创建稀疏矩阵的时间：", jax_execution_time)



NumPy创建稀疏矩阵的时间： 0.025313854217529297
PyTorch创建稀疏矩阵的时间： 0.0004525184631347656
CuPy创建稀疏矩阵的时间： 0.0013811588287353516


In [12]:
import torch

# 创建复数矩阵
real_part = torch.randn(3, 3)  # 实部
imaginary_part = torch.randn(3, 3)  # 虚部
complex_matrix = torch.view_as_complex(torch.stack([real_part, imaginary_part], dim=-1))

# 输出实部
real_matrix = complex_matrix.real
print(real_matrix)


tensor([[-1.4526, -2.2701,  0.1741],
        [-0.6773,  0.0356,  0.4223],
        [-0.7377, -1.8514,  1.7771]])


In [13]:
complex_matrix.imag

tensor([[-0.2868, -2.1745,  0.9911],
        [ 0.2809, -0.0405, -0.7632],
        [-1.1362, -0.1940, -1.2684]])

In [17]:
import torch

# 创建稀疏矩阵
indices = torch.tensor([[0, 1, 1, 2],
                        [1, 0, 2, 1]], dtype=torch.long)
values = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float)
sparse_matrix = torch.sparse_coo_tensor(indices, values, size=(3, 3))

# 输出稀疏矩阵及其转置
print("稀疏矩阵:")
print(sparse_matrix)
print("转置矩阵:")
transpose_matrix = sparse_matrix.t()
print(transpose_matrix)

# 判断矩阵是否为稀疏矩阵
is_sparse = torch.sparse.is_sparse(sparse_matrix)
is_transpose_sparse = torch.sparse.is_sparse(transpose_matrix)

print("原始矩阵是否为稀疏矩阵:", is_sparse)
print("转置矩阵是否为稀疏矩阵:", is_transpose_sparse)


稀疏矩阵:
tensor(indices=tensor([[0, 1, 1, 2],
                       [1, 0, 2, 1]]),
       values=tensor([1., 2., 3., 4.]),
       size=(3, 3), nnz=4, layout=torch.sparse_coo)
转置矩阵:
tensor(indices=tensor([[1, 0, 2, 1],
                       [0, 1, 1, 2]]),
       values=tensor([1., 2., 3., 4.]),
       size=(3, 3), nnz=4, layout=torch.sparse_coo)


AttributeError: module 'torch.sparse' has no attribute 'is_sparse'

In [1]:
import torch

# 生成 CSR 稀疏矩阵
values = torch.tensor([1 + 2j, 3 + 4j, 5 + 6j, 7 + 8j, 9 + 10j], dtype=torch.complex64)
row_indices = torch.tensor([0, 0, 1, 2, 3], dtype=torch.long)
col_indices = torch.tensor([1, 2, 0, 3, 4], dtype=torch.long)
size = (4, 5)
Cf = torch.sparse_csr_tensor(row_indices, col_indices, values, size)

# 计算 Cf 的转置矩阵
# Cf_transpose = Cf.transpose(0, 1)

# 计算 Cf 转置与 Cf 的乘积
product = Cf.transpose(0, 1).mm(Cf)

# 输出结果
print("Cf 转置与 Cf 的乘积:")
print(product.to_dense())  # 转换为稠密矩阵并打印


Cf 转置与 Cf 的乘积:
tensor([[-11.+60.j,   0.+0.j,   0.+0.j,   0.+0.j,   0.+0.j],
        [  0.+0.j,  -3.+4.j,   0.+0.j,   0.+0.j,   0.+0.j],
        [  0.+0.j,   0.+0.j,  -7.+24.j,   0.+0.j,   0.+0.j],
        [-13.+82.j,   0.+0.j,   0.+0.j,   0.+0.j,   0.+0.j],
        [-15.+104.j,   0.+0.j,   0.+0.j,   0.+0.j,   0.+0.j]])


  Cf = torch.sparse_csr_tensor(row_indices, col_indices, values, size)


In [26]:
Cf.T

NotImplementedError: Could not run 'aten::as_strided' with arguments from the 'SparseCsrCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::as_strided' is only available for these backends: [CPU, CUDA, Meta, QuantizedCPU, QuantizedCUDA, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMeta, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

CPU: registered at aten/src/ATen/RegisterCPU.cpp:31034 [kernel]
CUDA: registered at aten/src/ATen/RegisterCUDA.cpp:43986 [kernel]
Meta: registered at aten/src/ATen/RegisterMeta.cpp:26824 [kernel]
QuantizedCPU: registered at aten/src/ATen/RegisterQuantizedCPU.cpp:929 [kernel]
QuantizedCUDA: registered at aten/src/ATen/RegisterQuantizedCUDA.cpp:459 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:491 [backend fallback]
Functionalize: registered at aten/src/ATen/RegisterFunctionalization_0.cpp:20475 [kernel]
Named: fallthrough registered at ../aten/src/ATen/core/NamedRegistrations.cpp:11 [kernel]
Conjugate: fallthrough registered at ../aten/src/ATen/ConjugateFallback.cpp:21 [kernel]
Negative: fallthrough registered at ../aten/src/ATen/native/NegateFallback.cpp:23 [kernel]
ZeroTensor: registered at aten/src/ATen/RegisterZeroTensor.cpp:161 [kernel]
ADInplaceOrView: registered at ../torch/csrc/autograd/generated/ADInplaceOrViewType_0.cpp:4733 [kernel]
AutogradOther: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradCPU: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradCUDA: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradHIP: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradXLA: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradMPS: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradIPU: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradXPU: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradHPU: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradVE: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradLazy: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradMeta: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradMTIA: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradPrivateUse1: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradPrivateUse2: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradPrivateUse3: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
AutogradNestedTensor: registered at ../torch/csrc/autograd/generated/VariableType_0.cpp:15232 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/generated/TraceType_0.cpp:16728 [kernel]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:487 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:354 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:819 [kernel]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1077 [kernel]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:210 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:152 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:487 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]
