In [11]:
from torch_geometric.datasets import Planetoid
import torch
from torch_geometric.data import Data

In [12]:
dataset_cora = Planetoid(root='./cora/', name='Cora')
# dataset = Planetoid(root='./citeseer',name='Citeseer')
# dataset = Planetoid(root='./pubmed/',name='Pubmed')
print(dataset_cora[0])

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [1]:
import transformers
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from transformers import AutoTokenizer, AutoModel
import torch

# 定义模型和tokenizer的本地路径
local_model_path = "/home/btr/bpmn/model/safetensors/Meta-Llama-3-8B-Instruct"

# 加载预训练的tokenizer和模型
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModel.from_pretrained(local_model_path)

# 要处理的输入文本
text = "这是一个示例文本"

# 将文本转换为tokenizer的输入格式
inputs = tokenizer(text, return_tensors="pt")

# 获取模型的最后一层隐藏状态作为嵌入表示
with torch.no_grad():
    outputs = model(**inputs)

# 获取最后一层隐藏状态
last_hidden_states = outputs.last_hidden_state

# 输出嵌入表示
print(last_hidden_states)      

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.47s/it]


tensor([[[ 4.1851, -0.2059, -1.8382,  ..., -2.8908,  1.3605,  0.3109],
         [-1.8622, -0.9529, -2.4024,  ...,  1.0467,  1.3186,  1.0996],
         [-0.2761, -0.4898, -0.9106,  ...,  1.2816,  0.6039,  1.7683],
         ...,
         [-0.3205, -2.7684, -2.1332,  ...,  1.1992,  0.9619,  1.0644],
         [ 2.1264, -4.0235, -3.8833,  ..., -1.4924,  1.3204,  1.5962],
         [-2.7754, -2.8158, -1.3051,  ..., -1.8278, -0.0244,  1.6202]]])


In [7]:
last_hidden_states.shape

torch.Size([1, 7, 4096])

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch_geometric.nn import GATConv

In [14]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SAGEConv(dataset_cora.num_node_features, 16, 'lstm')
        self.conv2 = SAGEConv(16, dataset_cora.num_classes, 'lstm')

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.softmax(x, dim=1)

        return x
    
class GAT(nn.Module):
    def __init__(self):
        super(GAT, self).__init__()
        self.conv1 = GATConv(dataset_cora.num_node_features, 16, heads=2)
        self.conv2 = GATConv(2*16, dataset_cora.num_classes, heads=1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.softmax(x, dim=1)

        return x

In [23]:
model = GAT()
print(model)

GAT(
  (conv1): GATConv(1433, 16, heads=2)
  (conv2): GATConv(32, 7, heads=1)
)


In [24]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
print(device)
model.to(device)
data = dataset_cora[0].to(device)
print(data)

cuda:1
Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [25]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [26]:
model.train()
for epoch in range(200):
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    _, pred = torch.max(out[data.train_mask], dim=1)
    correct = (pred == data.y[data.train_mask]).sum().item()
    acc = correct/data.train_mask.sum().item()

    print('Epoch {:03d} train_loss: {:.4f} train_acc: {:.4f}'.format(
        epoch, loss.item(), acc))

Epoch 000 train_loss: 1.9451 train_acc: 0.1857
Epoch 001 train_loss: 1.9166 train_acc: 0.7214
Epoch 002 train_loss: 1.8723 train_acc: 0.8071
Epoch 003 train_loss: 1.8173 train_acc: 0.7786
Epoch 004 train_loss: 1.7426 train_acc: 0.8000
Epoch 005 train_loss: 1.6530 train_acc: 0.8929
Epoch 006 train_loss: 1.5969 train_acc: 0.8714
Epoch 007 train_loss: 1.5263 train_acc: 0.8571
Epoch 008 train_loss: 1.4645 train_acc: 0.9143
Epoch 009 train_loss: 1.4076 train_acc: 0.9500
Epoch 010 train_loss: 1.3652 train_acc: 0.9429
Epoch 011 train_loss: 1.3142 train_acc: 0.9714
Epoch 012 train_loss: 1.2917 train_acc: 0.9571
Epoch 013 train_loss: 1.2634 train_acc: 0.9714
Epoch 014 train_loss: 1.2558 train_acc: 0.9714
Epoch 015 train_loss: 1.2349 train_acc: 0.9857
Epoch 016 train_loss: 1.2239 train_acc: 0.9786
Epoch 017 train_loss: 1.2058 train_acc: 0.9857
Epoch 018 train_loss: 1.2125 train_acc: 0.9714
Epoch 019 train_loss: 1.2058 train_acc: 0.9786
Epoch 020 train_loss: 1.1977 train_acc: 0.9786
Epoch 021 tra

: 

In [29]:
from transformers import AutoTokenizer, AutoModel

checkpoint = "/home/btr/bpmn/model/safetensors/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

print('vocabulary size:', len(tokenizer))
num_added_toks = tokenizer.add_tokens(['[ENT_START]', '[ENT_END]'], special_tokens=True)
print("After we add", num_added_toks, "tokens")
print('vocabulary size:', len(tokenizer))

model.resize_token_embeddings(len(tokenizer))
print(model.embed_tokens.weight.size())

# Randomly generated matrix
print(model.embed_tokens.weight[-2:, :])

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

vocabulary size: 128256
After we add 2 tokens
vocabulary size: 128258
torch.Size([128258, 4096])
tensor([[-0.0186, -0.0514,  0.0089,  ...,  0.0045,  0.0186, -0.0358],
        [ 0.0263, -0.0249,  0.0034,  ..., -0.0169,  0.0155, -0.0097]],
       grad_fn=<SliceBackward0>)


In [30]:
with torch.no_grad():
    model.embed_tokens.weight[-2:, :] = torch.zeros([2, model.config.hidden_size], requires_grad=True)
print(model.embed_tokens.weight[-2:, :])

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], grad_fn=<SliceBackward0>)


In [25]:
descriptions = ['start of entity', 'end of entity']

with torch.no_grad():
    for i, token in enumerate(reversed(descriptions), start=1):
        tokenized = tokenizer.tokenize(token)
        print(tokenized)
        tokenized_ids = tokenizer.convert_tokens_to_ids(tokenized)
        new_embedding = model.embeddings.word_embeddings.weight[tokenized_ids].mean(axis=0)
        model.embeddings.word_embeddings.weight[-i, :] = new_embedding.clone().detach().requires_grad_(True)
print(model.embeddings.word_embeddings.weight[-2:, :])

['end', 'of', 'entity']
['start', 'of', 'entity']
tensor([[-0.0340, -0.0144, -0.0441,  ..., -0.0016,  0.0318, -0.0151],
        [-0.0060, -0.0202, -0.0312,  ..., -0.0084,  0.0193, -0.0296]],
       grad_fn=<SliceBackward0>)


In [26]:
print(model.embeddings.word_embeddings.weight[-2:, :] * 10 + model.embeddings.word_embeddings.weight[-2:, :]*10)

tensor([[-0.6809, -0.2879, -0.8822,  ..., -0.0319,  0.6351, -0.3029],
        [-0.1190, -0.4035, -0.6236,  ..., -0.1687,  0.3868, -0.5921]],
       grad_fn=<AddBackward0>)


In [16]:
import torch

# 假设我们有两个特征张量，分别是文本嵌入和节点类型嵌入
# 假设它们的形状是 [batch_size, feature_size]
text_embedding = torch.randn(10, 3)  # 随机生成示例数据
node_type_embedding = torch.randn(10, 3)  # 同上

# 定义权重向量，权重可以根据需要进行调整
# 假设权重是可学习的参数，这里我们随机初始化
weights = torch.nn.Parameter(torch.randn(2))

# 计算权重的指数，使得权重的和为1（softmax）
weights = torch.softmax(weights, dim=0)
print("权重向量:", weights)
# 将权重扩展到特征张量的形状
weights_text = weights[0].expand_as(text_embedding)
print("扩展后的权重向量:", weights_text)
weights_node_type = weights[1].expand_as(node_type_embedding)

# 计算加权求和
weighted_sum = (text_embedding * weights_text) + (node_type_embedding * weights_node_type)

print("加权求和结果:", weighted_sum)

权重向量: tensor([0.5544, 0.4456], grad_fn=<SoftmaxBackward0>)
扩展后的权重向量: tensor([[0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544],
        [0.5544, 0.5544, 0.5544]], grad_fn=<ExpandBackward0>)
加权求和结果: tensor([[ 2.1220, -0.7132,  0.3025],
        [-0.2825,  0.2893,  0.3864],
        [ 0.0700,  1.7125, -1.0699],
        [ 0.7106,  0.9815, -0.2224],
        [-0.1663, -0.2572,  0.0807],
        [ 0.4351,  1.0679,  0.3167],
        [-0.1876,  0.3540,  0.6517],
        [-0.6215, -0.5523,  0.1861],
        [ 0.0147,  1.1826,  0.8368],
        [ 1.2769,  0.4824,  0.1356]], grad_fn=<AddBackward0>)


In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

class FeatureFusionModule(nn.Module):
    def __init__(self, text_embedding_dim, node_type_dim, output_dim):
        super(FeatureFusionModule, self).__init__()
        # 假设文本嵌入和节点类型嵌入的维度分别是text_embedding_dim和node_type_dim
        self.fc_text = nn.Linear(text_embedding_dim, output_dim)
        self.fc_node_type = nn.Linear(node_type_dim, output_dim)
        self.fc_fusion = nn.Linear(output_dim, output_dim)
        
        # 初始化权重参数
        self.weights = nn.Parameter(torch.randn(2))

    def forward(self, text_embedding, node_type_embedding):
        # 分别对文本嵌入和节点类型嵌入应用全连接层
        text_output = self.fc_text(text_embedding)
        node_type_output = self.fc_node_type(node_type_embedding)
        
        # 使用权重对输出进行加权
        text_weighted = text_output * self.weights[0]
        node_type_weighted = node_type_output * self.weights[1]
        
        # 将加权结果合并并再次通过全连接层
        fused_output = self.fc_fusion(text_weighted + node_type_weighted)
        return fused_output

class GCNClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCNClassifier, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        # 第一层图卷积
        x = self.conv1(x, edge_index).relu()
        # 第二层图卷积
        x = self.conv2(x, edge_index)
        return x

# 假设我们有输入数据和目标
# 这里我们使用随机数据作为示例
num_nodes = 1000
num_edges = 2000
edge_index = torch.randint(0, num_nodes, (2, num_edges))
text_embedding = torch.randn((num_nodes, 16))  # 假设每个节点有16维的文本嵌入
node_type_embedding = torch.randn((num_nodes, 8))  # 假设每个节点有8维的节点类型嵌入
y = torch.randint(0, 4, (num_nodes,))  # 假设有4个类别

# 实例化特征融合模块和GCN分类器
feature_fusion = FeatureFusionModule(16, 8, 32)  # 假设输出融合特征维度为32
gcn_classifier = GCNClassifier(32, 64, 4)  # 假设GCN的隐藏层维度为64，输出类别数为4

# 将特征融合模块的输出作为GCN分类器的输入
combined_features = feature_fusion(text_embedding, node_type_embedding)

# 将图结构信息和特征信息整合到Data对象中
data = Data(x=combined_features, edge_index=edge_index, y=y)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(list(feature_fusion.parameters()) + list(gcn_classifier.parameters()), lr=0.01)

# 训练模型
num_epochs = 200
for epoch in range(num_epochs):
    optimizer.zero_grad()
    out = gcn_classifier(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    
    if epoch % 20 == 0:
        print(f'Epoch: {epoch:03d}, Loss: {loss.item():.4f}')

# 训练完成后，feature_fusion.weights 和 gcn_classifier 的参数都学习到了
print("学习到的特征融合权重:", feature_fusion.weights)

Epoch: 000, Loss: 1.3854


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [34]:
from transformers import AutoTokenizer, AutoModel
import torch

# 定义模型和tokenizer的本地路径
local_model_path = "/home/btr/bpmn/model/safetensors/bert-base-uncased"

# 加载预训练的tokenizer和模型
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModel.from_pretrained(local_model_path)

raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
outputs = model(**inputs)
print(outputs.last_hidden_state)  # 输出形状 (batch_size, sequence_length, hidden_size)
print(outputs.last_hidden_state[:, 0, :])  # 输出形状 (batch_size, hidden_sizequence_length, hidden_size)
print(outputs.last_hidden_state[:, 0, :].shape)  # 输出形状 (batch_size, hidden_size)
print(outputs.pooler_output)  # 输出形状 (batch_size, hidden_size)
# # 获取嵌入表示
# embeddings = []
# for text in texts:
#     inputs = tokenizer(text, return_tensors="pt")
#     with torch.no_grad():
#         outputs = model(**inputs)
#     # 取最后一层的[CLS] token作为文本的表示
#     cls_embedding = outputs.last_hidden_state[:, 0, :]  # (batch_size, hidden_size)
#     embeddings.append(cls_embedding)
# print(embeddings[0].shape)  # (1, hidden_size)
# # 整理为一个Tensor
# text_embeddings = torch.cat(embeddings, dim=0)
# print(text_embeddings.shape)  # (num_texts, hidden_size)  

tensor([[[ 1.3948e-01, -1.2277e-01, -1.0967e-01,  ..., -2.0057e-01,
           3.4953e-01,  7.2549e-01],
         [ 3.9157e-01,  1.0379e-01, -6.5234e-01,  ..., -5.1784e-04,
           7.4655e-01,  4.0899e-01],
         [ 7.3401e-01,  3.4777e-01, -3.3854e-03,  ..., -2.0010e-02,
          -5.1007e-01, -4.2176e-01],
         ...,
         [ 6.5899e-01, -3.5357e-01, -2.2464e-01,  ..., -5.1779e-01,
          -3.7647e-01,  5.5006e-01],
         [ 6.7845e-01,  2.9403e-01, -2.0974e-01,  ...,  2.5586e-01,
          -2.7047e-01, -5.8366e-01],
         [-1.6920e-02,  1.0900e-01,  1.5850e-01,  ...,  6.9561e-01,
          -1.6555e-01, -9.8933e-02]],

        [[ 1.5857e-01,  4.8041e-01, -1.7775e-01,  ..., -2.5178e-01,
           3.5022e-01,  3.7165e-01],
         [ 2.8165e-01,  4.2239e-01, -3.7917e-01,  ..., -1.3355e-02,
           8.8876e-01,  4.2816e-01],
         [ 4.6219e-01,  6.3719e-01,  3.8362e-01,  ...,  2.5919e-01,
           1.8919e-01,  3.1840e-01],
         ...,
         [ 3.0722e-01,  5

In [39]:
from transformers import AutoTokenizer, LlamaModel
import torch
checkpoint = "/home/btr/bpmn/model/safetensors/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenizer.pad_token = tokenizer.eos_token
model = LlamaModel.from_pretrained(checkpoint)

print('vocabulary size:', len(tokenizer))
num_added_toks = tokenizer.add_tokens(['[activity]', '[condition]', '[sign-successor]', '[sign-selection]', '[sign-parallel]', '[sign-loop]'], special_tokens=True)
print("After we add", num_added_toks, "tokens")
print('vocabulary size:', len(tokenizer))
model.resize_token_embeddings(len(tokenizer))

new_token = {'[activity]': "activity event entity",
                     '[condition]': "gateway conditions",
                     '[sign-successor]': "sequential signal words",
                     '[sign-selection]': "selective signal words",
                     '[sign-parallel]': "parallel signal words",
                     '[sign-loop]': "cyclic signal words"}
with torch.no_grad():
    for i, (k, v) in enumerate(reversed(new_token.items()), start=1):
        print(i, k, v)
        tokenized = tokenizer.tokenize(v)
        tokenized_ids = tokenizer.convert_tokens_to_ids(tokenized)
        new_token_emb = model.embed_tokens.weight[tokenized_ids].mean(dim=0)
        model.embed_tokens.weight[-i, :] = new_token_emb.clone().detach().requires_grad_(True)

with torch.no_grad():
    inputs = tokenizer(['[activity]', '[condition]'], return_tensors="pt", padding=True, truncation=True)
    outputs = model(**inputs)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

vocabulary size: 128256
After we add 6 tokens
vocabulary size: 128262


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


1 [sign-loop] cyclic signal words
2 [sign-parallel] parallel signal words
3 [sign-selection] selective signal words
4 [sign-successor] sequential signal words
5 [condition] gateway conditions
6 [activity] activity event entity


In [40]:
print(outputs.last_hidden_state.shape)

torch.Size([2, 2, 4096])


In [15]:
from transformers import AutoTokenizer, LlamaModel, BertModel
import torch
checkpoint = "/home/btr/bpmn/model/safetensors/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = LlamaModel.from_pretrained(checkpoint)
print(model)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaModel(
  (embed_tokens): Embedding(128256, 4096)
  (layers): ModuleList(
    (0-31): 32 x LlamaDecoderLayer(
      (self_attn): LlamaSdpaAttention(
        (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
        (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
        (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        (rotary_emb): LlamaRotaryEmbedding()
      )
      (mlp): LlamaMLP(
        (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
        (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
        (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
        (act_fn): SiLU()
      )
      (input_layernorm): LlamaRMSNorm()
      (post_attention_layernorm): LlamaRMSNorm()
    )
  )
  (norm): LlamaRMSNorm()
)


In [25]:
print(model.embed_tokens.weight.size())
print(model.transformer.wte.weight)

torch.Size([128256, 4096])


AttributeError: 'LlamaModel' object has no attribute 'transformer'

In [14]:
import torch

# 创建一个二维张量
tensor_2d = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)

# 计算整个张量的均值
mean_all = torch.mean(tensor_2d)

# 沿着特定维度计算均值
mean_dim0 = torch.mean(tensor_2d, dim=0)  # 沿着行计算均值
mean_dim1 = torch.mean(tensor_2d, dim=1)  # 沿着列计算均值

print("整体均值:", mean_all)
print("沿着行的均值:", mean_dim0)
print("沿着列的均值:", mean_dim1)

整体均值: tensor(3.5000)
沿着行的均值: tensor([2.5000, 3.5000, 4.5000])
沿着列的均值: tensor([2., 5.])


In [4]:
from transformers import AutoTokenizer, AutoModel
import torch
checkpoint = "/home/btr/bpmn/model/safetensors/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)
print(model.config.hidden_size)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00,  1.35s/it]

4096





In [101]:
import numpy as np
from scipy.sparse import csr_matrix
import torch

# 创建一个邻接矩阵
adj_matrix = torch.tensor([[0, 2, 0],
                        [1, 0, -1],
                        [0, 1, 0]])

# 邻接矩阵转稀疏矩阵
sparse_matrix = csr_matrix(adj_matrix)
print("稀疏矩阵：")
print(sparse_matrix)

# 稀疏矩阵转邻接矩阵
adj_matrix_converted = sparse_matrix.toarray()
print("转换回的邻接矩阵：")
print(adj_matrix_converted)

稀疏矩阵：
  (0, 1)	2
  (1, 0)	1
  (1, 2)	-1
  (2, 1)	1
转换回的邻接矩阵：
[[ 0  2  0]
 [ 1  0 -1]
 [ 0  1  0]]


In [19]:
def adj_matrix_to_adj_list(matrix):
    adj_list = {}
    n = len(matrix)
    for i in range(n):
        adj_list[i] = [j for j in range(n) if matrix[i][j] != 0]
    return adj_list

def adj_list_to_adj_matrix(adj_list):
    n = len(adj_list)
    matrix = [[0] * n for _ in range(n)]
    for i in range(n):
        for j in adj_list[i]:
            matrix[i][j] = 1  # 可以根据需求修改为权重
    return matrix
# 示例邻接矩阵
adj_matrix = [
    [0, 2, 3, 0],
    [1, 0, 9, 1],
    [1, 0, 0, 1],
    [0, 1, 1, 0]
]

# 转换为邻接表
adj_list = adj_matrix_to_adj_list(adj_matrix)
print("邻接表:", adj_list)

# 从邻接表转换回邻接矩阵
new_adj_matrix = adj_list_to_adj_matrix(adj_list)
print("邻接矩阵:", new_adj_matrix)

邻接表: {0: [1, 2], 1: [0, 3], 2: [0, 3], 3: [1, 2]}
邻接矩阵: [[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]]


In [108]:
import torch
from torch_geometric.data import Data, Batch
x = torch.tensor([[2,1], [5,6], [3,7], [12,0]])
y = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7])
edge_index = torch.tensor([[0, 1, 1, 1, 1, 2, 0, 3], 
                           [1, 0, 0, 0, 0, 1, 3, 2]])

data = Data(x=x, edge_index=edge_index, y=y)
print(data.edge_index)

tensor([[0, 1, 1, 1, 1, 2, 0, 3],
        [1, 0, 0, 0, 0, 1, 3, 2]])


In [79]:
import torch

a = [1,2,3,4,0,0,0,1,2,0]
b = [1,3,2,4,0,0,0,1,3,1]
c = filter(lambda x: x != 0, a)

[1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]


In [3]:
import torch

a = torch.ones(20, 128)
b = torch.zeros(20, 128)
c = torch.rand(20, 128)

# 将a, b, c组合成（3, 20, 128）
combined = torch.stack([a, b, c], dim=0)
print(combined)  # 输出: torch.Size([3, 20, 128])
print(combined.shape)  # 输出: torch.Size([3, 20, 128])


tensor([[[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         ...,
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.9296, 0.7615, 0.3728,  ..., 0.1704, 0.8016, 0.0198],
         [0.6924, 0.6742, 0.3752,  ..., 0.5237, 0.5476, 0.9446],
         [0.6509, 0.8919, 0.0971,  ..., 0.7914, 0.8428, 0.

In [20]:
import torch
 
# Set print options# Define a tensor
a = torch.tensor(
    [-1.8297, -1.9489, -1.9937, -1.9937, -2.0225, -2.0375, -2.0499,
     -2.6950, -2.6967, -2.8939, -2.9030, -2.9107, -2.9385, -2.9468,
     -2.9705, -2.9777])
# a = torch.sort(input=a, descending=False).values
# print(a, torch.mean(a), torch.std(a))
 
# Z-score standardization
mean_a = torch.mean(a)
std_a = torch.std(a)
n1 = (a - mean_a) / std_a
print(n1, torch.mean(n1), torch.std(n1))
 
# Min-Max scaling
min_a = torch.min(a)
max_a = torch.max(a)
n2 = (a - min_a) / (max_a - min_a)
print(n2, torch.mean(n2), torch.std(n2))

tensor([ 1.4010,  1.1473,  1.0520,  1.0520,  0.9907,  0.9588,  0.9324, -0.4404,
        -0.4440, -0.8637, -0.8830, -0.8994, -0.9586, -0.9762, -1.0267, -1.0420]) tensor(7.4506e-08) tensor(1.)
tensor([1.0000, 0.8962, 0.8571, 0.8571, 0.8321, 0.8190, 0.8082, 0.2463, 0.2448,
        0.0730, 0.0651, 0.0584, 0.0341, 0.0269, 0.0063, 0.0000]) tensor(0.4265) tensor(0.4093)


In [2]:
import numpy as np

# 创建一个10x10的矩阵
matrix = np.arange(100).reshape(10, 10)

# 输出原矩阵
print("原矩阵：")
print(matrix)

# 提取4x4子矩阵
sub_matrices = []

for i in range(8):  # 由于是4x4的框，行和列的最大起始索引是7
    sub_matrix = matrix[i:i+3, i:i+3]
    sub_matrices.append(sub_matrix)

# 输出提取的4x4子矩阵
print("\n提取的4x4子矩阵：")
for idx, sub_matrix in enumerate(sub_matrices):
    print(f"子矩阵 {idx+1}:")
    print(sub_matrix)

原矩阵：
[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]

提取的4x4子矩阵：
子矩阵 1:
[[ 0  1  2]
 [10 11 12]
 [20 21 22]]
子矩阵 2:
[[11 12 13]
 [21 22 23]
 [31 32 33]]
子矩阵 3:
[[22 23 24]
 [32 33 34]
 [42 43 44]]
子矩阵 4:
[[33 34 35]
 [43 44 45]
 [53 54 55]]
子矩阵 5:
[[44 45 46]
 [54 55 56]
 [64 65 66]]
子矩阵 6:
[[55 56 57]
 [65 66 67]
 [75 76 77]]
子矩阵 7:
[[66 67 68]
 [76 77 78]
 [86 87 88]]
子矩阵 8:
[[77 78 79]
 [87 88 89]
 [97 98 99]]


In [12]:
from graphviz import Digraph

# 实例化一个Digraph对象(有向图)，name:生成的图片的图片名，format:生成的图片格式
dot = Digraph(name="MyPicture", format="png")

# 生成图片节点，name：这个节点对象的名称，label:节点名,color：画节点的线的颜色
dot.node(name='a', label='Ming', color='green')
dot.node(name='b', label='Hong', color='yellow')
dot.node(name='c', label='Dong')

# 在节点之间画线，label：线上显示的文本,color:线的颜色
dot.edge('a', 'b', label="ab\na-b", color='red')
# 一次性画多条线，c到b的线，a到c的线
dot.edges(['cb', 'ac'])

# 打印生成的源代码
print(dot.source)


# 跟view一样的用法(render跟view选择一个即可)，一般用render生成图片，不使用view=True,view=True用在调试的时候
dot.render(filename='MyPicture', directory="/home/btr/bpmn/LLMEnG")

digraph MyPicture {
	a [label=Ming color=green]
	b [label=Hong color=yellow]
	c [label=Dong]
	a -> b [label="ab
a-b" color=red]
	c -> b
	a -> c
}



'/home/btr/bpmn/LLMEnG/MyPicture.png'

'hello.gv.pdf'

Error: no "view" rule for type "application/pdf" passed its test case
       (for more information, add "--debug=1" on the command line)
Loading "original-fs" failed
Error: Cannot find module 'original-fs'
Require stack:
- /home/btr/.vscode-server/cli/servers/Stable-fee1edb8d6d72a0ddff41e5f71a671c23ed924b9/server/out/server-cli.js
[90m    at Module._resolveFilename (node:internal/modules/cjs/loader:1145:15)[39m
[90m    at Module._load (node:internal/modules/cjs/loader:986:27)[39m
[90m    at Module.require (node:internal/modules/cjs/loader:1233:19)[39m
[90m    at require (node:internal/modules/helpers:179:18)[39m
    at i (/home/btr/.vscode-server/cli/servers/Stable-fee1edb8d6d72a0ddff41e5f71a671c23ed924b9/server/out/server-cli.js:3:98)
    at r.load (/home/btr/.vscode-server/cli/servers/Stable-fee1edb8d6d72a0ddff41e5f71a671c23ed924b9/server/out/server-cli.js:2:1637)
    at h.load (/home/btr/.vscode-server/cli/servers/Stable-fee1edb8d6d72a0ddff41e5f71a671c23ed924b9/server/out/se