In [1]:
import argparse
import torch
import os
import json
from tqdm import tqdm
import shortuuid

from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from torch.utils.data import Dataset, DataLoader
import math


def split_list(lst, n):
    """Split ``lst`` into consecutive chunks of ``ceil(len(lst) / n)`` items.

    The result holds at most ``n`` chunks; it can hold fewer when the
    rounded-up chunk size exhausts the list early (5 items with ``n=4``
    gives 3 chunks). An empty ``lst`` yields an empty list.

    Args:
        lst: Sequence supporting ``len`` and slicing.
        n: Desired number of chunks; expected to be a positive integer.

    Returns:
        list: Slices of ``lst`` in their original order.
    """
    if len(lst) == 0:
        # chunk_size would be 0 here and range() rejects a zero step.
        return []
    chunk_size = math.ceil(len(lst) / n)  # ceiling division, not floor
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]


def get_chunk(lst, n, k):
    """Return the ``k``-th chunk of ``split_list(lst, n)``."""
    return split_list(lst, n)[k]


# Custom dataset class
class CustomDataset(Dataset):
    """Dataset that turns JSON records into ``(input_ids, image_tensor)`` pairs.

    Each record loaded from ``data_path`` is read as
    ``record["conversations"][0]["value"]`` (the question text) and
    ``record["tensor"]`` (the image data).
    """

    def __init__(self, data_path, tokenizer, model_config, conv_mode=None):
        """Load every record from ``data_path``.

        Args:
            data_path: Path to a JSON file holding a list of records.
            tokenizer: Tokenizer handed to ``tokenizer_image_token``.
            model_config: Stored on the instance; not read by this class.
            conv_mode: Optional key into ``conv_templates``. When ``None``
                (the default) the module-level ``args.conv_mode`` is looked
                up on every ``__getitem__`` call, exactly as before.
        """
        # Context manager so the file handle is closed right after parsing.
        with open(data_path, "r") as data_file:
            self.list_data_dict = json.load(data_file)
        self.tokenizer = tokenizer
        self.model_config = model_config
        self.conv_mode = conv_mode

    def __getitem__(self, index):
        """Build the prompt for record ``index`` and return its tensors.

        Returns:
            tuple: ``(input_ids, image_tensor)`` where ``input_ids`` comes from
            ``tokenizer_image_token(..., return_tensors='pt')`` and
            ``image_tensor`` is ``torch.Tensor(record['tensor'])``.
        """
        sources = self.list_data_dict[index]
        # The question text is used as stored; no image token is prepended here.
        qs = sources["conversations"][0]["value"]

        # Fall back to the global ``args`` only when no mode was given explicitly.
        conv_mode = self.conv_mode if self.conv_mode is not None else args.conv_mode
        conv = conv_templates[conv_mode].copy()
        conv.append_message(conv.roles[0], qs)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()
        image_tensor = torch.Tensor(sources['tensor'])
        input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')

        return input_ids, image_tensor

    def __len__(self):
        """Number of records loaded from the JSON file."""
        return len(self.list_data_dict)


def collate_fn(batch):
    """Stack a batch of ``(input_ids, image_tensor)`` pairs along a new first axis.

    Args:
        batch: Iterable of 2-tuples of tensors.

    Returns:
        tuple: ``(stacked_input_ids, stacked_image_tensors)``.
    """
    id_seq, img_seq = zip(*batch)
    return torch.stack(id_seq, dim=0), torch.stack(img_seq, dim=0)


# DataLoader
def create_data_loader(data_path, tokenizer, model_config, batch_size=1, num_workers=4):
    """Wrap a ``CustomDataset`` over ``data_path`` in an unshuffled ``DataLoader``.

    Only ``batch_size == 1`` is accepted (enforced by an assertion).
    Batches are assembled with ``collate_fn``.
    """
    assert batch_size == 1, "batch_size must be 1"
    return DataLoader(
        CustomDataset(data_path, tokenizer, model_config),
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        collate_fn=collate_fn,
    )


def eval_model(args):
    """Generate an answer for every record in ``args.data_path`` and write JSON lines.

    Reads ``model_path``, ``model_base``, ``answers_file``, ``conv_mode``,
    ``data_path``, ``temperature``, ``top_p``, ``num_beams`` and
    ``max_new_tokens`` from ``args``. ``args.conv_mode`` gets ``'_mmtag'``
    appended in place when the model name contains ``'plain'`` but not
    ``'finetune'``.

    Note: ``CustomDataset.__getitem__`` looks up the module-level ``args``,
    so a global named ``args`` must exist for the data loader to work.
    """
    # Model
    disable_torch_init()
    model_path = os.path.expanduser(args.model_path)
    model_name = get_model_name_from_path(model_path)
    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)

    answers_file = os.path.expanduser(args.answers_file)
    answers_dir = os.path.dirname(answers_file)
    if answers_dir:
        # A bare file name has an empty dirname, which os.makedirs rejects.
        os.makedirs(answers_dir, exist_ok=True)

    # Context manager so the answers file is closed even if generation fails.
    with open(answers_file, "w") as ans_file:
        if 'plain' in model_name and 'finetune' not in model_name.lower() and 'mmtag' not in args.conv_mode:
            args.conv_mode = args.conv_mode + '_mmtag'
            print(f'It seems that this is a plain model, but it is not using a mmtag prompt, auto switching to {args.conv_mode}.')

        data_loader = create_data_loader(args.data_path, tokenizer, model.config)
        # The raw records are parsed again so each batch can be paired with its source record.
        with open(args.data_path, "r") as data_file:
            list_data_dict = json.load(data_file)

        for (input_ids, image_tensor), line in tqdm(zip(data_loader, list_data_dict), total=len(list_data_dict)):
            cur_prompt = line["conversations"][0]["value"]
            groun_truth = line["conversations"][1]["value"]
            groun_truth_img_tensor = line["tensor"]
            input_ids = input_ids.to(device='cuda', non_blocking=True)

            with torch.inference_mode():
                outputs = model.generate(
                    input_ids,
                    images=image_tensor.to(dtype=torch.float16, device='cuda', non_blocking=True),
                    do_sample=True if args.temperature > 0 else False,
                    temperature=args.temperature,
                    top_p=args.top_p,
                    num_beams=args.num_beams,
                    max_new_tokens=args.max_new_tokens,
                    use_cache=True)
            # This model's generate() result is indexed by key rather than being a plain tensor.
            output_ids = outputs['generated_tokens']

            answer = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()

            ans_id = shortuuid.uuid()
            ans_file.write(json.dumps({"prompt": cur_prompt,
                                       "groun_truth": groun_truth,
                                       "groun_truth_img_tensor": groun_truth_img_tensor,
                                       "answer": answer,
                                       "answer_id": ans_id,
                                       "model_id": model_name,
                                       "metadata": {}}) + "\n")

# if __name__ == "__main__":
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
#     parser.add_argument("--model-base", type=str, default=None)
#     parser.add_argument("--data-path", type=str, default="tables/question.jsonl")
#     parser.add_argument("--answers-file", type=str, default="answer.jsonl")
#     parser.add_argument("--conv-mode", type=str, default="llava_v1")
#     parser.add_argument("--num-chunks", type=int, default=1)
#     parser.add_argument("--chunk-idx", type=int, default=0)
#     parser.add_argument("--temperature", type=float, default=0.2)
#     parser.add_argument("--top_p", type=float, default=None)
#     parser.add_argument("--num_beams", type=int, default=1)
#     parser.add_argument("--max_new_tokens", type=int, default=128)
#     args = parser.parse_args()

#     eval_model(args)



  from .autonotebook import tqdm as notebook_tqdm


[2025-01-21 00:34:01,204] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [7]:
# Load the trained model.

# Ad-hoc settings object: the keywords become class attributes of a throwaway
# 'Args' class, so they are read as args.<name>.
args = type('Args', (), dict(
    model_path="/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/llava-v1.5-7b-g-lora/checkpoint-152",  # original note: 152 195
    model_base='/datadrive_a/tmp/vicuna-7b-v1.5/vicuna-7b-v1.5',
    data_path='/datadrive_a/jihai/data/multimodalout/dummy_data_eval.json',
    answers_file="./answer/answer-g-152.jsonl",
    conv_mode="llava_v1",
    num_chunks=1,
    chunk_idx=0,
    temperature=0,
    top_p=None,
    num_beams=1,
    max_new_tokens=128,
))()
print(args.model_path)
disable_torch_init()
# Show the path again after "~" expansion.
model_path = os.path.expanduser(args.model_path)
print(model_path)
model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path, args.model_base, model_name
)




/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/llava-v1.5-7b-g-lora/checkpoint-152
/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/llava-v1.5-7b-g-lora/checkpoint-152
Loading LLaVA from base model...


Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.26s/it]
Some weights of LlavaLlamaForCausalLM_ImgGen were not initialized from the model checkpoint at /datadrive_a/tmp/vicuna-7b-v1.5/vicuna-7b-v1.5 and are newly initialized: ['model.mm_projector.bias', 'model.mm_projector.weight', 'model.mm_projector_inverse.bias', 'model.mm_projector_inverse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading additional LLaVA weights...
Parameter containing:
tensor([[-0.0075,  0.0022,  0.0034,  ...,  0.0148, -0.0071,  0.0050],
        [ 0.0026, -0.0008, -0.0042,  ..., -0.0101, -0.0091, -0.0129],
        [ 0.0104,  0.0009, -0.0040,  ...,  0.0135, -0.0012,  0.0056],
        ...,
        [ 0.0102, -0.0066,  0.0124,  ..., -0.0004,  0.0066,  0.0057],
        [ 0.0075,  0.0142,  0.0096,  ...,  0.0037, -0.0031,  0.0063],
        [-0.0117, -0.0137,  0.0136,  ...,  0.0035, -0.0021, -0.0147]],
       device='cuda:3', dtype=torch.float16, requires_grad=True)
Parameter containing:
tensor([[ 0.0424,  0.1979, -0.0742,  ..., -0.2150, -0.2922, -0.1216],
        [-0.0779, -0.3293, -0.1805,  ..., -0.3223, -0.1934,  0.0366],
        [-0.0799,  0.2705, -0.2607,  ..., -0.0199, -0.2891,  0.2227],
        ...,
        [ 0.1932, -0.2091,  0.0664,  ..., -0.0469, -0.2386,  0.2052],
        [ 0.1003, -0.0266, -0.2786,  ..., -0.1954,  0.2683, -0.0525],
        [-0.1106,  0.3206,  0.1492,  ...,  0.1957, -0.2018

In [8]:
# Open the answers file for writing. This cell does not close the handle;
# it is written to and closed by the evaluation loop cell.
answers_file = os.path.expanduser(args.answers_file)
# A bare file name has an empty dirname, which os.makedirs rejects; use "." then.
os.makedirs(os.path.dirname(answers_file) or ".", exist_ok=True)
ans_file = open(answers_file, "w")

In [9]:


# Switch to the "_mmtag" conversation template when the model name contains
# 'plain' but not 'finetune' and the current mode is not already an mmtag one.
if 'plain' in model_name and 'finetune' not in model_name.lower() and 'mmtag' not in args.conv_mode:
    args.conv_mode = args.conv_mode + '_mmtag'
    print(f'It seems that this is a plain model, but it is not using a mmtag prompt, auto switching to {args.conv_mode}.')

data_loader = create_data_loader(args.data_path, tokenizer, model.config)
# Parse the raw records as well so each batch can be paired with its source
# record; the context manager closes the file right after parsing.
with open(args.data_path, "r") as data_file:
    list_data_dict = json.load(data_file)

In [10]:
def generate_image(input_ids, model, num_steps=6):
    """Autoregressively produce ``num_steps`` image vectors following ``input_ids``.

    Each step runs ``model.model`` on the running embedding sequence, maps the
    hidden state of the last position through ``mm_projector_inverse`` to get
    one output vector, then projects that vector back with ``mm_projector``
    and appends it to the sequence for the next step. No key/value cache is
    used, so every step re-processes the whole sequence.

    Args:
        input_ids: Token ids passed to ``embed_tokens``.
        model: Object exposing ``get_model()`` (with ``embed_tokens``,
            ``mm_projector`` and ``mm_projector_inverse``) and a callable
            ``model`` attribute whose first output is the hidden states.
        num_steps: Number of vectors to generate. Defaults to 6, the value
            that was previously hard-coded.

    Returns:
        list: One tensor per step, each the output of ``mm_projector_inverse``.
    """
    output_img = []
    with torch.inference_mode():
        # The embedding lookup also runs under inference mode so that no
        # autograd state is recorded for it.
        inputs_embeds = model.get_model().embed_tokens(input_ids)  # original note: 1, seq_len, 4096
        for _ in range(num_steps):
            outputs = model.model(
                input_ids=None,
                attention_mask=None,
                position_ids=None,
                past_key_values=None,
                inputs_embeds=inputs_embeds,
            )
            hidden_states = outputs[0]
            img = model.get_model().mm_projector_inverse(hidden_states[:, -1, :])
            output_img.append(img)
            new_embed = model.get_model().mm_projector(img)
            new_embed = new_embed.unsqueeze(1).to(inputs_embeds.device)
            inputs_embeds = torch.cat([inputs_embeds, new_embed], dim=1)

    return output_img

In [11]:
# Token ids searched for in the generated sequence; a separate cell in this
# notebook decodes them to '<image>'.
img_indicator = torch.tensor([529,  3027, 29958])
# Length of the marker sub-sequence.
sub_seq_len = len(img_indicator)

count=0
try:
    for (input_ids, image_tensor), line in tqdm(zip(data_loader, list_data_dict), total=len(list_data_dict)):
        count+=1
        # Stops before handling the 500th record, so at most 499 are written.
        # NOTE(review): confirm whether a limit of exactly 500 was intended.
        if count==500: break

        cur_prompt = line["conversations"][0]["value"]
        groun_truth=line["conversations"][1]["value"]
        groun_truth_img_tensor=line["tensor"]
        input_ids = input_ids.to(device='cuda', non_blocking=True)
        image_tensor=image_tensor.to(dtype=torch.float16, device='cuda', non_blocking=True)
        with torch.inference_mode():
            outputs = model.generate(
                input_ids,
                images=image_tensor,
                do_sample=True if args.temperature > 0 else False,
                temperature=args.temperature,
                top_p=args.top_p,
                num_beams=args.num_beams,
                max_new_tokens=args.max_new_tokens,
                use_cache=True)
        output_ids=outputs['generated_tokens']
        outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=False)[0].strip()

        id_seq = output_ids[0].cpu()

        # Sliding-window search for the first occurrence of the marker.
        start_idx = -1
        for i in range(id_seq.size(0) - sub_seq_len + 1):
            if torch.equal(id_seq[i:i + sub_seq_len], img_indicator):
                start_idx = i
                break

        img=None
        if line['task']=='generation' and start_idx != -1:
            # Drop position 0 of the generated ids and keep everything up to
            # and including the marker, then continue from there with
            # generate_image.
            output_ids=output_ids[:,1:start_idx+sub_seq_len]
            input_ids=torch.cat((input_ids, output_ids), dim=1)
            img=generate_image(input_ids,model)
            img=torch.stack(img,dim=0).squeeze().cpu().tolist()
        ans_file.write(json.dumps({"prompt": cur_prompt,
                                    "groun_truth": groun_truth,
                                    "answer": outputs,
                                    "groun_truth_img_tensor": groun_truth_img_tensor,
                                    "output_img_tensor": img,
                                    "model_id": model_name,
                                    "metadata": {}}) + "\n")

    print(ans_file)
finally:
    # Close the handle even when an iteration raises, so the lines written
    # so far are flushed to disk.
    ans_file.close()


100%|██████████| 200/200 [02:58<00:00,  1.12it/s]

<_io.TextIOWrapper name='./answer/answer-g-152.jsonl' mode='w' encoding='UTF-8'>





<_io.TextIOWrapper name='./answer/answer-u-g.jsonl' mode='w' encoding='UTF-8'>


In [22]:
# print(len(hiddens))
# print(len(hiddens[0]))
# print(len(hiddens[0][1]))
# print(len(hiddens[0][0][0]))
# print(len(hiddens[0][0][0][0]))
# print(len(hiddens[1]))
# print(len(hiddens[1][0]))
# print(len(hiddens[1][0][0]))

In [20]:
# Show the 'tensor' field of whichever record `line` currently holds.
# NOTE(review): `line` is presumably the loop variable left over from the
# evaluation loop — confirm; it is not defined in this cell.
print(line['tensor'])

[[0.0, 0.47717979550361633, 0.47717979550361633, 0.0, 0.0, 0.0, 0.0], [0.47717979550361633, 0.0, 0.47717979550361633, 0.47717979550361633, 0.0, 0.47717979550361633, 0.47717979550361633], [0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.0, 0.0, 0.47717979550361633], [0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.0, 0.0, 0.47717979550361633], [0.47717979550361633, 0.47717979550361633, 0.0, 0.47717979550361633, 0.47717979550361633, 0.0, 0.47717979550361633], [0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.47717979550361633, 0.0]]


In [None]:
# Multiply every recorded hidden state by the inverse of Q and show the first
# 7 components of each result.
# map_location="cpu" keeps Q on the CPU, matching the `.cpu()` hidden states.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
Q=torch.load('/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/Q.pth', map_location="cpu")
A_inv = torch.linalg.inv(Q)
# Iterate over whatever was recorded rather than a fixed 33 steps, which
# only fits one particular generation length.
for hidden in hiddens:
    print(torch.matmul(hidden.cpu().squeeze().float(), A_inv)[:7])

In [30]:
model_path = "/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/llava-v1.5-7b-lora"
prompt = "I want to see an image of 10:39:38 with numbers in orange."
# Defined here but not passed on: the settings below set image_file to None.
image_file = "https://llava-vl.github.io/static/images/view.jpg"

# Ad-hoc settings object for the single-prompt run; the keywords become class
# attributes of a throwaway 'Args' class.
args = type('Args', (), dict(
    model_path=model_path,
    model_base='/datadrive_a/jihai/azure_storage2/vigstandard_data/jihai/checkpoint/vicuna-7b-v1.5/vicuna-7b-v1.5',
    model_name=get_model_name_from_path(model_path),
    query=prompt,
    conv_mode=None,
    image_file=None,
    sep=",",
    temperature=0,
    top_p=None,
    num_beams=1,
    max_new_tokens=512,
))()

#eval_model(args)

In [31]:
import argparse
import torch
import re

import requests
from PIL import Image
from io import BytesIO

from llava.constants import (
    IMAGE_TOKEN_INDEX,
    DEFAULT_IMAGE_TOKEN,
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
    IMAGE_PLACEHOLDER,
)
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import (
    process_images,
    tokenizer_image_token,
    get_model_name_from_path,
)

def image_parser(args):
    """Split ``args.image_file`` on ``args.sep`` and return the pieces as a list."""
    return args.image_file.split(args.sep)


def load_image(image_file):
    """Open an image from a URL or a local path and convert it to RGB.

    Args:
        image_file: An ``http://`` / ``https://`` URL, or a filesystem path.

    Returns:
        The opened image after ``.convert("RGB")``.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
    """
    # Match the full scheme so a local path that merely begins with "http"
    # is not mistaken for a URL.
    if image_file.startswith(("http://", "https://")):
        # Bound the wait and fail on HTTP errors instead of handing an
        # error page to the image decoder.
        response = requests.get(image_file, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
    else:
        image = Image.open(image_file).convert("RGB")
    return image


def load_images(image_files):
    """Load every entry of ``image_files`` with ``load_image``, preserving order."""
    return [load_image(single_file) for single_file in image_files]


# model_name = get_model_name_from_path(args.model_path)
# tokenizer, model, image_processor, context_len = load_pretrained_model(
#     args.model_path, args.model_base, model_name
# )

In [32]:



# Insert the image token(s) into the query: replace the placeholder when it is
# present, otherwise put the token(s) on their own line before the query.
qs = args.query
image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
image_marker = image_token_se if model.config.mm_use_im_start_end else DEFAULT_IMAGE_TOKEN
if IMAGE_PLACEHOLDER in qs:
    qs = re.sub(IMAGE_PLACEHOLDER, image_marker, qs)
else:
    qs = image_marker + "\n" + qs

# Infer the conversation template from the model name; the first matching
# substring wins, so the order of the pairs matters.
# NOTE(review): `model_name` is not assigned in this cell — it is whatever an
# earlier cell left in the kernel; confirm it matches the model in `args`.
model_name_lc = model_name.lower()
for needle, inferred_mode in (
    ("llama-2", "llava_llama_2"),
    ("mistral", "mistral_instruct"),
    ("v1.6-34b", "chatml_direct"),
    ("v1", "llava_v1"),
    ("mpt", "mpt"),
):
    if needle in model_name_lc:
        conv_mode = inferred_mode
        break
else:
    conv_mode = "llava_v0"

# An explicitly configured mode wins over the inferred one (with a warning);
# otherwise the inferred mode is stored on args.
if args.conv_mode is not None and conv_mode != args.conv_mode:
    print(
        "[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}".format(
            conv_mode, args.conv_mode, args.conv_mode
        )
    )
else:
    args.conv_mode = conv_mode
print(args.conv_mode)

conv = conv_templates[args.conv_mode].copy()
conv.append_message(conv.roles[0], qs)
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

# Image loading is disabled in this cell; the original code is kept for reference:
# image_files = image_parser(args)
# images = load_images(image_files)
# image_sizes = [x.size for x in images]
# images_tensor = process_images(
#     images,
#     image_processor,
#     model.config
# ).to(model.device, dtype=torch.float16)
image_sizes = None
images_tensor = None
if images_tensor is not None:
    images_tensor = images_tensor.to(model.device, dtype=torch.float16)

print(prompt)
# Tokenize, add a leading batch axis, and move to the GPU.
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
input_ids = input_ids.unsqueeze(0).cuda()

llava_v1
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: <image>
I want to see an image of 10:39:38 with numbers in orange. ASSISTANT:


In [33]:

# Remove the '<image>' line from the prompt, then tokenize it again.
prompt = prompt.replace('<image>\n', '')
print(prompt)
# Tokenize, add a leading batch axis, and move to the GPU.
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
input_ids = input_ids.unsqueeze(0).cuda()
print(input_ids)

A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: I want to see an image of 10:39:38 with numbers in orange. ASSISTANT:
tensor([[    1,   319, 13563,  1546,   263, 12758,  5199,   322,   385, 23116,
         21082, 20255, 29889,   450, 20255,  4076,  8444, 29892, 13173, 29892,
           322,  1248,   568,  6089,   304,   278,  5199, 29915, 29879,  5155,
         29889,  3148,  1001, 29901,   306,   864,   304,  1074,   385,  1967,
           310, 29871, 29896, 29900, 29901, 29941, 29929, 29901, 29941, 29947,
           411,  3694,   297, 24841, 29889,   319,  1799,  9047, 13566, 29901]],
       device='cuda:0')


In [37]:
# Show which device `image_tensor` lives on.
# NOTE(review): `image_tensor` is not defined in this cell; presumably it is
# the last batch left over from the evaluation loop — confirm.
print(image_tensor.device)

cuda:0


In [39]:


# Run generation for the single prompt and keep both the generated token ids
# and the hidden states returned by this model's generate().
# NOTE(review): `images` receives `image_tensor`, not the `images_tensor`
# (set to None) prepared alongside the prompt; `image_tensor` is presumably
# left over from the evaluation loop — confirm which one is intended.
# NOTE(review): the recorded run of this cell ended in
# "RuntimeError: Expected all tensors to be on the same device" (cuda:0 vs cuda:1).
with torch.inference_mode():
    outputs = model.generate(
        input_ids,
        images=image_tensor,
        image_sizes=image_sizes,
        do_sample=True if args.temperature > 0 else False,
        temperature=args.temperature,
        top_p=args.top_p,
        num_beams=args.num_beams,
        max_new_tokens=args.max_new_tokens,
        use_cache=True,
    )

# The result is indexed by key: generated ids and per-step hidden states.
output_ids=outputs['generated_tokens']
hiddens=outputs['hidden_states']


# `outputs` is rebound from the generate() result to the decoded text.
outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()

# # Decode the generated tokens
# if tokenizer is not None:
#     generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=False)
# else:
#     generated_text = generated_tokens[0].tolist()  # return the token IDs directly

# # Check whether the generated text contains `<image>`
# image_start_idx = None
# image_end_idx = None
# if tokenizer is not None:
#     tokens = tokenizer.convert_ids_to_tokens(generated_tokens[0])
#     if "<image>" in tokens:
#         image_start_idx = tokens.index("<image>")
#         # Look for a terminator (assumed to be `<eos>` or sentence-ending punctuation)
#         image_end_idx = len(tokens)
#         for i, token in enumerate(tokens[image_start_idx + 1:], start=image_start_idx + 1):
#             if token in ["<eos>", ".", "!", "?"]:  # custom terminators
#                 image_end_idx = i
#                 break

# # Extract the hidden states between `<image>` and the terminator
# image_hidden_states = None
# if image_start_idx is not None and image_end_idx is not None:
#     image_hidden_states = hidden_states[:, image_start_idx:image_end_idx, :]
print(outputs)
print(output_ids)

cuda:0
cuda:0


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cuda:1!

In [17]:
# Show the generated token ids currently held in `output_ids`
# (set by an earlier generation cell, not by this one).
print(output_ids)

tensor([[    1,   910,   338,   278,  1967,   310, 29871, 29896, 29953, 29901,
         29945, 29953, 29901, 29900, 29896,   411, 13328, 13340, 29889, 29871,
            13,   529,  3027, 29958, 29871, 29871, 29871,   313, 29871,    13,
            13,    13,    13,     2]], device='cuda:0')


In [22]:
def inverse_transform(self, y: torch.Tensor):
    """Multiply ``y`` by the inverse of ``self.A`` and keep the first ``self.mm_dim`` columns.

    Args:
        self: Any object carrying the matrix ``A`` and the integer ``mm_dim``.
        y: Tensor whose last axis is multiplied by ``inv(A)``; indexed as 2-D.

    Returns:
        torch.Tensor: ``(y @ inv(A))[:, :mm_dim]``.

    Raises:
        ValueError: If ``self`` has no attribute ``A``.
    """
    # A must already exist; whether it is invertible is left to torch.linalg.inv.
    if not hasattr(self, 'A'):
        raise ValueError("Matrix A is not initialized.")

    # Undo the transform, then cut back to the original mm_dim columns.
    return torch.matmul(y, torch.linalg.inv(self.A))[:, :self.mm_dim]
# Inspect the generated ids and the recorded hidden states.
print(output_ids.shape)
print(output_ids)
print(len(hiddens))
print(hiddens[0].shape)
# Multiply every recorded hidden state by the inverse of Q and show the first
# 7 components of each result.
# map_location="cpu" keeps Q on the CPU, matching the `.cpu()` hidden states.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
Q=torch.load('/datadrive_a/jihai/LLaVA/scripts/v1_5/checkpoints/Q.pth', map_location="cpu")
A_inv = torch.linalg.inv(Q)
# Iterate over whatever was recorded rather than a fixed 33 steps, which
# only fits one particular generation length.
for hidden in hiddens:
    print(torch.matmul(hidden.cpu().squeeze().float(), A_inv)[:7])

torch.Size([1, 34])
tensor([[    1,   910,   338,   278,  1967,   310, 29871, 29896, 29896, 29901,
         29896, 29896, 29901, 29896, 29896,   411,  2654, 13340, 29889, 29871,
            13,   529,  3027, 29958, 29871, 29871, 29871, 29871, 29871,    13,
            13,    13,    13,     2]], device='cuda:0')
33
torch.Size([1, 1, 4096])


tensor([-0.0042,  0.0026,  0.0046,  0.0025,  0.0005,  0.0010, -0.0024])
tensor([-0.0434,  0.0391,  0.0829,  0.0264,  0.0261, -0.0209, -0.0738])
tensor([-0.0332,  0.0007,  0.1352,  0.0153,  0.0585,  0.0331, -0.0426])
tensor([-0.0022,  0.0103,  0.0906,  0.0242,  0.0282,  0.0487, -0.0613])
tensor([-0.0906, -0.0100,  0.1108,  0.0472,  0.0249,  0.0518, -0.1285])
tensor([-0.2601, -0.0245,  0.1536,  0.0288, -0.1128, -0.0409, -0.1169])
tensor([-0.2172, -0.0267,  0.1975,  0.0026, -0.1045, -0.0038, -0.1285])
tensor([-0.1474, -0.0665,  0.1694, -0.0363, -0.1266,  0.1893, -0.0642])
tensor([-0.0402, -0.1077,  0.1994, -0.1753,  0.0017,  0.1325, -0.1450])
tensor([-0.0961, -0.0359,  0.0947, -0.1537,  0.0039,  0.1341, -0.1053])
tensor([-0.0885,  0.0097, -0.1305,  0.0899,  0.1223,  0.0151, -0.2086])
tensor([-0.2900,  0.1205, -0.2291,  0.2406, -0.1005, -0.0183, -0.2178])
tensor([-0.6227,  0.2001, -0.2242,  0.0700, -0.1521, -0.0731, -0.2632])
tensor([-0.5183,  0.1683, -0.1968,  0.0922,  0.0700,  0.1536, -0

In [17]:
# Decode the three token ids used as the image marker in the evaluation loop;
# the recorded output of this cell is '<image>'.
tokenizer.batch_decode(torch.LongTensor([[529,  3027, 29958]]), skip_special_tokens=False)[0].strip()

'<image>'

In [None]:
import torch
# Scratch check of boolean-mask indexing: the 0/1 LongTensor is converted to a
# bool mask and used to index `a`.
a=torch.tensor([1,2,3])
b=torch.LongTensor([1,1,0]).bool()
print(a[b])

In [None]:
import numpy as np

# 1. Generate a random invertible matrix A
def generate_invertible_matrix(dim):
    """Return the orthogonal factor Q of the QR decomposition of a random ``dim x dim`` matrix.

    An orthogonal matrix is always invertible, which is why Q is used
    rather than the raw random matrix.
    """
    orthogonal_factor = np.linalg.qr(np.random.randn(dim, dim))[0]
    return orthogonal_factor

# 2. Generate a random translation vector b
def generate_translation_vector(dim):
    """Return ``dim`` samples drawn with ``np.random.randn`` as a 1-D array."""
    translation = np.random.randn(dim)
    return translation

# 3. Affine transform: y = A * x + b
def affine_transform(x, A, b):
    """Apply the affine map ``A . x + b`` using ``np.dot``."""
    linear_part = np.dot(A, x)
    return linear_part + b

# 4. Inverse affine transform: x = A^(-1) * (y - b)
def inverse_affine_transform(y, A, b):
    """Recover ``x`` from ``y = A . x + b`` by multiplying ``y - b`` with ``inv(A)``."""
    shifted = y - b
    return np.dot(np.linalg.inv(A), shifted)

# Dimensionality: 4096
dim = 4096

# Randomly generate the invertible matrix A and the translation vector b
A = generate_invertible_matrix(dim)
b = generate_translation_vector(dim)

# Randomly generate a 4096-dimensional input vector x
x = np.random.randn(dim)

# Apply the affine transform y = A * x + b
y = affine_transform(x, A, b)

# Apply the inverse affine transform and check whether the original x is recovered
x_reconstructed = inverse_affine_transform(y, A, b)

# Print the reconstruction error (norm of x - x_reconstructed) to check the recovery
reconstruction_error = np.linalg.norm(x - x_reconstructed)
print("Reconstruction error:", reconstruction_error)
