In [1]:
# import os

# os.system("CUDA_VISIBLE_DEVICES=$GPU python scripts/downscale_dataset_blender.py --config configs/blender.yml")


In [1]:
import os
import torch
# Select the compute device: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

from random import randint
from utils.loss_utils import l1_loss, ssim
from gaussian_renderer import render, network_gui 
import sys
from scene import Scene, GaussianModel 
from utils.general_utils import safe_state
import uuid
from tqdm import tqdm
from utils.image_utils import psnr
from argparse import ArgumentParser, Namespace
from arguments import ModelParams, PipelineParams, OptimizationParams

import torch.nn.functional as F

try:
    from torch.utils.tensorboard import SummaryWriter
    TENSORBOARD_FOUND = True
except ImportError:
    TENSORBOARD_FOUND = False

import shutil
from utils.general_utils import load_config
from vsr.utils_vsr import (
    setup_paths_and_params,
    load_images,
    load_vsr_model,
    process_S,
    process_ALS,
    create_video_from_images,
)

Using device: cuda




In [2]:
# Build the command-line parser. The three *Params helpers register their own
# option groups on the parser and are used later to extract typed parameter
# objects from the parsed namespace.
parser = ArgumentParser(description="Training script parameters")
lp = ModelParams(parser)
op = OptimizationParams(parser)
pp = PipelineParams(parser)
parser.add_argument('--ip', type=str, default="127.0.0.1")
parser.add_argument('--port', type=int, default=6009)
parser.add_argument('--debug_from', type=int, default=-1)
parser.add_argument('--detect_anomaly', action='store_true', default=False)
parser.add_argument("--test_iterations", nargs="+", type=int, default=[7_000, 30_000])
parser.add_argument("--save_iterations", nargs="+", type=int, default=[7_000, 30_000])
parser.add_argument("--quiet", action="store_true")
parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[])
parser.add_argument("--start_checkpoint", type=str, default = None)

parser.add_argument("--config", type=str, default=None, help="Path to configuration YAML file")

# The notebook passes an explicit argument list instead of reading sys.argv.
# (On the command line, -m would set args.model_path.)
args = parser.parse_args([
    "--eval",
    "--config", "configs/blender.yml"
])
# Always save at the final iteration, but do not list it twice when it is
# already part of --save_iterations (the default ends with 30_000).
if args.iterations not in args.save_iterations:
    args.save_iterations.append(args.iterations)

print(f"\nGet args: {args}\n")

# Merge the values from the YAML configuration file into the namespace.
args = load_config(args)
print("Optimizing " + args.model_path)

# Initialize system state (RNG) -- currently disabled in this notebook
# safe_state(args.quiet)

# Start GUI server, configure and run training -- currently disabled in this notebook
# network_gui.init(args.ip, args.port)
# torch.autograd.set_detect_anomaly(args.detect_anomaly)


Get args: Namespace(checkpoint_iterations=[], compute_cov3D_python=False, config='configs/blender.yml', convert_SHs_python=False, data_device='cuda', debug=False, debug_from=-1, densification_interval=100, densify_from_iter=500, densify_grad_threshold=0.0002, densify_until_iter=15000, detect_anomaly=False, eval=True, feature_lr=0.0025, images='images', ip='127.0.0.1', iterations=30000, lambda_dssim=0.2, model_path='output/blender/ship', opacity_lr=0.05, opacity_reset_interval=3000, percent_dense=0.01, port=6009, position_lr_delay_mult=0.01, position_lr_final=1.6e-06, position_lr_init=0.00016, position_lr_max_steps=30000, quiet=False, random_background=False, resolution=-1, rotation_lr=0.001, save_iterations=[7000, 30000, 30000], scaling_lr=0.005, sh_degree=3, source_path='', start_checkpoint=None, test_iterations=[7000, 30000], white_background=False)

source_path: ../OUTPUTS/HR/ship

hr_source_path: ../NERF_SYNTHETIC/ship

lr_source_path: ../OUTPUTS/LR/ship

gt_source_path: ../NERF_S

In [8]:
# Print every entry of the argument namespace as an aligned "name: value" line.
arg_table = vars(args)
for key in arg_table:
    print(f"{key:<25}: {arg_table[key]}")

sh_degree                : 3
source_path              : ../OUTPUTS/HR/ship
model_path               : output/blender/ship
images                   : images
resolution               : -1
white_background         : False
data_device              : cuda
eval                     : True
iterations               : 30000
position_lr_init         : 0.00016
position_lr_final        : 1.6e-06
position_lr_delay_mult   : 0.01
position_lr_max_steps    : 30000
feature_lr               : 0.0025
opacity_lr               : 0.05
scaling_lr               : 0.005
rotation_lr              : 0.001
percent_dense            : 0.01
lambda_dssim             : 0.2
densification_interval   : 100
opacity_reset_interval   : 3000
densify_from_iter        : 500
densify_until_iter       : 15000
densify_grad_threshold   : 0.0002
random_background        : False
convert_SHs_python       : False
compute_cov3D_python     : False
debug                    : False
ip                       : 127.0.0.1
port                    

## 上采样阶段

In [14]:
# Read the configuration and resolve the paths and parameters of the VSR stage.
_vsr_setup = setup_paths_and_params(args)
(spynet_path, model_path, lr_trainset_path,
 transform_path, vsr_trainset_path, video_save_path,
 num_images_in_sequence, similarity, thres_values) = _vsr_setup

# DEBUG: print the variables used by the following cells.
vars_dict = dict(
    spynet_path=spynet_path,
    model_path=model_path,
    lr_trainset_path=lr_trainset_path,
    transform_path=transform_path,
    vsr_trainset_path=vsr_trainset_path,
    video_save_path=video_save_path,
    num_images_in_sequence=num_images_in_sequence,
    similarity=similarity,
    thres_values=thres_values,
)
for name in vars_dict:
    value = vars_dict[name]
    print(f"{name:<25}: {value}")


model_path: vsr/psrt/experiments/pretrained_models/PSRT_Vimeo.pth
model_path: vsr/psrt/experiments/pretrained_models/PSRT_Vimeo.pth

spynet_path              : vsr/psrt/experiments/pretrained_models/flownet/spynet_sintel_final-3d2a1287.pth
model_path               : vsr/psrt/experiments/pretrained_models/PSRT_Vimeo.pth
lr_trainset_path         : ../OUTPUTS/LR/ship/train
transform_path           : ../NERF_SYNTHETIC/ship/transforms_train.json
vsr_trainset_path        : ../OUTPUTS/HR/ship/train
video_save_path          : None
num_images_in_sequence   : 100
similarity               : feature
thres_values             : [45]


In [15]:
# Load the VSR model: load_vsr_model receives the SpyNet weights path, the VSR
# model weights path and the device selected at the top of the notebook.
model_vsr = load_vsr_model(spynet_path=spynet_path, model_path=model_path, device=device)
print("VSR model loaded.")

loading VSR model...



  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


VSR model loaded.


In [16]:
# Make sure the output directory for the upscaled training images exists
# (exist_ok avoids the check-then-create race and matches the process_S cell),
# then load the low-resolution training images together with their file names.
os.makedirs(vsr_trainset_path, exist_ok=True)
images, names = load_images(lr_trainset_path)

### process_S

In [33]:
import os
import json
import shutil
import cv2
import numpy as np
import torch
from torchvision import transforms
from PIL import Image
import re
from archs.psrt_recurrent_arch import BasicRecurrentSwin
from basicsr.utils import tensor2img
from scene.colmap_loader import read_extrinsics_binary, qvec2rotmat
from tqdm import tqdm
from vsr.utils_vsr import (
    setup_paths_and_params,
    load_images,
    load_vsr_model,
    compute_similarity,
    compute_position_distance_ranking,
    compute_feature_distance_ranking,
    ordering_sim1_thresholding_sim2
)

# process_S: run the VSR model over the LR training images, fed as sequences
# ordered by similarity, and save every upscaled frame exactly once.

# Names of frames already written; chunks may overlap (see the last-chunk
# handling below) and a frame must not be saved twice.
created_images = set()
# Directory that receives the upscaled frames.
save_path = vsr_trainset_path
os.makedirs(save_path, exist_ok=True)

# Paths of the saved frames (in processing order) and the raw output tensor of every chunk.
all_sorted_image_paths = []
total_outputs = []

# Two supported definitions of "similar": image features or camera positions.
if similarity == 'feature':
    feature_rankings = np.array(compute_feature_distance_ranking(images))
    # Per the original author's note, the ranking presumably holds, for each image,
    # a list of [rank, distance] pairs -- TODO confirm in vsr.utils_vsr.
    sorted_indices = ordering_sim1_thresholding_sim2(images, 0, 180, feature_rankings, None, similarity)
elif similarity == 'pose':
    position_rankings = np.array(compute_position_distance_ranking(images, transform_path))
    sorted_indices = ordering_sim1_thresholding_sim2(images, 0, 180, position_rankings, None, similarity)
else:
    # Fail here with a clear message instead of a NameError on `sorted_indices` below.
    raise ValueError(f"Unsupported similarity mode {similarity!r}; expected 'feature' or 'pose'")

# Inference VSR and save images
# Walk through the ordered indices in chunks of num_images_in_sequence frames.
for i in range(0, len(sorted_indices), num_images_in_sequence):
    chunk_indices = sorted_indices[i:i + num_images_in_sequence]

    # Handle case where the last chunk is smaller: use the final
    # num_images_in_sequence indices instead, which overlaps the previous chunk.
    if len(chunk_indices) < num_images_in_sequence:
        chunk_indices = sorted_indices[-num_images_in_sequence:]

    # Prepare the batch and pad it with a repeated first and last frame as context.
    batch_imgs = [images[idx] for idx in chunk_indices]
    batch_imgs = [batch_imgs[0]] + batch_imgs + [batch_imgs[-1]]
    # Stack to (N, H, W, C) and move channels first: NCHW.
    batch_imgs = np.stack(batch_imgs).transpose(0, 3, 1, 2)
    # Scale to [0, 1], add a leading batch dimension and move to the device.
    batch_imgs = torch.from_numpy(batch_imgs).float().div(255.0).unsqueeze(0).to(device)

    # Generate outputs without tracking gradients; drop the batch dimension
    # and the two padding frames added above.
    with torch.no_grad():
        outputs = model_vsr(batch_imgs).squeeze(0)[1:-1]
    total_outputs.append(outputs)

    # Convert every output frame to an image array.
    # NOTE(review): rgb2bgr=True suggests BGR output, while PIL treats a 3-channel
    # array as RGB when saving -- this is only right if load_images yields
    # BGR frames; confirm against vsr.utils_vsr.load_images.
    outputs = [tensor2img(outputs[idx], rgb2bgr=True, min_max=(0, 1)) for idx in range(outputs.shape[0])]
    # Save output images, skipping frames already written by an overlapping chunk.
    for idx in range(len(chunk_indices)):
        output_name = names[chunk_indices[idx]]
        output_path = os.path.join(save_path, output_name)
        if output_name not in created_images:
            Image.fromarray(outputs[idx]).save(output_path)
            created_images.add(output_name)
            all_sorted_image_paths.append(output_path)

    print(f"\rProcessing S: {len(created_images)} / {len(images)} ({(len(created_images) / len(images)) * 100:.2f}%)", end="", flush=True)

# Concatenate the per-chunk outputs along the frame dimension
# (frames of an overlapping last chunk appear twice here).
total_outputs = torch.cat(total_outputs, dim=0)

# In the function form of this cell the results would be returned:
# return all_sorted_image_paths, total_outputs
print("\nUpscaling completed.")



Processing S: 100 / 100 (100.00%)Upscaling completed.


## 3DGS训练

In [3]:
# Typed parameter groups extracted from the merged argument namespace.
dataset = lp.extract(args)
opt = op.extract(args)
pipe = pp.extract(args)

# Iteration schedules and checkpoint/debug options.
testing_iterations = args.test_iterations
saving_iterations = args.save_iterations
checkpoint_iterations = args.checkpoint_iterations
checkpoint = args.start_checkpoint
debug_from = args.debug_from

# Weight of the HR (texture) loss and the downscaling mode of the LR loss.
# The parser above does not register these options, so they are read from
# whatever the configuration put on `args`; 0.40 / "avg" are used only as
# fallbacks when the configuration does not define them. (Previously these
# literals were assigned and then unconditionally overwritten.)
lambda_tex = getattr(args, "lambda_tex", 0.40)
subpixel = getattr(args, "subpixel", "avg")

In [4]:
# 1. Prepare the output directory and the logger
first_iter = 0

# Without a configured model path, derive a folder name from the OAR job id
# when one is set, otherwise from a random UUID (first 10 characters).
if not args.model_path:
    unique_str = os.getenv('OAR_JOB_ID') or str(uuid.uuid4())
    args.model_path = os.path.join("./output/", unique_str[0:10])

# Create the output folder and record the full argument set next to the model.
print("Output folder: {}".format(args.model_path))
os.makedirs(args.model_path, exist_ok = True)
cfg_args_path = os.path.join(args.model_path, "cfg_args")
with open(cfg_args_path, 'w') as cfg_log_f:
    cfg_log_f.write(str(Namespace(**vars(args))))

# TensorBoard writer; stays None when TensorBoard could not be imported.
tb_writer = None
if not TENSORBOARD_FOUND:
    print("Tensorboard not available: not logging progress")
else:
    tb_writer = SummaryWriter(args.model_path)

Output folder: output/blender/ship


**注**：args.model_path指的是3D高斯模型输出路径

In [5]:
# 2. Initialise the Gaussian model with the configured SH degree
gaussians = GaussianModel(dataset.sh_degree)

In [6]:
# 3. Create the scene from the dataset parameters and the Gaussian model
#    (per the cell output, this reads the transforms and loads the training and test cameras)
scene = Scene(dataset, gaussians)

Found transforms_train.json file, assuming Blender data set!
Reading Training Transforms
Reading Test Transforms
Loading Training Cameras
Loading Test Cameras
Number of points at initialisation :  100000


In [7]:
# 4. Set up the optimizer from the optimization parameters
gaussians.training_setup(opt)

In [8]:
# 5. Restore from a checkpoint (if one was provided)
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
if checkpoint:
    # The checkpoint holds a (model parameters, iteration) pair; the iteration
    # becomes the starting point of the training loop.
    (model_params, first_iter) = torch.load(checkpoint)
    gaussians.restore(model_params, opt)

In [11]:
def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene, renderFunc, renderArgs):
    """Log training scalars and, on test iterations, evaluate L1 and PSNR.

    Args:
        tb_writer: TensorBoard writer, or a falsy value to disable logging.
        iteration: Current training iteration (used as the logging step).
        Ll1: L1 loss of the current step; ``.item()`` is logged.
        loss: Total loss of the current step; ``.item()`` is logged.
        l1_loss: Callable ``(image, gt_image)`` used as the evaluation L1 metric.
        elapsed: Iteration time, logged as ``iter_time``.
        testing_iterations: Iterations at which the evaluation runs.
        scene: Scene providing the test/train cameras and the Gaussians.
        renderFunc: Render callable; its result is indexed with ``"render"``.
        renderArgs: Extra positional arguments forwarded to ``renderFunc``.
    """
    if tb_writer:
        tb_writer.add_scalar('train_loss_patches/l1_loss', Ll1.item(), iteration)
        tb_writer.add_scalar('train_loss_patches/total_loss', loss.item(), iteration)
        tb_writer.add_scalar('iter_time', elapsed, iteration)

    # Everything below only runs on the designated evaluation iterations.
    if iteration not in testing_iterations:
        return

    torch.cuda.empty_cache()

    # Evaluate on all test cameras and on a fixed sample of five training cameras.
    test_views = scene.getTestCameras()
    train_views = [scene.getTrainCameras()[idx % len(scene.getTrainCameras())] for idx in range(5, 30, 5)]
    validation_configs = (
        {'name': 'test', 'cameras': test_views},
        {'name': 'train', 'cameras': train_views},
    )

    for config in validation_configs:
        cameras = config['cameras']
        if not (cameras and len(cameras) > 0):
            continue

        split_name = config['name']
        l1_test = 0.0
        psnr_test = 0.0
        for idx, viewpoint in enumerate(cameras):
            rendered = renderFunc(viewpoint, scene.gaussians, *renderArgs)["render"]
            image = torch.clamp(rendered, 0.0, 1.0)
            gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0)
            # Only the first five views of each split are written as images.
            if tb_writer and (idx < 5):
                tb_writer.add_images(split_name + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration)
                # The ground truth does not change, so log it once, on the first test iteration.
                if iteration == testing_iterations[0]:
                    tb_writer.add_images(split_name + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], global_step=iteration)
            l1_test += l1_loss(image, gt_image).mean().double()
            psnr_test += psnr(image, gt_image).mean().double()

        # Average the accumulated metrics over the evaluated views.
        num_views = len(cameras)
        psnr_test /= num_views
        l1_test /= num_views
        print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, split_name, l1_test, psnr_test))
        if tb_writer:
            tb_writer.add_scalar(split_name + '/loss_viewpoint - l1_loss', l1_test, iteration)
            tb_writer.add_scalar(split_name + '/loss_viewpoint - psnr', psnr_test, iteration)

    if tb_writer:
        tb_writer.add_histogram("scene/opacity_histogram", scene.gaussians.get_opacity, iteration)
        tb_writer.add_scalar('total_points', scene.gaussians.get_xyz.shape[0], iteration)
    torch.cuda.empty_cache()

In [13]:
from tqdm.notebook import tqdm

# Constant background colour handed to the renderer (white or black per dataset setting).
bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")

# CUDA events used to time each iteration.
iter_start = torch.cuda.Event(enable_timing = True)
iter_end = torch.cuda.Event(enable_timing = True)

viewpoint_stack = None
# 4x4 average pooling applied to the render before it is compared with the LR image.
avg_kernel = torch.nn.AvgPool2d(4, stride=4)

ema_loss_for_log = 0.0
progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress")
# Derive the first iteration locally instead of incrementing `first_iter`:
# mutating the notebook-global made every re-run of this cell start one
# iteration later (and shrank the progress bar total accordingly).
# Note: re-running this cell still continues from the current `gaussians`
# and optimizer state; this cell does not recreate them.
start_iteration = first_iter + 1
for iteration in range(start_iteration, opt.iterations + 1):
    # Serve the optional network GUI viewer, if one is connected.
    if network_gui.conn is None:
        network_gui.try_connect()
    while network_gui.conn is not None:
        try:
            net_image_bytes = None
            custom_cam, do_training, pipe.convert_SHs_python, pipe.compute_cov3D_python, keep_alive, scaling_modifer = network_gui.receive()
            if custom_cam is not None:
                net_image = render(custom_cam, gaussians, pipe, background, scaling_modifer)["render"]
                net_image_bytes = memoryview((torch.clamp(net_image, min=0, max=1.0) * 255).byte().permute(1, 2, 0).contiguous().cpu().numpy())
            network_gui.send(net_image_bytes, dataset.source_path)
            if do_training and ((iteration < int(opt.iterations)) or not keep_alive):
                break
        except Exception:
            # Best effort: any viewer error drops the connection and training continues.
            network_gui.conn = None

    iter_start.record()

    gaussians.update_learning_rate(iteration)

    # Every 1000 its we increase the levels of SH up to a maximum degree
    if iteration % 1000 == 0:
        gaussians.oneupSHdegree()

    ### HR scale
    # Pick a random camera; the stack is refilled once every camera has been used.
    if not viewpoint_stack:
        viewpoint_stack = scene.getTrainCameras().copy()
    idx_cam = randint(0, len(viewpoint_stack)-1)
    viewpoint_cam = viewpoint_stack.pop(idx_cam)

    # Render
    if (iteration - 1) == debug_from:
        pipe.debug = True

    bg = torch.rand((3), device="cuda") if opt.random_background else background

    render_pkg = render(viewpoint_cam, gaussians, pipe, bg)
    image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"]

    # HR loss: L1 + D-SSIM between the render and the camera's HR image.
    gt_image = viewpoint_cam.original_image.cuda()
    Ll1 = l1_loss(image, gt_image)
    loss_tex = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim(image, gt_image))

    ### LR scale
    # Downscale the render by a factor of 4 for comparison with the LR image.
    if subpixel == 'avg':
        image_avg = avg_kernel(image)
    elif subpixel == 'bicubic':
        image_avg = torch.nn.functional.interpolate(image.unsqueeze(0), scale_factor=0.25, mode='bicubic', antialias=True).squeeze(0)
    else:
        raise Exception("Wrong sub-pixel option")

    gt_image_lr = viewpoint_cam.original_image_lr.cuda()
    if image_avg.shape != gt_image_lr.shape:
        # NOTE(review): on a shape mismatch the LR target is replaced by a bicubic
        # resize of the HR image `gt_image`, not of `original_image_lr` -- confirm
        # that discarding the LR image here is intended.
        gt_image_lr = torch.nn.functional.interpolate(gt_image.unsqueeze(0), size=image_avg.size()[-2:], mode='bicubic', antialias=True).squeeze(0)

    # LR loss: L1 + D-SSIM between the downscaled render and the LR target.
    Ll1_sp = l1_loss(image_avg, gt_image_lr)
    loss_sp = (1.0 - opt.lambda_dssim) * Ll1_sp + opt.lambda_dssim * (1.0 - ssim(image_avg, gt_image_lr))
    ###

    # Debug dump: 5000 iterations before the end, write the LR target and the
    # downscaled render to PNG files in the current working directory.
    if iteration == opt.iterations - 5000:
        import torchvision.transforms as transforms
        from PIL import Image

        to_pil_image = transforms.ToPILImage()

        gt_image_lr_pil = to_pil_image(gt_image_lr)
        gt_image_lr_pil.save("gt_image_lr_pil.png")

        image_avg_pil  = to_pil_image(image_avg)
        image_avg_pil.save("image_avg_pil.png")

    # Blend the two losses: lambda_tex weights the HR loss, the remainder the LR loss.
    lambda_tex_scheduled = lambda_tex
    loss = (1.0 - lambda_tex_scheduled) * loss_sp + lambda_tex_scheduled * loss_tex
    loss.backward()

    iter_end.record()

    # Bookkeeping without gradient tracking: logging, saving, densification, optimizer step.
    with torch.no_grad():
        # Progress bar
        ema_loss_for_log = 0.4 * loss.item() + 0.6 * ema_loss_for_log
        if iteration % 10 == 0:
            progress_bar.set_postfix({"Loss": f"{ema_loss_for_log:.{7}f}"})
            progress_bar.update(10)
        if iteration == opt.iterations:
            progress_bar.close()

        # Log and save
        training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background))
        if (iteration in saving_iterations):
            print("\n[ITER {}] Saving Gaussians".format(iteration))
            scene.save(iteration)

        # Densification: add Gaussians where detail is lacking or the error is
        # large, prune them where they are redundant or not visible.
        if iteration < opt.densify_until_iter:
            # Keep track of max radii in image-space for pruning
            gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter])
            gaussians.add_densification_stats(viewspace_point_tensor, visibility_filter)

            if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0:
                size_threshold = 20 if iteration > opt.opacity_reset_interval else None
                gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold)

            if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
                gaussians.reset_opacity()

        # Optimizer step (skipped on the very last iteration)
        if iteration < opt.iterations:
            gaussians.optimizer.step()
            gaussians.optimizer.zero_grad(set_to_none = True)

        # Save a training checkpoint: (captured model state, iteration).
        if (iteration in checkpoint_iterations):
            print("\n[ITER {}] Saving Checkpoint".format(iteration))
            torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")

Training progress:   0%|          | 0/29998 [00:00<?, ?it/s]


[ITER 7000] Evaluating test: L1 0.014479106673970819 PSNR 29.091571922302247

[ITER 7000] Evaluating train: L1 0.008679708186537028 PSNR 33.63783149719239

[ITER 7000] Saving Gaussians

[ITER 30000] Evaluating test: L1 0.013914167943876237 PSNR 29.39746416091919

[ITER 30000] Evaluating train: L1 0.006262173224240542 PSNR 36.298919677734375

[ITER 30000] Saving Gaussians
