<a href="https://colab.research.google.com/github/Its-Itachi/Updated_PIFuHD/blob/main/Updated_PIFuHD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Requirements
- Python 3
- PyTorch tested on 1.4.0
- json
- PIL
- skimage
- tqdm
- numpy
- cv2

## Clone PIFuHD repository

In [1]:
!git clone https://github.com/facebookresearch/pifuhd

Cloning into 'pifuhd'...
remote: Enumerating objects: 222, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (2/2), done.[K
remote: Total 222 (delta 20), reused 20 (delta 20), pack-reused 200 (from 1)[K
Receiving objects: 100% (222/222), 399.83 KiB | 1.54 MiB/s, done.
Resolving deltas: 100% (113/113), done.


In [20]:
import os

# Ensure the apps/ folder exists
os.makedirs("apps", exist_ok=True)

# Path to the recon.py inside the repo
file_path = "apps/recon.py"

# Full updated recon.py code with PyTorch 2.6+ safe loading
patched_code = """
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.

import sys
import os

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

import time
import json
import numpy as np
import cv2
import random
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from numpy.linalg import inv

from lib.options import BaseOptions
from lib.mesh_util import save_obj_mesh_with_color, reconstruction
from lib.data import EvalWPoseDataset, EvalDataset
from lib.model import HGPIFuNetwNML, HGPIFuMRNet
from lib.geometry import index

from PIL import Image

# Safe loading for PyTorch 2.6+
import argparse
import torch.serialization as serialization

parser = BaseOptions()

def gen_mesh(res, net, cuda, data, save_path, thresh=0.5, use_octree=True, components=False):
    image_tensor_global = data['img_512'].to(device=cuda)
    image_tensor = data['img'].to(device=cuda)
    calib_tensor = data['calib'].to(device=cuda)

    net.filter_global(image_tensor_global)
    net.filter_local(image_tensor[:, None])

    try:
        if net.netG.netF is not None:
            image_tensor_global = torch.cat([image_tensor_global, net.netG.nmlF], 0)
        if net.netG.netB is not None:
            image_tensor_global = torch.cat([image_tensor_global, net.netG.nmlB], 0)
    except:
        pass

    b_min = data['b_min']
    b_max = data['b_max']

    try:
        save_img_path = save_path[:-4] + '.png'
        save_img_list = []
        for v in range(image_tensor_global.shape[0]):
            save_img = (np.transpose(image_tensor_global[v].detach().cpu().numpy(), (1,2,0)) * 0.5 + 0.5)[:, :, ::-1] * 255.0
            save_img_list.append(save_img)
        save_img = np.concatenate(save_img_list, axis=1)
        cv2.imwrite(save_img_path, save_img)

        verts, faces, _, _ = reconstruction(net, cuda, calib_tensor, res, b_min, b_max, thresh, use_octree=use_octree, num_samples=50000)
        verts_tensor = torch.from_numpy(verts.T).unsqueeze(0).to(device=cuda).float()

        color = np.zeros(verts.shape)
        interval = 50000
        for i in range(len(color)//interval + 1):
            left = i*interval
            right = (i+1)*interval if i != len(color)//interval else -1
            net.calc_normal(verts_tensor[:, None, :, left:right], calib_tensor[:, None], calib_tensor)
            nml = net.nmls.detach().cpu().numpy()[0]*0.5 + 0.5
            color[left:right] = nml.T

        save_obj_mesh_with_color(save_path, verts, faces, color)
    except Exception as e:
        print(e)

def gen_mesh_imgColor(res, net, cuda, data, save_path, thresh=0.5, use_octree=True, components=False):
    image_tensor_global = data['img_512'].to(device=cuda)
    image_tensor = data['img'].to(device=cuda)
    calib_tensor = data['calib'].to(device=cuda)

    net.filter_global(image_tensor_global)
    net.filter_local(image_tensor[:, None])

    try:
        if net.netG.netF is not None:
            image_tensor_global = torch.cat([image_tensor_global, net.netG.nmlF], 0)
        if net.netG.netB is not None:
            image_tensor_global = torch.cat([image_tensor_global, net.netG.nmlB], 0)
    except:
        pass

    b_min = data['b_min']
    b_max = data['b_max']

    try:
        save_img_path = save_path[:-4] + '.png'
        save_img_list = []
        for v in range(image_tensor_global.shape[0]):
            save_img = (np.transpose(image_tensor_global[v].detach().cpu().numpy(), (1,2,0)) * 0.5 + 0.5)[:, :, ::-1] * 255.0
            save_img_list.append(save_img)
        save_img = np.concatenate(save_img_list, axis=1)
        cv2.imwrite(save_img_path, save_img)

        verts, faces, _, _ = reconstruction(net, cuda, calib_tensor, res, b_min, b_max, thresh, use_octree=use_octree, num_samples=100000)
        verts_tensor = torch.from_numpy(verts.T).unsqueeze(0).to(device=cuda).float()

        xyz_tensor = net.projection(verts_tensor, calib_tensor[:1])
        uv = xyz_tensor[:, :2, :]
        color = index(image_tensor[:1], uv).detach().cpu().numpy()[0].T
        color = color * 0.5 + 0.5

        if 'calib_world' in data:
            calib_world = data['calib_world'].numpy()[0]
            verts = np.matmul(np.concatenate([verts, np.ones_like(verts[:, :1])],1), inv(calib_world).T)[:, :3]

        save_obj_mesh_with_color(save_path, verts, faces, color)
    except Exception as e:
        print(e)

def recon(opt, use_rect=False):
    state_dict_path = None
    if opt.load_netMR_checkpoint_path is not None:
        state_dict_path = opt.load_netMR_checkpoint_path
    elif opt.resume_epoch < 0:
        state_dict_path = f"{opt.checkpoints_path}/{opt.name}_train_latest"
        opt.resume_epoch = 0
    else:
        state_dict_path = f"{opt.checkpoints_path}/{opt.name}_train_epoch_{opt.resume_epoch}"

    start_id = opt.start_id
    end_id = opt.end_id
    cuda = torch.device(f"cuda:{opt.gpu_id}" if torch.cuda.is_available() else "cpu")

    if state_dict_path is not None and os.path.exists(state_dict_path):
        print("Resuming from", state_dict_path)
        try:
            serialization.add_safe_globals([argparse.Namespace])
        except:
            pass

        try:
            state_dict = torch.load(state_dict_path, map_location=cuda, weights_only=False)
        except TypeError:
            state_dict = torch.load(state_dict_path, map_location=cuda)
        except Exception as e:
            raise RuntimeError(f"Failed loading checkpoint {state_dict_path}: {e}") from e

        dataroot = opt.dataroot
        resolution = opt.resolution
        results_path = opt.results_path
        loadSize = opt.loadSize

        opt = state_dict['opt']
        opt.dataroot = dataroot
        opt.resolution = resolution
        opt.results_path = results_path
        opt.loadSize = loadSize
    else:
        raise FileNotFoundError(f"Checkpoint not found: {state_dict_path}")

    test_dataset = EvalDataset(opt) if use_rect else EvalWPoseDataset(opt)
    print("test data size:", len(test_dataset))
    projection_mode = test_dataset.projection_mode

    opt_netG = state_dict['opt_netG']
    netG = HGPIFuNetwNML(opt_netG, projection_mode).to(device=cuda)
    netMR = HGPIFuMRNet(opt, netG, projection_mode).to(device=cuda)

    netMR.load_state_dict(state_dict['model_state_dict'])

    os.makedirs(opt.checkpoints_path, exist_ok=True)
    os.makedirs(opt.results_path, exist_ok=True)
    os.makedirs(f"{opt.results_path}/{opt.name}/recon", exist_ok=True)

    if start_id < 0: start_id = 0
    if end_id < 0: end_id = len(test_dataset)

    with torch.no_grad():
        netG.eval()
        print("generate mesh (test) ...")
        for i in tqdm(range(start_id, end_id)):
            if i >= len(test_dataset):
                break
            test_data = test_dataset[i]
            save_path = f"{opt.results_path}/{opt.name}/recon/result_{test_data['name']}_{opt.resolution}.obj"
            print(save_path)
            gen_mesh(opt.resolution, netMR, cuda, test_data, save_path, components=opt.use_compose)

def reconWrapper(args=None, use_rect=False):
    opt = parser.parse(args)
    recon(opt, use_rect)

if __name__ == "__main__":
    reconWrapper()
"""

# Write the file
with open(file_path, "w", encoding="utf-8") as f:
    f.write(patched_code)

print(f"✅ Custom recon.py written to {file_path}")


✅ Custom recon.py written to apps/recon.py


## Configure input data

In [21]:
cd /content/pifuhd/sample_images

/content/pifuhd/sample_images


**If you want to upload your own picture, run the next cell**. Otherwise, go to the next next cell. Currently PNG, JPEG files are supported.

In [22]:
from google.colab import files

filename = list(files.upload().keys())[0]

Saving 2.jpeg to 2 (1).jpeg


In [23]:
import os

try:
  image_path = '/content/pifuhd/sample_images/%s' % filename
except:
  image_path = '/content/pifuhd/sample_images/test.png' # example image
image_dir = os.path.dirname(image_path)
file_name = os.path.splitext(os.path.basename(image_path))[0]

# output pathes
obj_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.obj' % file_name
out_img_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.png' % file_name
video_path = '/content/pifuhd/results/pifuhd_final/recon/result_%s_256.mp4' % file_name
video_display_path = '/content/pifuhd/results/pifuhd_final/result_%s_256_display.mp4' % file_name

In [24]:
cd /content

/content


## Preprocess (for cropping image)

In [25]:
!git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git

fatal: destination path 'lightweight-human-pose-estimation.pytorch' already exists and is not an empty directory.


In [26]:
cd /content/lightweight-human-pose-estimation.pytorch/

/content/lightweight-human-pose-estimation.pytorch


In [27]:
!wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth

--2025-10-10 17:19:21--  https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth
Resolving download.01.org (download.01.org)... 23.42.200.215, 2600:1406:5400:2ac::a87, 2600:1406:5400:2a0::a87
Connecting to download.01.org (download.01.org)|23.42.200.215|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87959810 (84M) [application/octet-stream]
Saving to: ‘checkpoint_iter_370000.pth.1’


2025-10-10 17:19:22 (104 MB/s) - ‘checkpoint_iter_370000.pth.1’ saved [87959810/87959810]



In [28]:
import torch
import cv2
import numpy as np
from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state
from modules.pose import Pose, track_poses
import demo

def get_rect(net, images, height_size):
    net = net.eval()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    for image in images:
        rect_path = image.replace('.%s' % (image.split('.')[-1]), '_rect.txt')
        img = cv2.imread(image, cv2.IMREAD_COLOR)
        orig_img = img.copy()
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = demo.infer_fast(net, img, height_size, stride, upsample_ratio, cpu=False)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []

        rects = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            valid_keypoints = []
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
                    valid_keypoints.append([pose_keypoints[kpt_id, 0], pose_keypoints[kpt_id, 1]])
            valid_keypoints = np.array(valid_keypoints)

            if pose_entries[n][10] != -1.0 or pose_entries[n][13] != -1.0:
              pmin = valid_keypoints.min(0)
              pmax = valid_keypoints.max(0)

              center = (0.5 * (pmax[:2] + pmin[:2])).astype(np.int)
              radius = int(0.65 * max(pmax[0]-pmin[0], pmax[1]-pmin[1]))
            elif pose_entries[n][10] == -1.0 and pose_entries[n][13] == -1.0 and pose_entries[n][8] != -1.0 and pose_entries[n][11] != -1.0:
              # if leg is missing, use pelvis to get cropping
              center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(np.int)
              radius = int(1.45*np.sqrt(((center[None,:] - valid_keypoints)**2).sum(1)).max(0))
              center[1] += int(0.05*radius)
            else:
              center = np.array([img.shape[1]//2,img.shape[0]//2])
              radius = max(img.shape[1]//2,img.shape[0]//2)

            x1 = center[0] - radius
            y1 = center[1] - radius

            rects.append([x1, y1, 2*radius, 2*radius])

        np.savetxt(rect_path, np.array(rects), fmt='%d')

In [29]:
# Make checkpoints folder
!mkdir -p checkpoints

# Download the official PIFuHD checkpoint (for full-body reconstruction)
!wget -O checkpoints/pifuhd.pt https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt


--2025-10-10 17:19:30--  https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 99.84.41.33, 99.84.41.79, 99.84.41.129, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|99.84.41.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1548375177 (1.4G) [application/octet-stream]
Saving to: ‘checkpoints/pifuhd.pt’


2025-10-10 17:19:38 (181 MB/s) - ‘checkpoints/pifuhd.pt’ saved [1548375177/1548375177]



In [30]:
import numpy as np
np.int = int  # restore deprecated alias for compatibility


In [31]:
net = PoseEstimationWithMobileNet()
checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
load_state(net, checkpoint)

get_rect(net.cuda(), [image_path], 512)

## Download the Pretrained Model

In [32]:
cd /content/pifuhd/

/content/pifuhd


In [33]:
!sh ./scripts/download_trained_model.sh

+ mkdir -p checkpoints
+ cd checkpoints
+ wget https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt pifuhd.pt
--2025-10-10 17:19:47--  https://dl.fbaipublicfiles.com/pifuhd/checkpoints/pifuhd.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 99.84.41.33, 99.84.41.79, 99.84.41.129, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|99.84.41.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1548375177 (1.4G) [application/octet-stream]
Saving to: ‘pifuhd.pt.1’


2025-10-10 17:19:56 (166 MB/s) - ‘pifuhd.pt.1’ saved [1548375177/1548375177]

--2025-10-10 17:19:56--  http://pifuhd.pt/
Resolving pifuhd.pt (pifuhd.pt)... failed: Name or service not known.
wget: unable to resolve host address ‘pifuhd.pt’
FINISHED --2025-10-10 17:19:56--
Total wall clock time: 9.0s
Downloaded: 1 files, 1.4G in 8.9s (166 MB/s)


## Run PIFuHD!


In [34]:
ls /content/pifuhd/apps


batch_openpose.py  [0m[01;34m__pycache__[0m/  render_turntable.py
clean_mesh.py      recon.py      simple_test.py


In [35]:
# Warning: all images with the corresponding rectangle files under -i will be processed.
!python -m apps.simple_test -r 256 --use_rect -i $image_dir

# seems that 256 is the maximum resolution that can fit into Google Colab.
# If you want to reconstruct a higher-resolution mesh, please try with your own machine.

Resuming from ./checkpoints/pifuhd.pt
test data size: 2
initialize network with normal
initialize network with normal
generate mesh (test) ...
  0% 0/2 [00:00<?, ?it/s]./results/pifuhd_final/recon/result_2 (1)_256.obj
[ WARN:0@16.937] global loadsave.cpp:1063 imwrite_ Unsupported depth image for selected encoder is fallbacked to CV_8U.
 50% 1/2 [00:07<00:07,  7.10s/it]./results/pifuhd_final/recon/result_2_256.obj
100% 2/2 [00:11<00:00,  5.93s/it]
