## Gnomonic projection of a Laue image and a Laue pattern

In [None]:
import timeit

from tqdm.autonotebook import tqdm
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch

from laueimproc import Diagram
from laueimproc.geometry import detector_to_ray, ray_to_detector, uf_to_uq, uq_to_uf
from laueimproc.io import get_sample

### Reference constants and parameters
* `CAM_PONI` are the detector parameters $[dist, poni_1, poni_2, rot_1, rot_2, rot_3]$
* `GNOM_PONI` are the virtual gnomonic detector parameters $[dist, poni_1, poni_2, rot_1, rot_2, rot_3]$

In [None]:
# Both vectors follow the order [dist, poni_1, poni_2, rot_1, rot_2, rot_3].

# Real detector, "mode laue": detector on top.
CAM_PONI = torch.tensor(
    [
        1.0,  # dist
        0.0,  # poni_1
        0.0,  # poni_2
        0.0,  # rot_1
        -torch.pi / 2,  # rot_2
        0.0,  # rot_3
    ]
)

# Virtual gnomonic detector: same layout, only rot_2 differs from the camera.
GNOM_PONI = torch.tensor(
    [
        1.0,  # dist
        0.0,  # poni_1
        0.0,  # poni_2
        0.0,  # rot_1
        -3 * torch.pi / 4,  # rot_2
        0.0,  # rot_3
    ]
)

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

### Compose the projection functions

In [None]:
def cam_to_gnom(points, cam_poni, gnom_poni):
    """Project points of the camera detector onto the gnomonic detector.

    The points go through ``detector_to_ray`` (using ``cam_poni``), then
    ``uf_to_uq``, then ``ray_to_detector`` (using ``gnom_poni``).

    Parameters
    ----------
    points
        Positions on the camera detector, passed as is to ``detector_to_ray``.
    cam_poni
        Parameters of the camera detector.
    gnom_poni
        Parameters of the virtual gnomonic detector.

    Returns
    -------
    The first item of the value returned by ``ray_to_detector``.
    """
    uf = detector_to_ray(points, cam_poni)
    uq = uf_to_uq(uf)
    projection = ray_to_detector(uq, gnom_poni)
    return projection[0]

def gnom_to_cam(points, gnom_poni, cam_poni):
    """Project points of the gnomonic detector back onto the camera detector.

    The points go through ``detector_to_ray`` (using ``gnom_poni``), then
    ``uq_to_uf``, then ``ray_to_detector`` (using ``cam_poni``).

    Parameters
    ----------
    points
        Positions on the gnomonic detector, passed as is to ``detector_to_ray``.
    gnom_poni
        Parameters of the virtual gnomonic detector.
    cam_poni
        Parameters of the camera detector.

    Returns
    -------
    The first item of the value returned by ``ray_to_detector``.
    """
    uq = detector_to_ray(points, gnom_poni)
    uf = uq_to_uf(uq)
    projection = ray_to_detector(uf, cam_poni)
    return projection[0]

#### Timing comparison

In [None]:
# Simple timing of `cam_to_gnom` for several dtypes, with and without batching,
# `torch.compile` and (when available) the GPU.

BATCH = 100  # number of simulated diagrams

points = torch.rand(1000, 2)

# One row per benchmarked configuration: (label, dtype, batched, compiled, device).
_CPU = torch.device("cpu")
_cases = [
    ("float64", torch.float64, False, False, _CPU),
    ("float32", torch.float32, False, False, _CPU),
    ("float64 batched", torch.float64, True, False, _CPU),
    ("float32 batched", torch.float32, True, False, _CPU),
    ("float64 batched compiled", torch.float64, True, True, _CPU),
    ("float32 batched compiled", torch.float32, True, True, _CPU),
]
if DEVICE.type == "cuda":
    _cases += [
        ("float64 batched compiled gpu", torch.float64, True, True, DEVICE),
        ("float32 batched compiled gpu", torch.float32, True, True, DEVICE),
    ]

for _label, _dtype, _batched, _compiled, _device in _cases:
    # Keyword form of `Tensor.to`: a positional dtype combined with a keyword
    # device does not match any documented overload.
    points_, cam_poni, gnom_poni = (
        tensor.clone().to(device=_device, dtype=_dtype)
        for tensor in (points, CAM_PONI, GNOM_PONI)
    )
    if _batched:
        # BATCH diagrams processed in a single call (a view, no data copy).
        points_ = points_[None, :, :].expand(BATCH, -1, -1)
    if _compiled:
        cam_to_gnom_comp = torch.compile(cam_to_gnom, dynamic=False)
        _func = cam_to_gnom_comp
    else:
        _func = cam_to_gnom

    if _device.type == "cuda":
        def _run():
            _func(points_, cam_poni, gnom_poni)
            # CUDA kernels are launched asynchronously: wait for completion,
            # otherwise only the launch overhead would be measured.
            torch.cuda.synchronize(_device)
    else:
        def _run():
            _func(points_, cam_poni, gnom_poni)

    _run()  # warm-up: keeps compilation / CUDA start-up out of the timed runs

    # A batched call already handles BATCH diagrams, so it is executed once
    # per repetition; the unbatched call is executed BATCH times.
    _number = 1 if _batched else BATCH
    speed = min(timeit.repeat(_run, repeat=10, number=_number)) / BATCH
    print(f"{_label}: it takes {speed*1e6:.2f}us by simulation <=> {1.0/speed:.2f}Hz")

### Projection gnomonic

#### Points projection

In [None]:
# Find the spots of the sample diagram and take the centroid of each roi.
diagram = Diagram(get_sample())
diagram.find_spots()
cam_point = diagram.compute_rois_centroid()

# Normalise in place: divide by half of the image shape, then shift by -1.
half_shape = 0.5 * torch.tensor(diagram.image.shape)
cam_point /= half_shape
cam_point -= 1.0

# Camera plane -> gnomonic plane.
gnom_point = cam_to_gnom(cam_point, CAM_PONI, GNOM_PONI)

# Side by side scatter plots: camera points on the left, gnomonic on the right.
for panel, cloud in enumerate((cam_point, gnom_point), start=1):
    plt.subplot(1, 2, panel)
    plt.scatter(*cloud.unbind(-1))
plt.show()

#### Image projection

In [None]:
# Load the sample diagram and extract its image as a numpy array.
diagram = Diagram(get_sample())
image = diagram.image.numpy(force=True)

# Forward projection of the 4 corners and 4 edge midpoints of the square
# [-1, 1] x [-1, 1], used to estimate the extent of the gnomonic image.
cam = torch.tensor(
    [[-1, -1], [0, -1], [1, -1], [1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0]],
    dtype=torch.float32,
)
gnom = cam_to_gnom(cam, CAM_PONI, GNOM_PONI)

# Regular grid of gnomonic positions spanning the projected extent.
# Original note on the shape: "best shape differencial" (TODO confirm how
# this resolution was chosen).
gnom_shape = (2000, 2000)
gnom_axes = [
    torch.linspace(gnom[:, dim].amin(), gnom[:, dim].amax(), size)
    for dim, size in enumerate(gnom_shape)
]
grid_0, grid_1 = torch.meshgrid(*gnom_axes, indexing="ij")
gnom = torch.stack([grid_0.ravel(), grid_1.ravel()], dim=1)

# Backward projection: camera position of every gnomonic grid node, rescaled
# in place from the normalised range to the image shape.
cam = gnom_to_cam(gnom, GNOM_PONI, CAM_PONI)
cam += 1.0
cam *= 0.5 * torch.tensor(image.shape)

# Conversion maps for cv2: map1 holds the coordinate along image axis 0,
# map2 the coordinate along image axis 1.
map1, map2 = (
    cam[..., axis].reshape(gnom_shape).to(torch.float32).numpy(force=True)
    for axis in (0, 1)
)

# cv2.remap takes the x (column) map first, hence map2 before map1.
gnom_img = cv2.remap(image, map2, map1, interpolation=cv2.INTER_LINEAR)

# Show both images with the display range derived from the original image.
vmin = image.mean()
vmax = vmin + 3 * image.std()
for panel, picture in enumerate((image, gnom_img), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(picture, cmap="plasma", vmin=vmin, vmax=vmax)
plt.show()

#### Timing comparison

In [None]:
# Simple timing of cv2.remap for two pixel types and two interpolation modes.

# uint16 copy: scale by 65535, add 0.5 and truncate to int32 before the final
# cast (assumes the image values lie in [0, 1] -- TODO confirm).
img_u16 = (diagram.image * 65535 + 0.5).to(torch.int32).numpy(force=True).astype(np.uint16)
img_f32 = diagram.image.to(torch.float32).numpy(force=True)

BATCH = 10

for pixel_name, img in (("uint16", img_u16), ("float32", img_f32)):
    for interp_name, interp in (("bilinear", cv2.INTER_LINEAR), ("bicubic", cv2.INTER_CUBIC)):
        # Best of 10 repetitions of BATCH calls, reported per call.
        durations = timeit.repeat(
            lambda: cv2.remap(img, map2, map1, interpolation=interp),
            repeat=10,
            number=BATCH,
        )
        speed = min(durations) / BATCH
        print(f"{pixel_name} {interp_name}: it takes {speed*1e3:.2f}ms by image")