In [None]:
'''
- transforms[i]: torch variable. relative transform between frame i and frame 0. transforms[0] = identity
- triangulate_point[i][(u, v)]: the torch variable, representing the 3D point that frame i's feature (uv) corresponds to.

- Given a set of images, do feature matching between frames => got matched_points_i_j.csv

- Estimate initial guess of transforms, using gap = 5 (your choice)
    transforms[0] = identity
    for j = 1 ~ 150
        if(j-5 < 0) i = j-1
        else i = j-5
        relative_T_i_j = estimateT(matched_points_i_j)
        transforms[j] = transforms[i] * relative_T_i_j # these should be leaf node tensor, it's just their initial value is Ti*relT

- Estimate initial guess of 3D point. 
    # For each matched_points_i_j, do 3D point registration
    for gap = 1 ~ 10
        for i = 0 ~ 150
            j = i + gap
            load matched_points_i_j
            for each match m: (u1, v1), (u2, v2) <= (int(u1), int(v1)), (int(u2), int(v2))
                if already_triangulate[frame i][(u1, v1)] || already_triangulate[frame j][(u2, v2)]
                    => there's already a 3D point registered, 
                       already_triangulate[frame][feature u v] map to a variable => that 3D point
                    => al_tr[frame i][u1 v1] = al_tr[frame j][u2 v2] = one of the above (they're same anyway)
                else
                    => al_tr[frame i][u1 v1] = al_tr[frame j][u2 v2] = initial value set to triangulate(two features, two transforms)

- Let's do optimization! project each 3D point(itself is a variable) onto each associated frame (now transforms variable included)
and loss = reprojection error


In [8]:
# Dictionary mapping CAMERA_ID -> dict of that camera's parsed parameters.
camera_data = {}

# Parse the cameras file line by line. Only rows with exactly 8
# whitespace-separated fields are used, read positionally as:
#   CAMERA_ID MODEL WIDTH HEIGHT FOCAL_LENGTH CX CY K1
with open('./train/haiper/bike/sfm/cameras.txt', 'r') as file:
    for line in file:
        values = line.strip().split()

        # Skip blank lines and '#' comment/header lines up front, so that a
        # comment which happens to contain 8 tokens can never reach the
        # int()/float() conversions below and raise ValueError.
        if not values or values[0].startswith('#'):
            continue

        # Rows with any other field count are ignored.
        if len(values) != 8:
            continue

        camera_id = int(values[0])
        model = values[1]
        width = int(values[2])
        height = int(values[3])
        focal_length, cx, cy, k1 = (float(v) for v in values[4:8])

        camera_info = {
            'CAMERA_ID': camera_id,
            'MODEL': model,
            'WIDTH': width,
            'HEIGHT': height,
            'FOCAL_LENGTH': focal_length,
            'CX': cx,
            'CY': cy,
            'K1': k1
        }

        # A later row with the same CAMERA_ID overwrites an earlier one.
        camera_data[camera_id] = camera_info

print(camera_data) # camera_id (1~151) => camera_info

{151: {'CAMERA_ID': 151, 'MODEL': 'SIMPLE_RADIAL', 'WIDTH': 1920, 'HEIGHT': 1440, 'FOCAL_LENGTH': 1537.9905357118319, 'CX': 713.2343519989034, 'CY': 970.3617864700348, 'K1': 0.007398606978821472}, 150: {'CAMERA_ID': 150, 'MODEL': 'SIMPLE_RADIAL', 'WIDTH': 1920, 'HEIGHT': 1440, 'FOCAL_LENGTH': 1536.7216320344005, 'CX': 714.4733454380107, 'CY': 972.5423055226008, 'K1': 0.009325786272887224}, 149: {'CAMERA_ID': 149, 'MODEL': 'SIMPLE_RADIAL', 'WIDTH': 1920, 'HEIGHT': 1440, 'FOCAL_LENGTH': 1532.8397219637102, 'CX': 715.8176842161554, 'CY': 975.7926836835544, 'K1': 0.0089268770427598}, 148: {'CAMERA_ID': 148, 'MODEL': 'SIMPLE_RADIAL', 'WIDTH': 1920, 'HEIGHT': 1440, 'FOCAL_LENGTH': 1535.5344212571836, 'CX': 714.074611743481, 'CY': 972.0168107869681, 'K1': 0.007998274919952867}, 147: {'CAMERA_ID': 147, 'MODEL': 'SIMPLE_RADIAL', 'WIDTH': 1920, 'HEIGHT': 1440, 'FOCAL_LENGTH': 1530.909615887358, 'CX': 715.1771880491902, 'CY': 970.7264076511703, 'K1': 0.006124555884328375}, 146: {'CAMERA_ID': 146,

In [26]:
import torch
import math
import numpy as np

# Number of frames that get their own transform variable.
num_frames = 10

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell still runs on machines without CUDA (hard-coding "cuda" raises there).
device = "cuda" if torch.cuda.is_available() else "cpu"

# One independent 4x4 transformation matrix per frame. Each is a leaf tensor
# with requires_grad=True, initialised to the identity.
transforms = [
    torch.eye(4, requires_grad=True, device=device)
    for _ in range(num_frames)
]
    
def project(pointsXYZ, camera_id):
    '''
    Project 3D points onto the image of camera `camera_id`, applying a
    single-coefficient radial distortion.

    No extrinsic transform is applied here: X and Y are divided by Z directly,
    so the points must already be expressed in that camera's coordinate frame.

    Parameters
    ----------
    pointsXYZ : numpy array or torch tensor of shape (N, 3)
        One point per row; columns are X, Y, Z (the code indexes
        `pointsXYZ[:, 0..2]`). Z must be non-zero.
    camera_id : key into the module-level `camera_data` dict. The entry must
        provide 'FOCAL_LENGTH', 'CX', 'CY' and 'K1'.

    Returns
    -------
    pointsUV : shape (2, N); row 0 is u, row 1 is v.
        A numpy array for numpy input. For torch input a torch tensor is
        returned and the whole computation stays inside autograd, so the
        result can be used in a reprojection loss.
    '''
    camera_info = camera_data[camera_id]
    # The same focal length is used for both image axes.
    fx = camera_info['FOCAL_LENGTH']
    fy = camera_info['FOCAL_LENGTH']
    cx = camera_info['CX']
    cy = camera_info['CY']
    k1 = camera_info['K1']

    X = pointsXYZ[:, 0]
    Y = pointsXYZ[:, 1]
    Z = pointsXYZ[:, 2]

    # Normalized image-plane coordinates.
    x_norm = X / Z
    y_norm = Y / Z

    # Only r^2 enters the distortion factor, so the square root is skipped.
    # Besides being cheaper, this avoids the NaN gradient that sqrt produces
    # for a point lying exactly on the optical axis (r == 0).
    r_squared = x_norm ** 2 + y_norm ** 2
    radial_dist_coeff = 1 + k1 * r_squared

    x_distorted = x_norm * radial_dist_coeff
    y_distorted = y_norm * radial_dist_coeff
    u = fx * x_distorted + cx
    v = fy * y_distorted + cy

    if torch.is_tensor(pointsXYZ):
        # Stay in torch so gradients can flow back to the 3D points.
        return torch.stack((u, v))
    return np.vstack((u, v))


[1 2 3]
[23 24 25]
[0.04347826 0.08333333 0.12      ]
[0.52173913 0.54166667 0.56      ]
[0.52354759 0.54803943 0.57271284]
[[ 782.05622085  843.16385826  899.4234901 ]
 [1769.94833533 1800.73704103 1829.11434189]]


In [22]:
# Hand check of the radial distance r = sqrt(x_norm^2 + y_norm^2) that
# project() computes, using rounded normalized coordinates (0.043, 0.5217).
math.sqrt(0.043**2+0.5217**2)

0.5234690917332179

In [1]:
import csv
import os

def estimateTransform(i, j):
    '''
    Estimate the epipolar geometry between frame `i` and frame `j` from their
    matched feature points.

    Reads `./matched_points/matched_points_{i:03d}_{j:03d}.csv` (one header
    row, then rows of `u1, v1, u2, v2`), fits a fundamental matrix with
    `cv.findFundamentalMat(..., cv.FM_LMEDS)` and keeps only the matches that
    the estimator marks as inliers.

    NOTE(review): despite its name, this function does not yet recover a
    relative transform. It returns the fundamental matrix and the inlier
    matches so that the pose-recovery step can be built on top of it.
    NOTE(review): relies on module-level `np` (numpy) and `cv` (presumably
    `import cv2 as cv`); neither is imported in this cell - TODO confirm.

    Parameters
    ----------
    i, j : int
        Frame indices; they select the CSV file to load.

    Returns
    -------
    (F, pts1, pts2)
        F    : fundamental matrix returned by OpenCV.
        pts1 : (M, 2) int32 array, inlier pixel coordinates from the first
               two CSV columns.
        pts2 : (M, 2) int32 array, matching coordinates from the last two.

    Raises
    ------
    ValueError
        If the CSV holds no matches, or OpenCV cannot estimate a matrix.
    '''
    # Path to the CSV file
    csv_file_path = f'./matched_points/matched_points_{i:03d}_{j:03d}.csv'

    pts1 = []
    pts2 = []

    # newline='' is what the csv module asks for when reading files.
    with open(csv_file_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)

        # Skip the header row (tolerating a completely empty file).
        next(csv_reader, None)

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            u1, v1, u2, v2 = map(float, row)
            pts1.append([u1, v1])
            pts2.append([u2, v2])

    if not pts1:
        raise ValueError(f'no matched points found in {csv_file_path}')

    # Coordinates are truncated to integer pixels, as in the original code.
    pts1 = np.int32(pts1)
    pts2 = np.int32(pts2)

    F, mask = cv.findFundamentalMat(pts1, pts2, cv.FM_LMEDS)
    if F is None or mask is None:
        # Without this guard the failure would surface as an opaque
        # AttributeError on `mask.ravel()`.
        raise ValueError(
            f'could not estimate a fundamental matrix from {len(pts1)} '
            f'matches in {csv_file_path}'
        )

    # We select only inlier points
    inliers = mask.ravel() == 1
    return F, pts1[inliers], pts2[inliers]


def drawlines(img1, img2, lines, pts1, pts2):
    ''' Draw epilines on img1 and the matched points on both images.

        img1  - image on which we draw the epilines for the points in img2
        img2  - the other image; only its points are drawn
        lines - corresponding epilines, each indexable as (a, b, c) with
                a*x + b*y + c = 0
        pts1, pts2 - matched points, paired with `lines` by position

        Both images must be 2D (single-channel); they are converted to BGR
        before drawing. Returns the two annotated images.
        NOTE(review): an epiline with b == 0 (vertical) divides by zero. '''
    _, c = img1.shape
    img1 = cv.cvtColor(img1, cv.COLOR_GRAY2BGR)
    img2 = cv.cvtColor(img2, cv.COLOR_GRAY2BGR)
    for line, pt1, pt2 in zip(lines, pts1, pts2):
        color = tuple(np.random.randint(0, 255, 3).tolist())
        # End points of the epiline at the left (x = 0) and right (x = c) edge.
        x0, y0 = map(int, [0, -line[2] / line[1]])
        x1, y1 = map(int, [c, -(line[2] + line[0] * c) / line[1]])
        img1 = cv.line(img1, (x0, y0), (x1, y1), color, 1)
        img1 = cv.circle(img1, tuple(map(int, pt1)), 5, color, -1)
        img2 = cv.circle(img2, tuple(map(int, pt2)), 5, color, -1)
    return img1, img2
    
    
# Pair every frame j with a reference frame i: five frames back when that
# index exists, otherwise the immediately preceding frame.
for j in range(1, 151):
    i = j - 1 if j < 5 else j - 5
    print(i, j)
    T_ij = estimateTransform(i, j)
    break  # stop after the first pair (0, 1) while the estimator is in progress
# Planned follow-up, not active yet:
#     with no_grad:
#         T_jtoi = estimateTransform(i, j)
#     relative_T_i_j = estimateT(matched_points_i_j)
#     transforms[j] = transforms[i] * relative_T_i_j # leaf tensors whose initial value is Ti*relT


0 1


In [89]:
# Display a 4x4 identity matrix, the value each frame transform is initialised to.
torch.eye(4)

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [66]:
import torch

# Autograd experiment: how gradients reach a derived (non-leaf) tensor, and
# whether a computed value can be used as the initial value of a leaf tensor.
point = torch.tensor([1.0], requires_grad=True)  # placeholder; rebound below before it is used

# `transform` is a leaf tensor; `transform2` is computed from it, so it is a
# non-leaf tensor carrying a grad_fn.
transform = torch.tensor([1.0], requires_grad=True)
transform2 = transform * 2

# Non-leaf tensors only keep their .grad after backward() if retain_grad()
# was called on them beforehand.
transform2.retain_grad()
# transform2.retain_grad()
point = transform2 / 8
# Alternative that was tried: point = transform * 2 + 1

# d(point)/d(transform2) = 1/8 and d(point)/d(transform) = 2/8.
point.backward()
print(transform2, transform2.grad, point.is_leaf)
print(transform, transform.grad, transform.is_leaf)

# Start from an empty leaf tensor, then overwrite its values through `.data`.
# It stays a leaf, and calling backward() on it directly gives it a gradient of 1.
# NOTE(review): assigning `.data` bypasses autograd tracking;
# `transform2.detach().clone().requires_grad_(True)` should produce the same
# kind of leaf - confirm before relying on either.
transform3 = torch.tensor([], requires_grad=True)
print(transform3.is_leaf)
transform3.data = transform2.data
transform3.backward()
print(transform3.is_leaf, transform3.data, transform3.grad)

tensor([2.], grad_fn=<MulBackward0>) tensor([0.1250]) False
tensor([1.], requires_grad=True) tensor([0.2500]) True
True
True tensor([2.]) tensor([1.])


In [84]:
# Check what detach() does to the leaf / requires_grad flags.
b = torch.rand(10, requires_grad=True)
b.is_leaf # True
# After detach() the tensor is still a leaf but no longer requires grad.
b = b.detach()
print(b.is_leaf, b.requires_grad) # True False

True False
