# LightGlue Video Workflow
In this notebook we
- Load a video and seperate it into frames
- match two pairs of images using LightGlue with early stopping and point pruning.

In [1]:
# If we are on colab: this clones the repo and installs the dependencies
from pathlib import Path

if Path.cwd().name != "LightGlue":
    !git clone --quiet https://github.com/cvg/LightGlue/
    %cd LightGlue
    !pip install --progress-bar off --quiet -e .

from lightglue import LightGlue, SuperPoint, DISK
from lightglue.utils import load_image, rbd
from lightglue import viz2d
import torch

# Installiere FFmpeg
!apt-get update
!apt-get install -y ffmpeg --progress-bar off --quiet
print(f"\n")

torch.set_grad_enabled(False)
images = Path("assets")

/content/LightGlue
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
  Building editable for lightglue (pyproject.toml) ... [?25l[?25hdone


  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.org (3.171.85.15)] [Con                                                                                                    Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
                                                                                                    Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
0% [2 InRelease 82.2 kB/128 kB 64%] [Connected to cloud.r-project.org (3.171.85.15)] [Connected to r                                                                                                    Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get

## Load OpenCV to preprocess the video into frames

In [4]:
import cv2
import os
import numpy


input_folder = "/content/drive/MyDrive/Promotion/Code/feature_matching/LightGlue-main/data/"
output_folder = "/content/drive/MyDrive/Promotion/Code/feature_matching/LightGlue-main/frames/"
video_path = input_folder + "Panama_City_topdown.mp4"
os.makedirs(output_folder, exist_ok=True)

!ffmpeg -i /content/drive/MyDrive/Promotion/Code/feature_matching/LightGlue-main/data/Panama_City_topdown.mp4

# Funktion zum Extrahieren der Frames aus dem Video
def extract_frames(video_path):
    video = cv2.VideoCapture(video_path)

    # Überprüfe, ob das Video erfolgreich geladen wurde
    if not video.isOpened():
        print("Fehler: Das Video konnte nicht geladen werden.")
    else:
        print("Das Video wurde erfolgreich geladen.")
        print(video)

        frames = []
        while True:
            ret, frame = video.read()
            print(frame, ret)
            if not ret:
               break
            frames.append(frame)
        video.release()
        return frames

# Beispiel: Feature Matching auf den ersten beiden Frames
def feature_matching_with_lightglue(video_path):
    # 1. Extrahiere die Frames aus dem MP4-Video
    frames = []
    frames = extract_frames(video_path)

    # Stelle sicher, dass genügend Frames vorhanden sind
    if len(frames) < 2:
        print("Das Video enthält nicht genug Frames.")
        return

    # 2. Verwende LightGlue für das Feature Matching
    frame1 = frames[0]  # Erster Frame
    frame2 = frames[1]  # Zweiter Frame

    # Wandle die Bilder in Graustufen um, da LightGlue nur Graustufenbilder verarbeitet
    gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)

    # 3. Feature Matching mit LightGlue
    matches, keypoints1, keypoints2 = lightglue.match(gray1, gray2)

    # 4. Visualisiere die Matches
    result_img = cv2.drawMatches(frame1, keypoints1, frame2, keypoints2, matches, None)

    # Zeige das Ergebnis
    cv2.imshow("Feature Matches", result_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Feature Matching auf den ersten beiden Frames durchführen
feature_matching_with_lightglue(video_path)

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

TypeError: object of type 'NoneType' has no len()

## Load extractor and matcher module
In this example we use SuperPoint features combined with LightGlue.

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 'mps', 'cpu'

extractor = SuperPoint(max_num_keypoints=2048).eval().to(device)  # load the extractor
matcher = LightGlue(features="superpoint").eval().to(device)

In [None]:
def load_image(path):
    img = cv2.imread(path)
    img = cv2.resize(img, (640, 480)) #Größe anpassen, falls nötig
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #Farbschema anpassen, falls nötig
    img = torch.tensor(img).unsqueeze(0)unsqueeze(0).to(device).float() / 255
    return img

In [None]:
frames = sorted(os.listdir(output_folder))
prev_img = load_image(os.path.join(output_folder, frames[0]))
prev_feats = extractor.extract(prev_img)

In [None]:
for i in range (1, len(frames)):
  curr_img = load_image(os.path.join(output_folder, frames[i]))
  curr_feats = extractor.extract(curr_img)
  matches = matcher({"image0": prev_feats, "image1": curr_feats})
  print(f"Frame {i-1} -> {i}: {len(matches['matches'])} Matches")

  prev_img, prev_feats = curr_img, curr_feats

## Visualizing the results

In [None]:
# LightGlue returns keypoints as tensor
kp1 = [cv2.KeyPoint(x,y,1) for x, y in feats1['keypoints'].cpu().numpy()]
kp2 = [cv2.KeyPoint(x,y,1) for x, y in feats2['keypoints'].cpu().numpy()]

# Transforming Matches
matches_list = matches['matches'].cpu().numpy()
good_matches = [cv2.DMatch(_queryIdx=m[0], _trainIdx=m[1], _distance=0) for m in matches_list]

# Drawing Matches
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, good_matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# Display
cv2.imshow("Feature Matches", img_matches)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Easy example
The top image shows the matches, while the bottom image shows the point pruning across layers. In this case, LightGlue prunes a few points with occlusions, but is able to stop the context aggregation after 4/9 layers.

In [None]:
image0 = load_image("/content/LightGlue/assets/leaf_on.png")    #"Tahoua_Airport_01")#images / "DSC_0411.JPG")
image1 = load_image("/content/LightGlue/assets/leaf_off.png")    #images / "Tahoua_Airport_02")#images / "DSC_0410.JPG")

feats0 = extractor.extract(image0.to(device))
feats1 = extractor.extract(image1.to(device))
matches01 = matcher({"image0": feats0, "image1": feats1})
feats0, feats1, matches01 = [
    rbd(x) for x in [feats0, feats1, matches01]
]  # remove batch dimension

kpts0, kpts1, matches = feats0["keypoints"], feats1["keypoints"], matches01["matches"]
m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]



In [None]:
axes = viz2d.plot_images([image0, image1])
viz2d.plot_matches(m_kpts0, m_kpts1, color="lime", lw=0.2)
viz2d.add_text(0, f'Stop after {matches01["stop"]} layers', fs=20)

kpc0, kpc1 = viz2d.cm_prune(matches01["prune0"]), viz2d.cm_prune(matches01["prune1"])
viz2d.plot_images([image0, image1])
viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=10)

## Difficult example
For pairs with significant viewpoint- and illumination changes, LightGlue can exclude a lot of points early in the matching process (red points), which significantly reduces the inference time.

In [None]:
image0 = load_image(images / "sacre_coeur1.jpg")
image1 = load_image(images / "sacre_coeur2.jpg")

feats0 = extractor.extract(image0.to(device))
feats1 = extractor.extract(image1.to(device))
matches01 = matcher({"image0": feats0, "image1": feats1})
feats0, feats1, matches01 = [
    rbd(x) for x in [feats0, feats1, matches01]
]  # remove batch dimension

kpts0, kpts1, matches = feats0["keypoints"], feats1["keypoints"], matches01["matches"]
m_kpts0, m_kpts1 = kpts0[matches[..., 0]], kpts1[matches[..., 1]]

axes = viz2d.plot_images([image0, image1])
viz2d.plot_matches(m_kpts0, m_kpts1, color="lime", lw=0.2)
viz2d.add_text(0, f'Stop after {matches01["stop"]} layers')

kpc0, kpc1 = viz2d.cm_prune(matches01["prune0"]), viz2d.cm_prune(matches01["prune1"])
viz2d.plot_images([image0, image1])
viz2d.plot_keypoints([kpts0, kpts1], colors=[kpc0, kpc1], ps=6)