# Step 1: Install Dependencies
We need to install the `transformers` library to use the Swin2SR model and `accelerate` for optimized performance.

In [None]:
!pip install transformers accelerate

# Step 2: Import Libraries and Load Model
Here we load the Swin2SR processor and the lightweight X2 model from Hugging Face.

In [None]:
import torch
from transformers import Swin2SRForImageSuperResolution, Swin2SRImageProcessor

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Hugging Face Hub identifier of the checkpoint used for both the processor and the model.
model_id = "caidas/swin2SR-lightweight-x2-64"

processor = Swin2SRImageProcessor.from_pretrained(model_id)

model = Swin2SRForImageSuperResolution.from_pretrained(model_id)
model = model.to(device)
model.eval()  # switch to inference mode (no training-only layers such as dropout)

print("✅ Swin2SR-Light model loaded")

# Step 3: Upload Video File
Upload the video you want to enhance (e.g., a CCTV clip).

In [None]:
from google.colab import files
# files.upload() returns a mapping of uploaded file names; it is empty when
# the upload dialog is cancelled.
uploaded = files.upload()
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose a video.")

# Only the first uploaded file is used by the rest of the notebook.
video_path = next(iter(uploaded))
print("Uploaded:", video_path)

# Step 4: Extract Frames from Video
We split the video into individual frames to process them one by one.

In [None]:
import cv2, os
import shutil  # imported here to match the notebook's per-cell imports

# Frame rate used when the container does not report a usable one.
FALLBACK_FPS = 30.0

# Start from an empty directory so frames left over from a previous run
# (for example a longer video) are not picked up by the later steps.
shutil.rmtree("frames", ignore_errors=True)
os.makedirs("frames", exist_ok=True)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError(f"Could not open video file: {video_path}")

try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    i = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Zero-padded names keep the frames in order when sorted as strings.
        cv2.imwrite(f"frames/{i:05d}.png", frame)
        i += 1
finally:
    # Release the capture even if decoding or writing a frame fails.
    cap.release()

if i == 0:
    raise RuntimeError(f"No frames could be decoded from: {video_path}")

# `not fps > 0` also catches NaN; a non-positive rate would break the
# VideoWriter that reuses `fps` when the video is reassembled.
if not fps > 0:
    print(f"⚠️ Video reports no valid FPS; falling back to {FALLBACK_FPS:.2f}")
    fps = FALLBACK_FPS

print(f"✅ Extracted {i} frames at {fps:.2f} FPS")

# Step 5: Define Enhancement Logic (Tiled Processing)
To avoid out-of-memory errors on large frames, we split each frame into overlapping tiles, upscale every tile separately, and paste the results into the output frame.

In [None]:
from PIL import Image

TILE = 256    # tile edge length, in input pixels
OVERLAP = 32  # input pixels shared by neighbouring tiles
SCALE = 2     # upscaling factor; must match the loaded checkpoint


def _tile_origins(length, tile, stride):
    """Return the start offsets of the tiles needed to cover ``length`` pixels.

    Offsets advance by ``stride`` and stop as soon as a tile reaches the end,
    so no redundant trailing tiles (fully covered by the previous one) are
    produced.
    """
    origins = []
    pos = 0
    while pos < length:
        origins.append(pos)
        if pos + tile >= length:
            break
        pos += stride
    return origins


def enhance_tiled(pil_img):
    """Upscale ``pil_img`` by ``SCALE`` using the Swin2SR model, tile by tile.

    The image is cut into ``TILE``-sized tiles that overlap by ``OVERLAP``
    pixels. Each tile is run through ``processor`` and ``model`` on their own,
    and the results are assembled into one output image.

    Args:
        pil_img: PIL image to enhance (callers pass an RGB image).

    Returns:
        A new RGB PIL image of size ``(width * SCALE, height * SCALE)``.
    """
    w, h = pil_img.size
    out = Image.new("RGB", (w * SCALE, h * SCALE))
    stride = TILE - OVERLAP
    half_overlap = OVERLAP // 2

    for y in _tile_origins(h, TILE, stride):
        for x in _tile_origins(w, TILE, stride):
            tile = pil_img.crop((x, y, min(x + TILE, w), min(y + TILE, h)))
            tile_w, tile_h = tile.size

            # On interior edges drop half of the overlap: pixels at a tile
            # border are reconstructed with the least context, so the seam is
            # moved into the overlap where both neighbours saw real content.
            margin_x = half_overlap if x > 0 else 0
            margin_y = half_overlap if y > 0 else 0

            with torch.no_grad():
                inputs = processor(tile, return_tensors="pt").to(device)
                outputs = model(**inputs)
                tile_out = outputs.reconstruction[0].clamp(0, 1).cpu()

            # The image processor can pad the tile (bottom/right) before the
            # forward pass, making the reconstruction larger than the tile;
            # keep only the region that corresponds to real input pixels.
            tile_out = tile_out[
                :,
                margin_y * SCALE : tile_h * SCALE,
                margin_x * SCALE : tile_w * SCALE,
            ]
            # Round instead of truncating so the conversion does not darken the image.
            tile_arr = (tile_out.permute(1, 2, 0).numpy() * 255.0).round().astype("uint8")
            out.paste(
                Image.fromarray(tile_arr),
                ((x + margin_x) * SCALE, (y + margin_y) * SCALE),
            )
    return out

# Step 6: Process All Frames
This step enhances every frame using the Swin2SR model. This may take several minutes.

In [None]:
from tqdm import tqdm
import shutil  # imported here to match the notebook's per-cell imports

# Only the extracted PNG frames are processed; other directory entries
# (e.g. notebook checkpoint folders) are ignored.
frame_names = sorted(name for name in os.listdir("frames") if name.endswith(".png"))

# Start from an empty output directory so enhanced frames from a previous
# run are not mixed into the reconstructed video.
shutil.rmtree("enhanced", ignore_errors=True)
os.makedirs("enhanced", exist_ok=True)

for f in tqdm(frame_names):
    # Close the source file as soon as the pixels are loaded.
    with Image.open(f"frames/{f}") as src:
        img = src.convert("RGB")
    out = enhance_tiled(img)
    out.save(f"enhanced/{f}")

# Step 7: Reconstruct Enhanced Video
We compile the processed frames back into a high-resolution video file.

In [None]:
# Only PNG frames are written; sorting the zero-padded names restores frame order.
enhanced_names = sorted(name for name in os.listdir("enhanced") if name.endswith(".png"))
if not enhanced_names:
    raise RuntimeError("No enhanced frames found in 'enhanced/'; run the frame-processing step first.")

# The first frame determines the output resolution.
first = cv2.imread(f"enhanced/{enhanced_names[0]}")
if first is None:
    raise RuntimeError(f"Could not read enhanced frame: enhanced/{enhanced_names[0]}")
h, w = first.shape[:2]

out_video = cv2.VideoWriter("enhanced_video.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
if not out_video.isOpened():
    raise RuntimeError("Could not open enhanced_video.mp4 for writing")

try:
    for f in enhanced_names:
        frame = cv2.imread(f"enhanced/{f}")
        if frame is None:
            raise RuntimeError(f"Could not read enhanced frame: enhanced/{f}")
        out_video.write(frame)
finally:
    # Always finalize the file, even if a frame fails to load.
    out_video.release()

print("✅ Enhanced video saved as enhanced_video.mp4")

# Step 8: Side-by-Side Comparison
Compare the original video and the enhanced output side-by-side.

In [None]:
import base64
import os

def get_video_html(path, width=400):
    """Return an HTML ``<video>`` tag that embeds ``path`` as a base64 data URI.

    The video is first transcoded to H.264 with ffmpeg so browsers can play
    it. The transcoded copy is written next to the input as
    ``<name>_encoded.mp4``.

    Args:
        path: Path of the video file to embed.
        width: Display width of the player, in pixels.

    Returns:
        The HTML markup for the embedded player.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    import subprocess  # local import keeps this cell self-contained

    # 1. Convert video to H.264 so browsers can play it.
    # splitext (rather than str.replace) guarantees the output path differs
    # from the input, whatever the input's extension is.
    stem, _ext = os.path.splitext(path)
    processed_path = f"{stem}_encoded.mp4"

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", path,
        "-vcodec", "libx264",
        processed_path,
    ]
    result = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        details = result.stderr.decode(errors="replace").strip()
        raise RuntimeError(f"ffmpeg failed to encode {path!r}: {details}")

    # 2. Read and Base64 encode the video
    with open(processed_path, "rb") as video_file:
        encoded = base64.b64encode(video_file.read()).decode()
    video_url = f"data:video/mp4;base64,{encoded}"

    return f"<video width='{width}' controls><source src='{video_url}' type='video/mp4'></video>"

from IPython.display import HTML, display

# Embed the video that was actually uploaded (not a fixed file name) and the
# enhanced result, using the same relative output path as the other cells.
original_html = get_video_html(video_path)
enhanced_html = get_video_html("enhanced_video.mp4")

# Generate the side-by-side HTML
html_code = f"""
<div style='display: flex; justify-content: space-around; align-items: flex-start; gap: 20px;'>
    <div style='text-align: center;'>
        <h3 style="font-family: sans-serif;">Original Video</h3>
        {original_html}
    </div>
    <div style='text-align: center;'>
        <h3 style="font-family: sans-serif;">Swin2SR Enhanced (2x)</h3>
        {enhanced_html}
    </div>
</div>
"""

display(HTML(html_code))

# Step 9: Download Results
Download the final enhanced video to your local machine.

In [None]:
# Download the reconstructed video (the file written by the VideoWriter cell).
files.download("enhanced_video.mp4")