In [1]:
import cv2
import torch
import torchvision.transforms as transforms
import RRDBNet_arch as arch

In [4]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_path = "C:/Users/win10/Real-ESRGAN/weights/RealESRGAN_x4plus.pth"
model = arch.RRDBNet(3, 3, 64, 23, gc=32).to(device)

# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints obtained from a trusted source.
checkpoint = torch.load(model_path, map_location=device)

# Some checkpoints wrap the actual state dict in an outer dict. Presumably the
# Real-ESRGAN releases store it under 'params_ema' / 'params' - verify against
# the downloaded file. A plain state dict passes through unchanged.
if isinstance(checkpoint, dict):
    for wrapper_key in ("params_ema", "params"):
        if wrapper_key in checkpoint:
            checkpoint = checkpoint[wrapper_key]
            break

# Load the model for inference. strict=False tolerates mismatched keys, so the
# result is inspected: keys that did not load leave those layers at their
# random initial values, which would silently produce garbage output.
load_result = model.load_state_dict(checkpoint, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        f"WARNING: checkpoint does not fully match the model: "
        f"{len(load_result.missing_keys)} missing key(s), "
        f"{len(load_result.unexpected_keys)} unexpected key(s). "
        f"First missing: {load_result.missing_keys[:3]}; "
        f"first unexpected: {load_result.unexpected_keys[:3]}"
    )

# Set the model to evaluation mode (kept as the last expression so the cell
# still displays the module summary).
model.eval()


RRDBNet(
  (conv_first): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (RRDB_trunk): Sequential(
    (0): RRDB(
      (RDB1): ResidualDenseBlock_5C(
        (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv3): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv4): Conv2d(160, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv5): Conv2d(192, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (lrelu): LeakyReLU(negative_slope=0.2, inplace=True)
      )
      (RDB2): ResidualDenseBlock_5C(
        (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): Conv2d(96, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv3): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv4): Conv2d(160, 32, kernel_size=(3, 3), str

In [6]:
# Clip to be processed.
input_video_path = "C:/Users/win10/Desktop/OpenInApp/test1.mp4"
# The result is written next to the source clip: same name with an "_out"
# suffix inserted before the file extension.
output_video_path = "_out.".join(input_video_path.rsplit(".", 1))

In [7]:
# Open the source clip and read the properties needed to configure the writer.
cap = cv2.VideoCapture(input_video_path)
# VideoCapture does not raise on a bad path or unsupported codec; without this
# check the properties below would be read from an unopened capture and the
# processing loop would exit without handling a single frame.
if not cap.isOpened():
    raise IOError(f"Could not open input video: {input_video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [8]:
# Output is an MP4 ('mp4v' codec) at the source frame rate. Each output frame
# is twice the source width and the same height, i.e. room for two
# source-sized panels placed side by side.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width*2, height))
# VideoWriter does not raise when it cannot be opened (missing codec, bad
# path, zero-sized frame); fail here rather than produce no output later.
if not out.isOpened():
    raise IOError(f"Could not open output video for writing: {output_video_path}")

In [11]:
from PIL import Image

# Convert a decoded RGB frame (numpy array) into a tensor for the network.
# The frame is passed through at its native resolution: the previous 4x
# nearest-neighbour enlargement only multiplied the memory needed per frame
# and handed the network blocky, already-enlarged input.
# NOTE(review): this assumes the network performs the upscaling itself, as the
# "x4plus" weight file name suggests - confirm.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])


In [None]:
# Read the clip frame by frame, run each frame through the network and write
# an "original | enhanced" side-by-side frame to the output video.
# The writer is opened for (width*2, height) frames, so each panel must be
# exactly width x height. To keep the enhanced panel at full resolution, the
# writer's frame size and the panel size below would have to change together.
panel_size = (width, height)  # cv2.resize takes (width, height)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess frame for ESRGAN. The BGR original in `frame` is kept
        # untouched because it is needed again for the left-hand panel.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = transform(rgb_frame).unsqueeze(0).to(device)

        # Upscale frame using ESRGAN
        with torch.no_grad():
            output_tensor = model(input_tensor)

        # Postprocess: (1, C, H, W) float tensor -> (H, W, C) uint8 image.
        # Assumes the network output uses the same [0, 1] range as its input
        # (TODO confirm); values are clamped before scaling so out-of-range
        # outputs cannot wrap around in uint8.
        enhanced_frame = (
            output_tensor.squeeze(0)
            .clamp(0.0, 1.0)
            .mul(255.0)
            .round()
            .byte()
            .permute(1, 2, 0)
            .contiguous()
            .cpu()
            .numpy()
        )
        enhanced_frame = cv2.cvtColor(enhanced_frame, cv2.COLOR_RGB2BGR)

        # Bring both panels to the panel size so they can be concatenated and
        # so the result matches the frame size the writer was opened with.
        enhanced_frame = cv2.resize(enhanced_frame, panel_size, interpolation=cv2.INTER_AREA)
        original_panel = frame
        if (frame.shape[1], frame.shape[0]) != panel_size:
            original_panel = cv2.resize(frame, panel_size, interpolation=cv2.INTER_AREA)
        concatenated_frame = cv2.hconcat([original_panel, enhanced_frame])

        # Write frame to output video
        out.write(concatenated_frame)
finally:
    # Release video capture and writer even if processing fails or is
    # interrupted, so the output file is finalised and the input is unlocked.
    cap.release()
    out.release()