In [1]:
import os
import torch
import torch.nn as nn
from torchvision.transforms.functional import resize
from torch.utils.data import DataLoader
import torch.optim as optim
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from torchvision import transforms

import os
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.transforms.functional import pad, resize
from torch.utils.data import DataLoader, Dataset
import numpy as np

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()



In [2]:
import torch.nn.functional as F
import random
from torchvision.transforms.functional import gaussian_blur


class SuperResolutionDataset(Dataset):
    def __init__(self, root_dir, patch_size=128, scale_factor=4, max_patches=16):
        self.root_dir = root_dir
        self.patch_size = patch_size
        self.lr_patch_size = patch_size // scale_factor
        self.image_list = os.listdir(root_dir)
        self.scale_factor = scale_factor
        self.max_patches = max_patches  
        self.normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.image_list[idx])
        image = Image.open(img_path).convert("RGB")
        width, height = image.size

        hr_patches = []
        lr_patches = []
        for y in range(0, height, self.patch_size):
            for x in range(0, width, self.patch_size):
                hr_patch = image.crop((x, y, x + self.patch_size, y + self.patch_size))

                if hr_patch.size[0] != self.patch_size or hr_patch.size[1] != self.patch_size:
                    continue

                lr_patch = resize(hr_patch, (self.lr_patch_size, self.lr_patch_size), interpolation=Image.BILINEAR)

                hr_patch_tensor = self.normalize(transforms.ToTensor()(hr_patch))
                lr_patch_tensor = self.normalize(transforms.ToTensor()(lr_patch))
                hr_patches.append(hr_patch_tensor)
                lr_patches.append(lr_patch_tensor)

                blur_kernel_size = random.choice([3, 5])
                lr_patch_blur = gaussian_blur(lr_patch_tensor, kernel_size=blur_kernel_size)
                hr_patches.append(hr_patch_tensor) 
                lr_patches.append(lr_patch_blur)

                noise = torch.randn_like(lr_patch_tensor) * 0.1 
                lr_patch_noisy = torch.clamp(lr_patch_tensor + noise, 0, 1)
                hr_patches.append(hr_patch_tensor) 
                lr_patches.append(lr_patch_noisy)

        while len(hr_patches) < self.max_patches:
            hr_patches.append(torch.zeros((3, self.patch_size, self.patch_size)))
            lr_patches.append(torch.zeros((3, self.lr_patch_size, self.lr_patch_size)))

        hr_patches = hr_patches[:self.max_patches]
        lr_patches = lr_patches[:self.max_patches]

        if len(hr_patches) == 0 or len(lr_patches) == 0:
            raise ValueError(f"No valid patches for image {img_path}")

        return torch.stack(lr_patches), torch.stack(hr_patches)


In [3]:



from torchvision.models import vgg16
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class ResNetSuperResolution(nn.Module):
    def __init__(self, upscale_factor=4):
        super(ResNetSuperResolution, self).__init__()
        
        self.vgg_features = vgg16(pretrained=True).features[:8].eval()  
        for param in self.vgg_features.parameters():
            param.requires_grad = False 

        self.vgg_features.to(device) 

        self.channel_reducer = nn.Conv2d(128, 64, kernel_size=1)  

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)  

        self.conv1 = nn.Conv2d(64, 64, kernel_size=9, padding=4)
        self.relu = nn.ReLU(inplace=True)

        self.residual_blocks = nn.Sequential(*[
            self._make_residual_block(64) for _ in range(5)
        ])
        
        self.upsample1 = nn.Conv2d(64, 64 * (upscale_factor ** 2), kernel_size=3, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
        
        # Final output layer
        self.conv2 = nn.Conv2d(64, 3, kernel_size=9, padding=4)

    def _make_residual_block(self, channels):
        return nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(channels)
        )

    def forward(self, x):
        with torch.no_grad():  
            x = self.vgg_features(x)

        x = self.channel_reducer(x) 
        x = self.upsample(x)  
        x = self.relu(self.conv1(x))
        residual = x
        x = self.residual_blocks(x)
        x += residual  
        x = self.pixel_shuffle(self.upsample1(x)) 
        x = self.conv2(x) 
        return x


In [4]:
def superres_loss(output, target):
    return F.l1_loss(output, target)


In [5]:
root_dir = r"C:\Users\Turog\OneDrive\Documents\GitHub\576_DL_SuperRes\data\combined_largest_images_rd"
patch_size = 256
scale_factor = 2
dataset = SuperResolutionDataset(root_dir=root_dir, patch_size=patch_size, scale_factor=scale_factor)
dataloader = DataLoader(dataset, batch_size=3, shuffle=False)


In [6]:
upscale_factor = scale_factor
model = ResNetSuperResolution(upscale_factor)
criterion = superres_loss
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
from torch.optim import RMSprop
optimizer = RMSprop(model.parameters(), lr=1e-4, alpha=0.9, weight_decay=1e-5)

os.makedirs("chkt_resnet_v9", exist_ok=True) 
os.makedirs("progress_resnet_v9", exist_ok=True)  




In [7]:

if device.type == "cuda":
    print(torch.cuda.get_device_name(0))
    print(f"Memory allocated: {torch.cuda.memory_allocated(0)/1024**3:.2f} GB")
    print(f"Memory reserved: {torch.cuda.memory_reserved(0)/1024**3:.2f} GB")

NVIDIA GeForce RTX 4060 Laptop GPU
Memory allocated: 0.00 GB
Memory reserved: 0.00 GB


In [8]:
def pad_image(image, patch_size):
    width, height = image.size
    pad_width = (patch_size - width % patch_size) % patch_size
    pad_height = (patch_size - height % patch_size) % patch_size
    padding = (0, 0, pad_width, pad_height)
    padded_image = pad(image, padding, fill=0)
    return padded_image, padding


def denormalize(tensor):
    denorm = transforms.Normalize(mean=[-1, -1, -1], std=[2, 2, 2])  
    return denorm(tensor)

test_image_path =     r"C:\Users\Turog\OneDrive\Documents\GitHub\576_DL_SuperRes\data\DIV2K\0745.png"
test_image = Image.open(test_image_path).convert("RGB")
test_width, test_height = test_image.size
padded_test_image, padding = pad_image(test_image, patch_size)
padded_width, padded_height = padded_test_image.size


In [9]:
test_patches = []
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  ### ADDED HERE

for y in range(0, padded_height, patch_size):
    for x in range(0, padded_width, patch_size):
        patch = padded_test_image.crop((x, y, x + patch_size, y + patch_size))
        lr_patch = resize(patch, (patch_size // scale_factor, patch_size // scale_factor), interpolation=Image.BILINEAR)
        test_patches.append(normalize(transforms.ToTensor()(lr_patch)).unsqueeze(0))  



In [10]:
import matplotlib.pyplot as plt
from tqdm import tqdm

# Normalization and Denormalization Functions
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  ### ADDED HERE
def denormalize(tensor):  ### ADDED HERE
    return tensor * 0.5 + 0.5  # Denormalize from [-1, 1] to [0, 1]


In [11]:
import os
import torch
from torchvision.transforms.functional import pad, to_pil_image, to_tensor
from PIL import Image
import matplotlib.pyplot as plt

os.makedirs("inference_results_comparison", exist_ok=True)

def pad_image(image, patch_size):
    """Pad the image to make dimensions divisible by patch size."""
    width, height = image.size
    pad_width = (patch_size - width % patch_size) % patch_size
    pad_height = (patch_size - height % patch_size) % patch_size
    padding = (0, 0, pad_width, pad_height)  
    padded_image = pad(image, padding, fill=0)
    return padded_image, padding



In [None]:
import os
import torch
import math
import torch.nn.functional as F
from torchvision.transforms.functional import pad, resize, to_pil_image, to_tensor
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Define normalization and denormalization
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

def denormalize(tensor):
    """Denormalize the tensor from [-1, 1] to [0, 1]."""
    return torch.clamp(tensor * 0.5 + 0.5, 0, 1)

def calculate_psnr(img1, img2):
    """Calculate PSNR between two images."""
    mse = torch.mean((img1 - img2) ** 2)
    if mse == 0:
        return float('inf')
    return 20 * math.log10(1.0 / math.sqrt(mse))

def pad_image(image, patch_size):
    """Pad image to make dimensions divisible by patch size."""
    width, height = image.size
    pad_width = (patch_size - width % patch_size) % patch_size
    pad_height = (patch_size - height % patch_size) % patch_size
    padding = (0, 0, pad_width, pad_height)
    padded_image = pad(image, padding, fill=0)
    return padded_image, padding


In [21]:
import os
import torch
import math
import torch.nn.functional as F
from torchvision.transforms.functional import pad, resize, to_pil_image, to_tensor
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

def denormalize(tensor):
    return torch.clamp(tensor * 0.5 + 0.5, 0, 1)

def calculate_psnr(img1, img2):
    mse = torch.mean((img1 - img2) ** 2)
    if mse == 0:
        return float('inf') 
    return 20 * math.log10(1.0 / math.sqrt(mse))

def pad_image(image, patch_size):
    width, height = image.size
    pad_width = (patch_size - width % patch_size) % patch_size
    pad_height = (patch_size - height % patch_size) % patch_size
    padding = (0, 0, pad_width, pad_height)
    padded_image = pad(image, padding, fill=0)
    return padded_image, padding

def perform_resnet_inference_comparison(
    model_path, image_paths, output_dir="resnet_inference_results", patch_size=128, scale_factor=2
):
    os.makedirs(output_dir, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNetSuperResolution(upscale_factor=scale_factor).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    for image_path in image_paths:
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        image = Image.open(image_path).convert("RGB")
        original_width, original_height = image.size

        padded_image, _ = pad_image(image, patch_size)
        padded_width, padded_height = padded_image.size

        lr_image = resize(padded_image, (padded_height // scale_factor, padded_width // scale_factor), interpolation=Image.BILINEAR)
        lr_tensor = normalize(to_tensor(lr_image)).unsqueeze(0).to(device)
        hr_tensor = normalize(to_tensor(padded_image)).unsqueeze(0).to(device) 

        with torch.no_grad():
            sr_tensor = model(lr_tensor).to(device).squeeze(0) 
            bilinear_tensor = F.interpolate(
                lr_tensor, scale_factor=scale_factor, mode="bilinear", align_corners=False
            ).to(device).squeeze(0)  

        sr_psnr = calculate_psnr(denormalize(sr_tensor), denormalize(hr_tensor.squeeze(0)))
        bilinear_psnr = calculate_psnr(bilinear_tensor, hr_tensor.squeeze(0))

        lr_image = to_pil_image(denormalize(lr_tensor.squeeze(0).cpu()))
        sr_image = to_pil_image(denormalize(sr_tensor.cpu()))
        bilinear_image = to_pil_image(denormalize(bilinear_tensor.cpu()))
        hr_image = to_pil_image(denormalize(hr_tensor.squeeze(0).cpu()))

        plt.figure(figsize=(20, 10))

        plt.subplot(1, 4, 1)
        plt.title("Low-Resolution Input")
        plt.imshow(lr_image)
        plt.axis("off")

        plt.subplot(1, 4, 2)
        plt.title(f"Bilinear PSNR: {bilinear_psnr:.2f} dB")
        plt.imshow(bilinear_image)
        plt.axis("off")

        plt.subplot(1, 4, 3)
        plt.title(f"ResNet PSNR: {sr_psnr:.2f} dB")
        plt.imshow(sr_image)
        plt.axis("off")

        plt.subplot(1, 4, 4)
        plt.title("Original High-Resolution")
        plt.imshow(hr_image)
        plt.axis("off")

        plt.tight_layout()
        output_path = os.path.join(output_dir, f"{image_name}_comparison.png")
        plt.savefig(output_path, dpi=300, bbox_inches="tight")
        plt.close()

        print(f"Saved comparison for '{image_name}' to {output_path}")
        print(f"PSNR Results: Bilinear = {bilinear_psnr:.2f} dB, ResNet = {sr_psnr:.2f} dB")


perform_resnet_inference_comparison(
    model_path="chkt_resnet_combined/superres_epoch_7.pth",
    image_paths=image_list,
    output_dir="resnet_inference_results",
    patch_size=128,
    scale_factor=2
)


  model.load_state_dict(torch.load(model_path, map_location=device))


Saved comparison for '0745' to resnet_inference_results\0745_comparison.png
PSNR Results: Bilinear = 23.29 dB, ResNet = 20.05 dB
Saved comparison for '0746' to resnet_inference_results\0746_comparison.png
PSNR Results: Bilinear = 27.64 dB, ResNet = 18.50 dB
Saved comparison for '0747' to resnet_inference_results\0747_comparison.png
PSNR Results: Bilinear = 20.17 dB, ResNet = 19.04 dB
Saved comparison for '0748' to resnet_inference_results\0748_comparison.png
PSNR Results: Bilinear = 17.97 dB, ResNet = 18.95 dB
Saved comparison for '0749' to resnet_inference_results\0749_comparison.png
PSNR Results: Bilinear = 28.43 dB, ResNet = 20.75 dB
Saved comparison for 'HR_output' to resnet_inference_results\HR_output_comparison.png
PSNR Results: Bilinear = 18.51 dB, ResNet = 18.71 dB
Saved comparison for '1702053632133310' to resnet_inference_results\1702053632133310_comparison.png
PSNR Results: Bilinear = 17.66 dB, ResNet = 19.71 dB
