All logs are saved at: `MyDrive/Final_project/LOGS.zip`

# 1 Imports

In [None]:
from google.colab import files, drive
import zipfile
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt
from scipy.stats import wilcoxon, ttest_rel
import scipy.stats as stats

# 2 Upload logs and make clean csv from each log

In [None]:
# Mount Google Drive so the archived training logs are reachable from Colab.
drive.mount('/content/drive')

# Archive on Drive and the local folder it is unpacked into.
zip_path = "/content/drive/MyDrive/Final_project/LOGS.zip"
extract_path = "/content/originalLOGS"

os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

Mounted at /content/drive


In [None]:
# Folder that receives the cleaned CSV files written by the parsing cells below.
outputLogs_dir = "/content/outputLogs"
os.makedirs(outputLogs_dir, exist_ok=True)

## 2.1 Real_esrgan

In [None]:
log_file = "/content/originalLOGS/outputLog/RealEsrganOutputLog.log"

# A training-progress line: timestamp, [epoch / iter / lr] header, [eta / timing] header,
# followed by the seven generator / discriminator terms.
train_pattern = re.compile(
    r'^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+\s+INFO:\s+\[finet\.\.\]'
    r'\[epoch:\s+(?P<epoch>\d+),\s+iter:\s+(?P<iter>[\d,]+),\s+lr:\((?P<lr>[\d.eE+\-]+),\)\]\s+'
    r'\[eta:\s+(?P<eta>[\d:\- ]+),\s+time \(data\):\s+(?P<time_data>[\d.]+)\s+\((?P<time_data_aux>[\d.]+)\)\]\s+'
    r'l_g_pix:\s+(?P<l_g_pix>[\d.eE+\-]+)\s+l_g_percep:\s+(?P<l_g_percep>[\d.eE+\-]+)\s+'
    r'l_g_gan:\s+(?P<l_g_gan>[\d.eE+\-]+)\s+l_d_real:\s+(?P<l_d_real>[\d.eE+\-]+)\s+'
    r'out_d_real:\s+(?P<out_d_real>[\d.eE+\-]+)\s+l_d_fake:\s+(?P<l_d_fake>[\d.eE+\-]+)\s+'
    r'out_d_fake:\s+(?P<out_d_fake>[\d.eE+\-]+)',
    re.MULTILINE
)

# A validation block: a "Validation" line followed by the psnr / ssim / niqe lines,
# each carrying the current value, the best value so far and the iteration of that best.
val_pattern = re.compile(
    r'Validation[^\n]*\n\s*'
    r'# psnr:\s+(?P<psnr>[\d.]+)\s+Best:\s+(?P<best_psnr>[\d.]+)\s+@ (?P<psnr_iter>[\d,]+) iter?\s*\n\s*'
    r'# ssim:\s+(?P<ssim>[\d.]+)\s+Best:\s+(?P<best_ssim>[\d.]+)\s+@ (?P<ssim_iter>[\d,]+) iter?\s*\n\s*'
    r'# niqe:\s+(?P<niqe>[\d.]+)\s+Best:\s+(?P<best_niqe>[\d.]+)\s+@ (?P<niqe_iter>[\d,]+)\s+ite?r?',
    re.IGNORECASE
)

with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
    lines = f.readlines()

# Validation fields copied onto a training record (kept in this order in the CSV).
val_keys = ["psnr", "ssim", "niqe", "best_psnr", "best_ssim", "best_niqe"]

# Find every training line once. Each one "owns" the text that follows it
# up to the next training line (or the end of the file).
train_hits = [(idx, m) for idx, m in enumerate(map(train_pattern.search, lines)) if m]
block_ends = [idx for idx, _ in train_hits[1:]] + [len(lines)]

records = []
for (start, m_train), stop in zip(train_hits, block_ends):
    rec = m_train.groupdict()
    rec.update(dict.fromkeys(val_keys))  # validation metrics default to None
    m_val = val_pattern.search("".join(lines[start + 1:stop]))
    if m_val:
        rec.update({k: m_val.group(k) for k in val_keys})
    records.append(rec)

df_logs = pd.DataFrame.from_records(records)


def to_num(x):
    """Strip thousands separators from strings, then coerce to a number (NaN if unparsable)."""
    cleaned = x.replace(",", "") if isinstance(x, str) else x
    return pd.to_numeric(cleaned, errors="coerce")


numeric_cols = [
    "epoch", "iter", "lr", "time_data", "time_data_aux",
    "l_g_pix", "l_g_percep", "l_g_gan", "l_d_real", "out_d_real", "l_d_fake", "out_d_fake",
    "psnr", "ssim", "niqe", "best_psnr", "best_ssim", "best_niqe"
]

# `iter` is converted separately so it ends up as a nullable integer column.
df_logs["iter"] = pd.to_numeric(
    df_logs["iter"].astype(str).str.replace(",", "", regex=False),
    errors="coerce",
).astype("Int64")

for col in numeric_cols:
    if col == "iter" or col not in df_logs.columns:
        continue
    df_logs[col] = df_logs[col].apply(to_num)

out_path = "/content/outputLogs/RealEsrgan_Log.csv"
df_logs.to_csv(out_path, index=False)

print("Saved to:", out_path)
print(df_logs.tail())

Saved to: /content/outputLogs/RealEsrgan_Log.csv
              timestamp  epoch  iter       lr      eta  time_data  \
44  2025-09-01 13:10:42      2  4500  0.00001  0:10:57      0.188   
45  2025-09-01 13:12:52      2  4600  0.00001  0:08:45      0.188   
46  2025-09-01 13:15:03      2  4700  0.00001  0:06:33      0.190   
47  2025-09-01 13:17:14      2  4800  0.00001  0:04:22      0.193   
48  2025-09-01 13:19:25      2  4900  0.00001  0:02:10      0.188   

    time_data_aux   l_g_pix  l_g_percep   l_g_gan  l_d_real  out_d_real  \
44          0.003  0.103860     0.95664  0.042772   0.49920    0.567260   
45          0.003  0.041040     0.60353  0.062610   0.77790   -0.080664   
46          0.003  0.071234     0.71287  0.048641   0.58410    0.551620   
47          0.003  0.177890     1.44270  0.038565   0.36353    1.468600   
48          0.003  0.060181     0.74335  0.056393   0.72768    0.060011   

    l_d_fake  out_d_fake     psnr    ssim    niqe  best_psnr  best_ssim  \
44   0.601

## 2.2 BSRGAN

In [None]:
# Path to log file
log_file = "/content/originalLOGS/outputLog/bsrganOutputLog.log"

# Regex for training lines
train_pattern = re.compile(
    r'^'                                   # start of line
    r'(?P<timestamp>\d{2}-\d{2}-\d{2} '    # date (yy-mm-dd)
    r'\d{2}:\d{2}:\d{2}\.\d+)'             # time (hh:mm:ss.mmm)
    r'\s+: <epoch:\s+(?P<epoch>\d+),'      # epoch number
    r'\s+iter:\s+(?P<iter>[\d,]+),'        # iteration number
    r'\s+lr:(?P<lr>[\d.eE+\-]+),'          # learning rate
    r'\s+iter_time:(?P<iter_time>[\d.]+)s>'# iteration time
    r'\s+G_loss:\s+(?P<G_loss>[\d.eE+\-]+)'# generator loss
    r'\s+F_loss:\s+(?P<F_loss>[\d.eE+\-]+)'# feature loss
    r'\s+D_loss:\s+(?P<D_loss>[\d.eE+\-]+)'# discriminator loss
    r'\s+D_real:\s+(?P<D_real>[\d.eE+\-]+)'# discriminator real score
    r'\s+D_fake:\s+(?P<D_fake>[\d.eE+\-]+)'# discriminator fake score
)

# Regex for validation lines
val_pattern = re.compile(
    r'^'                                   # start of line
    r'(?P<timestamp>\d{2}-\d{2}-\d{2} '    # date (yy-mm-dd)
    r'\d{2}:\d{2}:\d{2}\.\d+)'             # time (hh:mm:ss.mmm)
    r'\s+: <epoch:\s+(?P<epoch>\d+),'      # epoch number
    r'\s+iter:\s+(?P<iter>[\d,]+),'        # iteration number
    r'\s+Avg PSNR:\s+(?P<psnr>[\d.]+)dB,'  # validation PSNR
    r'\s+Avg SSIM:\s+(?P<ssim>[\d.]+),'    # validation SSIM
    r'\s+Avg NIQE:\s+(?P<niqe>[\d.]+)>'    # validation NIQE
)

# Read file
with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
    lines = f.readlines()

train_records = []
val_records = []

for line in lines:
    m_train = train_pattern.search(line)
    m_val = val_pattern.search(line)

    if m_train:
        rec = m_train.groupdict()
        train_records.append(rec)

    elif m_val:
        rec = m_val.groupdict()
        val_records.append(rec)

# Convert to DataFrames
df_train = pd.DataFrame(train_records)
df_val = pd.DataFrame(val_records)

# Clean up numbers
def to_num(x):
    if isinstance(x, str):
        x = x.replace(",", "")
    return pd.to_numeric(x, errors="coerce")

for df in [df_train, df_val]:
    for col in df.columns:
        if col not in ["timestamp"]:
            df[col] = df[col].apply(to_num)

# Merge training and validation on epoch + iter
df_logs = pd.merge(df_train, df_val, on=["epoch", "iter"], suffixes=("_train", "_val"), how="left")

# Final reordering
df_logs = df_logs[[
    "timestamp_train", "epoch", "iter", "lr", "iter_time",
    "G_loss", "F_loss", "D_loss", "D_real", "D_fake",
    "psnr", "ssim", "niqe", "timestamp_val"
]]

# Save CSV
out_path = "/content/outputLogs/Bsrgan_Log.csv"
df_logs.to_csv(out_path, index=False)

print("Saved to:", out_path)
print(df_logs.head())


Saved to: /content/outputLogs/Bsrgan_Log.csv
         timestamp_train  epoch  iter        lr  iter_time   G_loss  F_loss  \
0  25-09-08 13:19:51.703      0   100  0.000005      0.596  0.05047   9.607   
1  25-09-08 13:22:51.002      0   200  0.000005      0.597  0.20270  21.700   
2  25-09-08 13:25:49.127      0   300  0.000005      0.603  0.04305   8.097   
3  25-09-08 13:28:49.798      0   400  0.000005      0.597  0.14390  16.950   
4  25-09-08 13:31:47.757      0   500  0.000005      0.603  0.05533   8.931   

   D_loss  D_real  D_fake   psnr    ssim  niqe          timestamp_val  
0  0.3918  0.3809  0.3844  17.52  0.4073  5.69  25-09-08 13:21:48.129  
1  0.3129  0.4612  0.4694  17.45  0.4156  6.08  25-09-08 13:24:46.525  
2  0.3751  0.4141  0.4022  17.46  0.4152  6.09  25-09-08 13:27:47.069  
3  0.3065  0.7450  0.4671  17.60  0.4193  6.10  25-09-08 13:30:44.603  
4  0.5594  0.2831  0.2548  17.50  0.4114  6.06  25-09-08 13:33:43.865  


## 2.3 SwinIR

In [None]:
log_file = "/content/originalLOGS/outputLog/SwinIROutputLog.log"

# Training line; everything after G_loss is optional so lines missing those terms still match.
train_pattern = re.compile(
    r'^(?P<timestamp>\d{2}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)'
    r'\s+: <epoch:\s+(?P<epoch>\d+),'
    r'\s+iter:\s+(?P<iter>[\d,]+),'
    r'\s+lr:(?P<lr>[\d.eE+\-]+),'
    r'\s+iter_time:(?P<iter_time>[\d.]+)s>'
    r'\s+G_loss:\s+(?P<G_loss>[\d.eE+\-]+)'
    r'(?:\s+F_loss:\s+(?P<F_loss>[\d.eE+\-]+))?'
    r'(?:\s+D_loss:\s+(?P<D_loss>[\d.eE+\-]+))?'
    r'(?:\s+D_real:\s+(?P<D_real>[\d.eE+\-]+))?'
    r'(?:\s+D_fake:\s+(?P<D_fake>[\d.eE+\-]+))?'
)

# Validation line: averaged PSNR / SSIM / NIQE for a given epoch and iteration.
val_pattern = re.compile(
    r'^(?P<timestamp>\d{2}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+)'
    r'\s+: <epoch:\s+(?P<epoch>\d+),'
    r'\s+iter:\s+(?P<iter>[\d,]+),'
    r'\s+Avg PSNR:\s+(?P<psnr>[\d.]+)dB,'
    r'\s+Avg SSIM:\s+(?P<ssim>[\d.]+),'
    r'\s+Avg NIQE:\s+(?P<niqe>[\d.]+)>'
)

train_records, val_records = [], []

# Stream the file line by line; a training match wins, otherwise try the validation pattern.
with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        m_train = train_pattern.search(line)
        if m_train:
            train_records.append(m_train.groupdict())
        else:
            m_val = val_pattern.search(line)
            if m_val:
                val_records.append(m_val.groupdict())

df_train = pd.DataFrame(train_records)
df_val = pd.DataFrame(val_records)


def to_num(x):
    """Strip thousands separators from strings, then coerce to a number (NaN if unparsable)."""
    cleaned = x.replace(",", "") if isinstance(x, str) else x
    return pd.to_numeric(cleaned, errors="coerce")


# Everything except the timestamp text becomes numeric.
for frame in (df_train, df_val):
    for col in frame.columns:
        if col == "timestamp":
            continue
        frame[col] = frame[col].apply(to_num)

# Outer join keeps rows that appear in only one of the two tables,
# then the result is ordered by epoch and iteration.
df_logs = (
    pd.merge(df_train, df_val, on=["epoch","iter"], how="outer", suffixes=("_train","_val"))
    .sort_values(by=["epoch","iter"])
    .reset_index(drop=True)
)

out_path = "/content/outputLogs/SwinIR_Log.csv"
df_logs.to_csv(out_path, index=False)

print("Saved to:", out_path)
print(df_logs.head(10))


Saved to: /content/outputLogs/SwinIR_Log.csv
         timestamp_train  epoch  iter        lr  iter_time   G_loss  F_loss  \
0  25-09-17 06:59:09.342      0   100  0.000010      0.772  0.05342   9.142   
1  25-09-17 07:02:29.741      0   200  0.000010      0.770  0.10920  11.740   
2  25-09-17 07:05:50.791      0   300  0.000010      0.785  0.09241  11.790   
3  25-09-17 07:09:12.486      0   400  0.000010      0.773  0.07045  11.560   
4  25-09-17 07:12:34.762      0   500  0.000010      0.778  0.09894  11.360   
5  25-09-17 07:15:58.338      0   600  0.000010      0.775  0.07217   9.135   
6  25-09-17 07:19:21.484      0   700  0.000010      0.774  0.04442   8.454   
7  25-09-17 07:22:43.389      0   800  0.000010      0.780  0.09554  11.400   
8  25-09-17 07:26:05.179      0   900  0.000010      0.768  0.10020  11.180   
9  25-09-17 07:29:23.611      0  1000  0.000006      0.775  0.09990  14.220   

    D_loss    D_real    D_fake          timestamp_val   psnr    ssim  niqe  
0  0.069

## 2.4 Hybrid Attention Transformer

In [None]:
log_file = "/content/originalLOGS/outputLog/hatOutputLog.log"

# One regex per training field; each is searched independently on a training line,
# so a field that is missing from a line simply becomes None.
train_patterns = {
    "timestamp": r'^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+)',
    "epoch": r'epoch:\s*(?P<epoch>\d+)',
    "iter": r'iter:\s*(?P<iter>[\d,]+)',
    "lr": r'lr:\((?P<lr>[\d.eE+\-]+),?\)',
    "time_data_avg": r'time \(data\):\s*(?P<time_data_avg>[\d.eE+\-]+)',
    "time_data_recent": r'time \(data\):\s*[\d.eE+\-]+\s*\((?P<time_data_recent>[\d.eE+\-]+)\)',
    "l_g_pix": r'l_g_pix:\s*(?P<l_g_pix>[\d.eE+\-]+)',
    "l_g_percep": r'l_g_percep:\s*(?P<l_g_percep>[\d.eE+\-]+)',
    "l_g_gan": r'l_g_gan:\s*(?P<l_g_gan>[\d.eE+\-]+)',
    "l_d_real": r'l_d_real:\s*(?P<l_d_real>[\d.eE+\-]+)',
    "out_d_real": r'out_d_real:\s*(?P<out_d_real>[\d.eE+\-]+)',
    "l_d_fake": r'l_d_fake:\s*(?P<l_d_fake>[\d.eE+\-]+)',
    "out_d_fake": r'out_d_fake:\s*(?P<out_d_fake>[\d.eE+\-]+)'
}

# Regexes for the validation block (the "Validation" line plus the lines that follow it)
val_patterns = {
    "timestamp": r'^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+)',
    "psnr": r'# psnr:\s*(?P<psnr>[\d.]+)',
    "ssim": r'# ssim:\s*(?P<ssim>[\d.]+)',
    "niqe": r'# niqe:\s*(?P<niqe>[\d.]+)'
}

# Counters are stored as integers so the merge keys and the CSV show 0 / 100 / 200
# (not 0.0 / 100.0), matching the CSVs written by the other three parsers.
int_fields = {"epoch", "iter"}

with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
    lines = f.readlines()

train_records = []
val_records = []

for i, line in enumerate(lines):
    # Training line
    if "[train" in line:
        rec = {}
        for key, pattern in train_patterns.items():
            m = re.search(pattern, line)
            if m is None:
                rec[key] = None
            elif key == "timestamp":
                rec[key] = m.group(key)
            else:
                text = m.group(key).replace(",", "")  # drop thousands separators
                rec[key] = int(text) if key in int_fields else float(text)
        train_records.append(rec)

    # Validation block: this line plus the next three (psnr / ssim / niqe)
    elif "Validation" in line:
        rec = {}
        val_text = "".join(lines[i:i + 4])
        for key, pattern in val_patterns.items():
            m = re.search(pattern, val_text)
            if m is None:
                rec[key] = None
            elif key == "timestamp":
                rec[key] = m.group(key)
            else:
                rec[key] = float(m.group(key))
        # Tag the block with the epoch / iter of the most recent training line.
        # A block seen before any training line still gets both keys (as None),
        # so every validation record has the same set of columns.
        last_train = train_records[-1] if train_records else {}
        rec["epoch"] = last_train.get("epoch")
        rec["iter"] = last_train.get("iter")
        val_records.append(rec)

df_train = pd.DataFrame(train_records)
df_val = pd.DataFrame(val_records)

# Outer join keeps rows that appear in only one of the two tables
df_logs = pd.merge(df_train, df_val, on=["epoch","iter"], how="outer", suffixes=("_train","_val"))

df_logs = df_logs.sort_values(by=["epoch","iter"]).reset_index(drop=True)

out_path = "/content/outputLogs/Hat_Log.csv"
df_logs.to_csv(out_path, index=False)
print("Saved to:", out_path)
print(df_logs.head())


Saved to: /content/outputLogs/Hat_Log.csv
           timestamp_train  epoch   iter       lr  time_data_avg  \
0  2025-09-12 13:16:16,625    0.0  100.0  0.00002          1.158   
1  2025-09-12 13:20:19,799    0.0  200.0  0.00002          1.122   
2  2025-09-12 13:24:20,145    0.0  300.0  0.00002          1.084   
3  2025-09-12 13:28:20,858    0.0  400.0  0.00002          1.085   
4  2025-09-12 13:32:24,511    0.0  500.0  0.00002          1.085   

   time_data_recent   l_g_pix  l_g_percep   l_g_gan  l_d_real  out_d_real  \
0             0.005  0.104740      13.928  0.073276   0.72195   -0.055797   
1             0.004  0.094066      12.352  0.073061   0.68575    0.022554   
2             0.004  0.069184      11.141  0.067326   0.63981    0.179600   
3             0.004  0.165040      18.358  0.075634   0.44016    0.788980   
4             0.004  0.076404      11.122  0.105400   0.94561   -0.442860   

   l_d_fake  out_d_fake            timestamp_val     psnr    ssim    niqe  
0   0.6551

## 2.5 Combined Log for GPU

In [None]:
path = "/content/originalLOGS/gpuLog"
models = ["bsrgan", "hat", "realesrgan", "swinir"]

# Name of the time column shared by the memory and power exports
time_col = "Relative Time (Process)"

df_all = []

for model in models:
    mem_file = os.path.join(path, f"{model}MemoryMB.csv")
    pow_file = os.path.join(path, f"{model}PowerWatts.csv")

    df_mem = pd.read_csv(mem_file, usecols=[time_col, "memoryMB"]).rename(columns={time_col: "time"})
    df_pow = pd.read_csv(pow_file, usecols=[time_col, "powerWatts"]).rename(columns={time_col: "time"})

    # Coerce the time column to float64 in both frames: merge_asof requires both keys
    # to share a dtype, and the float tolerance below is only valid for a float key.
    df_mem["time"] = pd.to_numeric(df_mem["time"], errors="coerce").astype("float64")
    df_pow["time"] = pd.to_numeric(df_pow["time"], errors="coerce").astype("float64")

    # Values that could not be parsed became NaN above; merge_asof raises on null
    # merge keys, so those rows (which cannot be aligned anyway) are dropped first.
    df_mem = df_mem.dropna(subset=["time"])
    df_pow = df_pow.dropna(subset=["time"])

    # Pair every memory sample with the power sample closest in time
    df = pd.merge_asof(
        df_mem.sort_values("time"),
        df_pow.sort_values("time"),
        on="time",
        direction="nearest",
        tolerance=0.1  # largest accepted time gap between paired samples (adjust if needed)
    )

    df["model"] = model
    df_all.append(df)

df_final = pd.concat(df_all, ignore_index=True)
out_path = "/content/outputLogs/combined_gpu_log.csv"
df_final.to_csv(out_path, index=False)

print("Combined CSV saved to:", out_path)
print(df_final.head())


Combined CSV saved to: /content/outputLogs/combined_gpu_log.csv
        time     memoryMB  powerWatts   model
0  15.407947  1485.859375      79.203  bsrgan
1  30.404281  1486.023438      73.621  bsrgan
2  45.407688  1486.140625      69.594  bsrgan
3  60.412879  1486.203125      72.045  bsrgan
4  75.404697  1573.390625      30.517  bsrgan


# 3 Metrics trends through iterations

In [None]:
# Paths of the cleaned CSVs produced in section 2 (one per model, plus the combined GPU log)
HAT_log = "/content/outputLogs/Hat_Log.csv"
Swin_log = "/content/outputLogs/SwinIR_Log.csv"
BSRGAN_log = "/content/outputLogs/Bsrgan_Log.csv"
RealESRGAN_log = "/content/outputLogs/RealEsrgan_Log.csv"
GPU_log = "/content/outputLogs/combined_gpu_log.csv"

In [None]:
# Build one long dataframe of validation metrics, tagged with the model each row came from.
metric_cols = ["iter", "psnr", "ssim", "niqe"]

df_hat = pd.read_csv(HAT_log)[metric_cols].assign(model="HAT")
df_swin = pd.read_csv(Swin_log)[metric_cols].assign(model="SwinIR")
df_bsrg = pd.read_csv(BSRGAN_log)[metric_cols].assign(model="BSRGAN")
df_real = pd.read_csv(RealESRGAN_log)[metric_cols].assign(model="RealESRGAN")

# Stack the four models on top of each other
df_all = pd.concat([df_hat, df_swin, df_bsrg, df_real], ignore_index=True)

# Make sure the iteration counter is numeric (unparsable values become NaN)
df_all["iter"] = pd.to_numeric(df_all["iter"], errors="coerce")

print(df_all.head())

    iter     psnr    ssim    niqe model
0  100.0  19.2082  0.4665  6.5025   HAT
1  200.0  19.2266  0.4680  6.4696   HAT
2  300.0  19.2429  0.4695  6.4122   HAT
3  400.0  19.2584  0.4710  6.3684   HAT
4  500.0  19.2689  0.4722  6.3462   HAT


In [None]:
chart_width = 800


def metric_line_chart(data, metric, width):
    """Interactive line chart (with point markers) of `metric` against iteration, coloured by model.

    data   : dataframe with `iter`, `model` and the `metric` column
    metric : column name to plot on the y axis (the axis does not have to start at zero)
    width  : chart width in pixels
    """
    return alt.Chart(data).mark_line(point=True).encode(
        x='iter:Q',
        y=alt.Y(f'{metric}:Q', scale=alt.Scale(zero=False)),
        color='model:N',
        tooltip=['model', 'iter', metric]
    ).properties(
        title=f'{metric.upper()} over iterations',
        width=width
    ).interactive()


chart_psnr = metric_line_chart(df_all, 'psnr', chart_width)
chart_ssim = metric_line_chart(df_all, 'ssim', chart_width)
chart_niqe = metric_line_chart(df_all, 'niqe', chart_width)

# Stack the three charts vertically
chart_psnr & chart_ssim & chart_niqe


# 4 Convergence Analysis via rate of change of metrics per iteration

In [None]:
# Width (in iterations) of the bins the gradients are summarised over
bin_size = 500
df_all['bin'] = (df_all['iter'] // bin_size) * bin_size

# "Gradient" = difference between consecutive logged rows of the same model.
# Note it is a plain diff(); it is not divided by the iteration gap between rows.
df_all = df_all.sort_values(['model','iter'])
for metric in ('psnr', 'ssim', 'niqe'):
    df_all[f'{metric}_grad'] = df_all.groupby('model')[metric].diff()

# Number of rows in every (model, bin) pair
bin_counts = df_all.groupby(['model','bin']).size().reset_index(name='count')

# Only bins holding more than one row are summarised
valid_bins = bin_counts.loc[bin_counts['count'] > 1, ['model','bin']]

# mean / std / min / max of each gradient, in that order, per (model, bin)
agg_spec = {
    f'{stat}_{metric}_grad': (f'{metric}_grad', stat)
    for metric in ('psnr', 'ssim', 'niqe')
    for stat in ('mean', 'std', 'min', 'max')
}

df_convergence_stats = (
    df_all.merge(valid_bins, on=['model','bin'], how='inner')
    .groupby(['model','bin'])
    .agg(**agg_spec)
    .reset_index()
)

# Statistics that could not be computed (NaN) are reported as 0
df_convergence_stats = df_convergence_stats.fillna(0)

In [None]:
# Clustered bar charts of the mean gradient per iteration bin.
# Tooltip shown on every bar: model, bin, then mean/std of each metric's gradient.
tooltips = ['model:N', 'bin:Q'] + [
    f'{stat}_{metric}_grad:Q'
    for metric in ('psnr', 'ssim', 'niqe')
    for stat in ('mean', 'std')
]


def make_bar_chart(metric, title):
    """Bars of `metric` per iteration bin (one bar per model, side by side) over a red zero line.

    metric : column of `df_convergence_stats` to plot on the y axis
    title  : y-axis title
    """
    zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='red').encode(y='y:Q')

    grouped_bars = (
        alt.Chart(df_convergence_stats)
        .mark_bar(opacity=0.7)
        .encode(
            x=alt.X('bin:O', title='Iteration Bin'),  # ordinal, so bins are evenly spaced
            y=alt.Y(f'{metric}:Q', title=title),
            color='model:N',
            xOffset='model:N',  # offsets the models within a bin instead of stacking them
            tooltip=tooltips
        )
        .properties(width=500, height=300)
    )

    return grouped_bars + zero_line


# One panel per metric, placed next to each other with a shared colour scale
panel_specs = [
    ('mean_psnr_grad', 'PSNR Gradient'),
    ('mean_ssim_grad', 'SSIM Gradient'),
    ('mean_niqe_grad', 'NIQE Gradient'),
]
final_chart = alt.hconcat(
    *[make_bar_chart(metric, title) for metric, title in panel_specs]
).resolve_scale(color='shared')

final_chart


## 4.1 Hypothesis testing using standard deviations of 'rate of change' per model

In [None]:
# The standard deviation of the rate of change (rather than of the metrics themselves)
# is used to capture how stable and consistent each model's improvement is during training.

rate_cols = ['psnr_grad','ssim_grad','niqe_grad']

# Standard deviation of each gradient, per model
grad_std = df_all.groupby('model')[rate_cols].std().reset_index()

# Confidence score per metric: 1 - std / (largest std among the models),
# so the model with the largest spread scores 0.
confidence = 1 - grad_std[rate_cols] / grad_std[rate_cols].max()
grad_std = grad_std.join(confidence.add_prefix('conf_'))

# Overall confidence = mean of the three per-metric scores
grad_std['overall_conf'] = grad_std[['conf_psnr_grad','conf_ssim_grad','conf_niqe_grad']].mean(axis=1)

grad_std

Unnamed: 0,model,psnr_grad,ssim_grad,niqe_grad,conf_psnr_grad,conf_ssim_grad,conf_niqe_grad,overall_conf
0,BSRGAN,0.061241,0.003993,0.103013,0.0,0.0,0.0,0.0
1,HAT,0.008689,0.000608,0.016234,0.858115,0.847678,0.84241,0.849401
2,RealESRGAN,0.01532,0.001294,0.03884,0.74985,0.675798,0.622964,0.682871
3,SwinIR,0.041995,0.002202,0.081631,0.314266,0.448601,0.207569,0.323479


# 5 Overfitting Analysis

In [None]:
chart_width = 500

# Only HAT and RealESRGAN are compared in this section
df_filtered = df_all[df_all['model'].isin(['HAT', 'RealESRGAN'])]

# Encodings common to the three charts: iteration on x, one colour per model
base_chart = alt.Chart(df_filtered).mark_line(point=True).encode(
    x='iter:Q',
    color='model:N'
)

# PSNR over iterations
chart_psnr = base_chart.encode(
    y=alt.Y('psnr:Q', scale=alt.Scale(zero=False)),
    tooltip=['model', 'iter', 'psnr']
).properties(title='PSNR over iterations', width=chart_width).interactive()

# SSIM over iterations
chart_ssim = base_chart.encode(
    y=alt.Y('ssim:Q', scale=alt.Scale(zero=False)),
    tooltip=['model', 'iter', 'ssim']
).properties(title='SSIM over iterations', width=chart_width).interactive()

# NIQE over iterations
chart_niqe = base_chart.encode(
    y=alt.Y('niqe:Q', scale=alt.Scale(zero=False)),
    tooltip=['model', 'iter', 'niqe']
).properties(title='NIQE over iterations', width=chart_width).interactive()

# Place the charts next to each other with a shared colour scale
final_chart = alt.hconcat(chart_psnr, chart_ssim, chart_niqe).resolve_scale(color='shared')

final_chart


## 5.1 Hypothesis tests to assess statistical significance

In [None]:
# Before running this cell, test the generators from the overfitted checkpoint and from the
# best checkpoint with Testing_with_validation_metrics.ipynb, save the results to
# hat_realesrgan_test_results.txt and upload that file to Colab manually.

# Tab-separated results: one `Model` column plus `<METRIC>_Best` / `<METRIC>_Overfit` columns
df = pd.read_csv("/content/hat_realesrgan_test_results.txt", sep="\t")

results = []

for model in df['Model'].unique():
    df_model = df[df['Model'] == model]
    for metric in ['PSNR', 'SSIM', 'NIQE']:
        best = df_model[f'{metric}_Best']
        overfit = df_model[f'{metric}_Overfit']

        # Both tests are paired: row k of `overfit` is compared with row k of `best`
        t_res = ttest_rel(overfit, best)   # paired t-test
        w_res = wilcoxon(overfit, best)    # Wilcoxon signed-rank test

        results.append({
            'Model': model,
            'Metric': metric,
            'Mean_Best': best.mean(),
            'Mean_Overfit': overfit.mean(),
            'Mean_Diff': (overfit - best).mean(),  # overfit minus best
            't_stat': t_res.statistic,
            't_p_value': t_res.pvalue,
            'Wilcoxon_Stat': w_res.statistic,
            'Wilcoxon_p_value': w_res.pvalue
        })

# One row per (model, metric)
df_stats = pd.DataFrame(results)

print(df_stats)

        Model Metric  Mean_Best  Mean_Overfit  Mean_Diff    t_stat  t_p_value  \
0  RealESRGAN   PSNR   17.93868      17.69004   -0.24864 -2.592107   0.060549   
1  RealESRGAN   SSIM    0.43242       0.42666   -0.00576 -1.356794   0.246366   
2  RealESRGAN   NIQE    5.90628       5.99242    0.08614  0.803144   0.466902   
3         HAT   PSNR   18.08192      17.66470   -0.41722 -3.264167   0.030959   
4         HAT   SSIM    0.43730       0.43082   -0.00648 -1.460134   0.218035   
5         HAT   NIQE    6.25910       6.46036    0.20126  2.270414   0.085688   

   Wilcoxon_Stat  Wilcoxon_p_value  
0            0.0            0.0625  
1            4.0            0.4375  
2            4.0            0.4375  
3            0.0            0.0625  
4            2.0            0.1875  
5            0.0            0.0625  


# 6 Metric Sensitivity Analysis

In [None]:
# Visualise the pairwise relationships among the three metrics

def scatter_grid(df):
    """Return a grid of metric-vs-metric scatter plots, one row per model.

    Column names are lower-cased first, so `df` needs a `model` column plus
    `psnr`, `ssim` and `niqe` in any letter case. Every row holds three panels
    (PSNR vs SSIM, PSNR vs NIQE, NIQE vs SSIM); each panel overlays a regression
    line on the points and clamps both axes to the min/max of that model's data.
    """
    df = df.rename(columns=str.lower)

    # (y metric, x metric) for each panel of a row
    pairs = [
        ("psnr", "ssim"),
        ("psnr", "niqe"),
        ("niqe", "ssim")
    ]

    def _panel(df_model, y_metric, x_metric):
        """Scatter of y_metric against x_metric with a regression line on top."""
        x_domain = [df_model[x_metric].min(), df_model[x_metric].max()]
        y_domain = [df_model[y_metric].min(), df_model[y_metric].max()]

        points = alt.Chart(df_model).mark_point(size=80, filled=True).encode(
            x=alt.X(x_metric, scale=alt.Scale(domain=x_domain)),
            y=alt.Y(y_metric, scale=alt.Scale(domain=y_domain))
        )
        trend = points.transform_regression(x_metric, y_metric).mark_line()

        return (points + trend).properties(title=f"{y_metric.upper()} vs {x_metric.upper()}")

    rows = []
    for model in df["model"].unique():
        df_model = df[df["model"] == model]
        plots = [_panel(df_model, y_metric, x_metric) for y_metric, x_metric in pairs]

        row = (plots[0] | plots[1] | plots[2]).properties(title=f"Model: {model}")
        rows.append(row)

    # Every panel keeps its own axes
    return alt.vconcat(*rows).resolve_scale(x="independent", y="independent")
scatter_grid(df_all)

## 6.1 Quantify metric sensitivity per model using the coefficient of variation

In [None]:
# Coefficient of variation (std / mean) of every metric, per model
per_model = df_all.groupby('model')[['psnr','ssim','niqe']]

# Per-model mean, columns named PSNR_mean / SSIM_mean / NIQE_mean
mean_df = per_model.mean().rename(columns=lambda c: f'{c.upper()}_mean').reset_index()

# Per-model standard deviation, columns named PSNR_std / SSIM_std / NIQE_std
std_df = per_model.std().rename(columns=lambda c: f'{c.upper()}_std').reset_index()

# Put mean and std side by side, then divide
cv_df = mean_df.merge(std_df, on='model')
for name in ('PSNR', 'SSIM', 'NIQE'):
    cv_df[f'{name}_CV'] = cv_df[f'{name}_std'] / cv_df[f'{name}_mean']

# Only the model name and the three CVs are reported
cv_df = cv_df[['model','PSNR_CV','SSIM_CV','NIQE_CV']]

print(cv_df)


        model   PSNR_CV   SSIM_CV   NIQE_CV
0      BSRGAN  0.005932  0.021134  0.029622
1         HAT  0.007519  0.012357  0.015741
2  RealESRGAN  0.010227  0.026946  0.038633
3      SwinIR  0.002654  0.005000  0.018399


In [None]:
# Coefficient of variation of the 'rate of change' statistics.
# The inputs are the per-bin standard deviations stored in df_convergence_stats,
# so each CV below describes how much that per-bin std varies across bins.

grad_cols = ['std_psnr_grad','std_ssim_grad','std_niqe_grad']
metrics = ['PSNR','SSIM','NIQE']


def _cv_over_abs_mean(values):
    """std(values) / mean(|values|); NaN when the mean of absolute values is zero."""
    mean_abs = np.mean(np.abs(values))
    spread = np.std(values)  # population std (NumPy default, ddof=0)
    return spread / mean_abs if mean_abs != 0 else np.nan


results = []

for model in df_convergence_stats['model'].unique():
    df_model = df_convergence_stats[df_convergence_stats['model'] == model]
    results.extend(
        {
            'model': model,
            'metric': metric,
            'CV_rate': _cv_over_abs_mean(df_model[col].values)
        }
        for metric, col in zip(metrics, grad_cols)
    )

df_grad_cv = pd.DataFrame(results)
print(df_grad_cv)

         model metric   CV_rate
0       BSRGAN   PSNR  0.313042
1       BSRGAN   SSIM  0.216526
2       BSRGAN   NIQE  0.399536
3          HAT   PSNR  0.638757
4          HAT   SSIM  0.710611
5          HAT   NIQE  0.606257
6   RealESRGAN   PSNR  0.531137
7   RealESRGAN   SSIM  0.700936
8   RealESRGAN   NIQE  0.905093
9       SwinIR   PSNR  0.473010
10      SwinIR   SSIM  0.345907
11      SwinIR   NIQE  0.638074


# 7 Stability Analysis

In [None]:
# Loss columns available in each family of logs
basicsr_style_cols = ["iter", "l_g_pix","l_g_percep","l_g_gan","l_d_real","l_d_fake"]
kair_style_cols = ["iter", "G_loss","F_loss","D_loss","D_real","D_fake"]

# HAT / RealESRGAN: rename the discriminator columns so all four models share D_real / D_fake.
# NOTE(review): this treats `l_d_real` / `l_d_fake` as the same quantity as the other logs'
# `D_real` / `D_fake` - confirm that the two log formats report comparable values.
discriminator_rename = {"l_d_real":"D_real", "l_d_fake":"D_fake"}

df_hat_loss = (
    pd.read_csv(HAT_log)[basicsr_style_cols]
    .rename(columns=discriminator_rename)
    .assign(model="HAT")
)
df_real_loss = (
    pd.read_csv(RealESRGAN_log)[basicsr_style_cols]
    .rename(columns=discriminator_rename)
    .assign(model="RealESRGAN")
)
df_swin_loss = pd.read_csv(Swin_log)[kair_style_cols].assign(model="SwinIR")
df_bsrg_loss = pd.read_csv(BSRGAN_log)[kair_style_cols].assign(model="BSRGAN")

# Stack everything; columns a model does not have are filled with NaN
df_loss_all = pd.concat([df_hat_loss, df_real_loss, df_bsrg_loss, df_swin_loss], ignore_index=True)

# Make sure the iteration counter is numeric (unparsable values become NaN)
df_loss_all["iter"] = pd.to_numeric(df_loss_all["iter"], errors="coerce")

## 7.1 Visualise discriminator loss per iteration

In [None]:
charts = []

# One panel per model, with D_real and D_fake drawn as two lines
for model_name in df_loss_all['model'].unique():
    # Long format: one row per (iter, type) with the value in `loss`
    long_losses = df_loss_all.loc[df_loss_all['model'] == model_name].melt(
        id_vars=['iter'],
        value_vars=['D_real', 'D_fake'],
        var_name='type',
        value_name='loss'
    )

    panel = (
        alt.Chart(long_losses)
        .mark_line()
        .encode(x='iter', y='loss', color='type', tooltip=['iter', 'type', 'loss'])
        .properties(width=700, height=150, title=model_name)
        .interactive()
    )
    charts.append(panel)

# Stack the panels vertically and render them
final_chart = alt.vconcat(*charts)
final_chart.display()


## 7.2 Identify outliers by visualising the spread of pixel and perceptual loss

In [None]:
def plot_loss_boxplots(df, width, height):
    """Return side-by-side boxplots of pixel loss and perceptual loss per model.

    Parameters
    ----------
    df : pandas.DataFrame
        Combined loss frame with a `model` column and the loss columns
        `l_g_pix` / `G_loss` (pixel) and `l_g_percep` / `F_loss` (perceptual).
    width, height : int
        Size in pixels applied to each of the two charts.

    Returns
    -------
    Altair chart with the pixel-loss boxplot on the left and the
    perceptual-loss boxplot on the right.
    """
    def _boxplot(loss_columns, title):
        # Melt the given loss columns into (model, metric, loss) rows and draw
        # one horizontal boxplot per model, coloured by the source metric.
        long_form = df.melt(
            id_vars=['model'],
            value_vars=loss_columns,
            var_name='metric',
            value_name='loss',
        )
        return (
            alt.Chart(long_form)
            .mark_boxplot()
            .encode(x='loss:Q', y='model:N', color='metric:N')
            .properties(title=title, width=width, height=height)
        )

    pixel_loss_chart = _boxplot(['l_g_pix', 'G_loss'], 'Pixel Loss Boxplot')
    percep_loss_chart = _boxplot(['l_g_percep', 'F_loss'], 'Perceptual Loss Boxplot')

    return pixel_loss_chart | percep_loss_chart

plot_loss_boxplots(df_loss_all, width=400, height=150)

## 7.3 Hypothesis testing to see if the difference between single- and second-order degradation models is statistically significant

In [None]:
# Split the combined loss frame by degradation family
second_order_rows = df_loss_all[df_loss_all['model'].isin(['HAT', 'RealESRGAN'])]
single_order_rows = df_loss_all[df_loss_all['model'].isin(['BSRGAN', 'SwinIR'])]

# "2D" = second-order degradation models (HAT, RealESRGAN)
group2D_real = second_order_rows['D_real'].dropna()
group2D_fake = second_order_rows['D_fake'].dropna()

# "1D" = single-order degradation models (BSRGAN, SwinIR)
group1D_real = single_order_rows['D_real'].dropna()
group1D_fake = single_order_rows['D_fake'].dropna()

# Levene's test for equality of variances between the two families,
# run separately on the real and fake discriminator losses
(stat_real, p_real), (stat_fake, p_fake) = (
    stats.levene(group2D_real, group1D_real),
    stats.levene(group2D_fake, group1D_fake),
)

print(f"D_real: Levene stat={stat_real:.3f}, p={p_real:.3e}")
print(f"D_fake: Levene stat={stat_fake:.3f}, p={p_fake:.3e}")

D_real: Levene stat=0.266, p=6.065e-01
D_fake: Levene stat=27.209, p=4.584e-07


# 8 Computational Cost assessment

## 8.1 Visualise distribution of VRAM and power per model

In [None]:
df_gpu = pd.read_csv(GPU_log)

# Mean memory and power draw per model, one row per model
df_summary = df_gpu.groupby("model")[["memoryMB", "powerWatts"]].mean().reset_index()


def _mean_usage_pie(field, title, legend):
    """Pie chart of `field` from df_summary, one slice per model.

    `legend` is passed straight to the colour encoding (None hides the legend).
    """
    return (
        alt.Chart(df_summary)
        .mark_arc()
        .encode(
            theta=alt.Theta(field=field, type="quantitative"),
            color=alt.Color("model:N", legend=legend),
            tooltip=["model", field],
        )
        .properties(title=title, width=200, height=200)
    )


# Memory pie carries the shared legend; the power pie hides its own
pie_memory = _mean_usage_pie("memoryMB", "Average Memory Usage (MB)", alt.Legend(title="Model"))
pie_power = _mean_usage_pie("powerWatts", "Average Power Usage (Watts)", None)

# Display side by side
final_chart = alt.hconcat(pie_memory, pie_power)
final_chart


## 8.2 Visualise average time per iter and epoch for each model

In [None]:
# Load each model's training log, expose its timing column under the common name
# `time_per_iter`, and tag the rows with the model they belong to.
# NOTE(review): HAT reads `time_data_avg` while RealESRGAN reads `time_data` -
# confirm both columns hold the per-iteration time rather than the data-loading time.
df_hat = pd.read_csv(HAT_log).assign(time_per_iter=lambda d: d["time_data_avg"], model="HAT")
df_real = pd.read_csv(RealESRGAN_log).assign(time_per_iter=lambda d: d["time_data"], model="RealESRGAN")
df_swin = pd.read_csv(Swin_log).assign(time_per_iter=lambda d: d["iter_time"], model="SwinIR")
df_bsrgan = pd.read_csv(BSRGAN_log).assign(time_per_iter=lambda d: d["iter_time"], model="BSRGAN")

# Concatenate the shared timing columns of all models
timing_cols = ["model", "epoch", "iter", "time_per_iter"]
df_all = pd.concat(
    [frame[timing_cols] for frame in (df_hat, df_real, df_swin, df_bsrgan)],
    ignore_index=True,
)

# Mean logged time per iteration, per model
avg_iter = df_all.groupby("model")["time_per_iter"].mean().reset_index()

# Mean time per epoch, per model: first total the logged iteration times inside each
# (model, epoch), then average those epoch totals across epochs. The previous version
# summed over *all* epochs of a model, which is a grand total rather than a per-epoch
# average, and used groupby.apply, which emitted a warning in the notebook output.
# NOTE(review): the epoch total only covers iterations that appear in the log; if the
# trainers log every N iterations this underestimates wall-clock epoch time - confirm.
avg_epoch = (
    df_all.groupby(["model", "epoch"])["time_per_iter"].sum()
    .groupby(level="model").mean()
    .reset_index(name="avg_epoch_time")
)


def _bubble_and_labels(data, field, y_title, chart_title, label_dy, label_format):
    """Return (bubble chart, value labels) for `field` of `data`, one mark per model."""
    bubbles = alt.Chart(data).mark_circle().encode(
        x=alt.X('model:N', title='Model'),
        y=alt.Y(f'{field}:Q', title=y_title),
        size=alt.Size(f'{field}:Q', scale=alt.Scale(range=[100, 1000])),
        color='model:N'
    ).properties(title=chart_title, width=300, height=300)

    labels = alt.Chart(data).mark_text(
        align='center',
        baseline='middle',
        dy=label_dy  # negative values move the text above the bubble
    ).encode(
        x='model:N',
        y=f'{field}:Q',
        text=alt.Text(f'{field}:Q', format=label_format)
    )
    return bubbles, labels


# Bubble chart: avg time per iter
bubble_iter, labels_iter = _bubble_and_labels(
    avg_iter, 'time_per_iter', 'Avg Time per Iter (s)', 'Avg Time per Iter', -10, ".3f"
)
bubble_iter = bubble_iter + labels_iter

# Bubble chart: avg time per epoch
bubble_epoch, labels_epoch = _bubble_and_labels(
    avg_epoch, 'avg_epoch_time', 'Avg Time per Epoch (s)', 'Avg Time per Epoch', -20, ".1f"
)
bubble_epoch = bubble_epoch + labels_epoch

# Side-by-side
bubble_iter | bubble_epoch

  avg_epoch = df_all.groupby("model").apply(lambda x: x["time_per_iter"].sum()).reset_index()


## 8.3 Understand whether there is any relationship between checkpoints / decay milestones and VRAM and power usage

In [None]:
def make_gpu_charts(df_model, df_gpu, time_col="timestamp_train", model_name="Model"):
    """
    Create GPU Memory, Power, and Learning Rate charts for a given model dataframe.

    Parameters
    ----------
    df_model : pandas.DataFrame
        Training log of one model. Must contain `time_col`, `iter` and `lr`.
        The frame is not modified.
    df_gpu : pandas.DataFrame
        GPU readings with `time`, `memoryMB` and `powerWatts` columns. `time` is
        matched against the seconds elapsed since the earliest training timestamp.
        If the frame has a `model` column and it contains rows whose value equals
        `model_name` (case-insensitive), only those rows are used; otherwise all
        rows are used.
    time_col : str
        Name of the timestamp column in `df_model`.
    model_name : str
        Label used in the chart titles and for the optional `model` filter.

    Returns
    -------
    Altair chart: memory, power and learning-rate line charts side by side,
    each plotted against `iter`.
    """
    # Work on a copy so the caller's frame does not gain/alter columns
    train = df_model.copy()
    train[time_col] = pd.to_datetime(train[time_col], errors="coerce")

    # Unparseable timestamps were coerced to NaT; merge_asof rejects null keys,
    # so those rows are dropped before aligning.
    train = train.dropna(subset=[time_col])

    # Seconds elapsed since the first training timestamp
    t0 = train[time_col].min()
    train["elapsed_sec"] = (train[time_col] - t0).dt.total_seconds()

    # Restrict a multi-model GPU log to the model being plotted so that the
    # nearest-time match cannot pick up another model's readings.
    gpu = df_gpu
    if "model" in gpu.columns:
        same_model = gpu["model"].astype(str).str.lower() == str(model_name).lower()
        if same_model.any():
            gpu = gpu[same_model]

    # Align GPU readings with training iterations (nearest reading in time)
    df_merged = pd.merge_asof(
        train.sort_values("elapsed_sec"),
        gpu.sort_values("time"),
        left_on="elapsed_sec",
        right_on="time",
        direction="nearest"
    )

    # line Charts
    chart_mem = alt.Chart(df_merged).mark_line(color="steelblue").encode(
        x=alt.X("iter:Q", title="Iterations"),
        y=alt.Y("memoryMB:Q", title="Memory (MB)")
    ).properties(title=f"{model_name} - GPU Memory", width=300, height=250)

    chart_power = alt.Chart(df_merged).mark_line(color="green").encode(
        x=alt.X("iter:Q"),
        y=alt.Y("powerWatts:Q", title="Power (Watts)")
    ).properties(title=f"{model_name} - GPU Power", width=300, height=250)

    chart_lr = alt.Chart(df_merged).mark_line(color="orange").encode(
        x=alt.X("iter:Q"),
        y=alt.Y("lr:Q", title="Learning Rate")
    ).properties(title=f"{model_name} - Learning Rate", width=300, height=250)

    return alt.hconcat(chart_mem, chart_power, chart_lr)

# Reload the CSVs into fresh dataframes so columns added by earlier analysis cells
# do not carry over
df_gpu = pd.read_csv("/content/outputLogs/combined_gpu_log.csv")
df_bsrgan, df_hat, df_swinir, df_realesrgan = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/outputLogs/Bsrgan_Log.csv",
        "/content/outputLogs/Hat_Log.csv",
        "/content/outputLogs/SwinIR_Log.csv",
        "/content/outputLogs/RealEsrgan_Log.csv",
    )
)

# One row of (memory, power, learning-rate) charts per model, stacked vertically.
# RealESRGAN's log names its timestamp column "timestamp"; the others use
# "timestamp_train".
final_chart = alt.vconcat(*(
    make_gpu_charts(train_log, df_gpu, time_col, label)
    for train_log, time_col, label in (
        (df_bsrgan, "timestamp_train", "BSRGAN"),
        (df_swinir, "timestamp_train", "SwinIR"),
        (df_hat, "timestamp_train", "HAT"),
        (df_realesrgan, "timestamp", "RealESRGAN"),
    )
))
final_chart


  df_model[time_col] = pd.to_datetime(df_model[time_col], errors="coerce")
  df_model[time_col] = pd.to_datetime(df_model[time_col], errors="coerce")
  df_model[time_col] = pd.to_datetime(df_model[time_col], errors="coerce")
