# Quantitative data sanity check

I generated quantitative data from my models.
For each model, I evaluate the test set and compute PSNR, LPIPS and SSIM for every test view.
I now want to check whether the generated data actually matches what I expect it to look like.

I will answer the following questions (and possibly more):

- Is every dataset, model and size accounted for?
- Can I compute the summary statistics per model?
- Are the summary statistics reasonable?

In [2]:
import pandas as pd

# Load the raw per-view metrics; the metric columns still hold stringified
# tensor reprs at this point (see the displayed frame).
raw_metrics_path = "./data/singleMetricsResult.xlsx"
df_single = pd.read_excel(raw_metrics_path)
df_single

Unnamed: 0.1,Unnamed: 0,model,dataset,size,view_idx,psnr,ssim,lpips
0,0,default,truck,low,0,"tensor(22.8543, dtype=torch.float64)",tensor(0.7905),tensor([[[[0.2261]]]])
1,1,default,truck,low,1,"tensor(22.0874, dtype=torch.float64)",tensor(0.7851),tensor([[[[0.2295]]]])
2,2,default,truck,low,2,"tensor(20.9982, dtype=torch.float64)",tensor(0.7453),tensor([[[[0.2374]]]])
3,3,default,truck,low,3,"tensor(22.4937, dtype=torch.float64)",tensor(0.7999),tensor([[[[0.1657]]]])
4,4,default,truck,low,4,"tensor(23.0649, dtype=torch.float64)",tensor(0.8157),tensor([[[[0.1705]]]])
...,...,...,...,...,...,...,...,...
2431,2431,mip-splatting,stump,extended,11,"tensor(26.2844, dtype=torch.float64)",tensor(0.7715),tensor([[[[0.1466]]]])
2432,2432,mip-splatting,stump,extended,12,"tensor(26.9957, dtype=torch.float64)",tensor(0.8161),tensor([[[[0.1172]]]])
2433,2433,mip-splatting,stump,extended,13,"tensor(25.8797, dtype=torch.float64)",tensor(0.7517),tensor([[[[0.1529]]]])
2434,2434,mip-splatting,stump,extended,14,"tensor(26.7891, dtype=torch.float64)",tensor(0.8123),tensor([[[[0.1167]]]])


In [3]:
import re


# First numeric literal in a tensor repr: optional sign, then either a
# decimal/integer with an optional exponent, or the special values inf / nan.
# The lookbehind stops digits embedded in identifiers (e.g. the "64" in
# "float64") from being mistaken for the value.
_TENSOR_NUMBER = re.compile(
    r"(?<![\w.])[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|(?:inf|nan)\b)"
)


def clean_tensor_string(value: str) -> float:
    """Extract the scalar stored in a stringified tensor.

    Handles reprs such as ``"tensor(22.8543, dtype=torch.float64)"`` or
    ``"tensor([[[[0.2261]]]])"``, including negative values, scientific
    notation (``1.5e-05``) and the special values ``nan`` / ``inf``.

    Args:
        value: The cell content. Anything that is not a ``str`` is assumed
            to be already clean and is returned unchanged.

    Returns:
        The first numeric value found in ``value`` as a ``float``, or
        ``value`` itself when it is not a string.

    Raises:
        ValueError: If ``value`` is a string that contains no numeric value.
    """
    if not isinstance(value, str):
        return value

    match = _TENSOR_NUMBER.search(value)
    if match is None:
        # Fail loudly with the offending cell instead of an opaque IndexError.
        raise ValueError(f"no numeric value found in {value!r}")
    return float(match.group())


# Replace the stringified tensors in every metric column with plain numbers.
for metric in ("psnr", "lpips", "ssim"):
    df_single[metric] = df_single[metric].apply(clean_tensor_string)
df_single

Unnamed: 0.1,Unnamed: 0,model,dataset,size,view_idx,psnr,ssim,lpips
0,0,default,truck,low,0,22.8543,0.7905,0.2261
1,1,default,truck,low,1,22.0874,0.7851,0.2295
2,2,default,truck,low,2,20.9982,0.7453,0.2374
3,3,default,truck,low,3,22.4937,0.7999,0.1657
4,4,default,truck,low,4,23.0649,0.8157,0.1705
...,...,...,...,...,...,...,...,...
2431,2431,mip-splatting,stump,extended,11,26.2844,0.7715,0.1466
2432,2432,mip-splatting,stump,extended,12,26.9957,0.8161,0.1172
2433,2433,mip-splatting,stump,extended,13,25.8797,0.7517,0.1529
2434,2434,mip-splatting,stump,extended,14,26.7891,0.8123,0.1167


In [4]:
# Persist the cleaned metrics next to the raw spreadsheet.
clean_metrics_path = "data/singleMetricResultClean.xlsx"
df_single.to_excel(clean_metrics_path)

### Is every dataset, model and size accounted for?

In [17]:
# Number of rows (test views) per (model, dataset, size) group.
# size() counts rows directly, so the check does not depend on the leftover
# "Unnamed: 0" index column that only exists as an artefact of how the
# spreadsheet was saved.
counts = df_single.groupby(["model", "dataset", "size"]).size()

In [18]:
# Every (model, dataset, size) group must hold as many test views as the
# reference group of the same dataset ("default" model, "extended" size).
for dataset in df_single["dataset"].unique():
    # A group that is absent from the data has no entry in `counts`; treat it
    # as 0 views so it is reported below instead of aborting with a KeyError.
    expected = counts.get(("default", dataset, "extended"), 0)
    for model in df_single["model"].unique():
        for size in df_single["size"].unique():
            actual = counts.get((model, dataset, size), 0)
            if actual != expected:
                # Include the size so the offending group is unambiguous.
                print(
                    f"Error! {dataset} for model {model} and size {size} has count {actual} != {expected}"
                )

### Can I compute the summary statistics per model?

In [20]:
# Mean of each metric per (model, dataset, size) group.
mean_by_group = df_single.groupby(["model", "dataset", "size"])
mean_by_group[["psnr", "ssim", "lpips"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,psnr,ssim,lpips
model,dataset,size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
default,room,extended,30.186615,0.914177,0.099028
default,room,high,30.324018,0.913187,0.101162
default,room,low,28.873769,0.888956,0.139959
default,room,medium,29.759521,0.906133,0.114949
default,stump,extended,24.350763,0.709044,0.202881
...,...,...,...,...,...
mip-splatting,stump,medium,24.097375,0.652094,0.293212
mip-splatting,truck,extended,23.814241,0.852959,0.109325
mip-splatting,truck,high,23.640278,0.845419,0.123400
mip-splatting,truck,low,21.191956,0.774847,0.220106


In [21]:
# Standard deviation of each metric per (model, dataset, size) group.
std_by_group = df_single.groupby(["model", "dataset", "size"])
std_by_group[["psnr", "ssim", "lpips"]].std()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,psnr,ssim,lpips
model,dataset,size,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
default,room,extended,2.115705,0.024049,0.035240
default,room,high,2.086719,0.025740,0.037103
default,room,low,1.542895,0.033267,0.042097
default,room,medium,1.894804,0.028478,0.038911
default,stump,extended,1.652215,0.069142,0.047274
...,...,...,...,...,...
mip-splatting,stump,medium,1.697001,0.059449,0.056467
mip-splatting,truck,extended,0.885531,0.019703,0.010965
mip-splatting,truck,high,0.883980,0.019947,0.012644
mip-splatting,truck,low,1.717454,0.036131,0.036982


### Are the summary statistics reasonable?

In [22]:
# The smallest PSNR in the table must not be negative.
psnr_min = df_single["psnr"].min()
psnr_min >= 0

np.True_

In [25]:
# The smallest SSIM in the table must not be negative.
ssim_min = df_single["ssim"].min()
ssim_min >= 0

np.True_

In [29]:
# The largest SSIM in the table must not exceed 1.
ssim_max = df_single["ssim"].max()
ssim_max <= 1

np.True_

In [30]:
# The smallest LPIPS in the table must not be negative.
lpips_min = df_single["lpips"].min()
lpips_min >= 0

np.True_

In [32]:
# The largest LPIPS in the table must not exceed 1.
lpips_max = df_single["lpips"].max()
lpips_max <= 1

np.True_