In [1]:
import os
import numpy as np 
import pandas as pd 
from datetime import datetime
import time
import random
from tqdm.auto import tqdm


#Torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import torchvision.transforms as T

#sklearn
from sklearn.model_selection import StratifiedKFold
from skimage import io

################# DETR FUNCTIONS FOR LOSS ########################
import sys
sys.path.append('./detr_custom/')

from models.matcher import HungarianMatcher
from models.detr import SetCriterion
#################################################################

import matplotlib.pyplot as plt

#Glob
from glob import glob

from typing import Iterable, Sequence, List, Tuple, Dict, Optional, Any
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from generators import BlenderStandardDataset

In [2]:
def seed_everything(seed):
    """Seed every RNG used in this notebook and put cuDNN in deterministic mode.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running interpreter is fixed at startup.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms and may pick different
    # ones between runs, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

In [3]:
# Notebook configuration.
# TORCH_CACHE_DIR is not referenced by any cell visible in this notebook.
TORCH_CACHE_DIR = 'torch_cache'
# NOTE(review): absolute, machine-specific path — consider making it configurable.
DATASET_DIR = '/mnt/blendervol/objdet_std_data'
# Passed to BlenderStandardDataset as its second positional argument.
SQL_TABLE = 'bboxes_std'
# Passed to BlenderStandardDataset as its third positional argument.
BATCH_SIZE = 4

In [4]:
# Reload the local `generators` module so edits to it are picked up without
# restarting the kernel, then rebind the class from the freshly reloaded module
# (this repeats the import already done in the imports cell).
from importlib import reload
import generators
reload(generators)
from generators import BlenderStandardDataset

# Dataset over DATASET_DIR / SQL_TABLE with shuffling disabled.
datagen = BlenderStandardDataset(DATASET_DIR, SQL_TABLE, BATCH_SIZE, shuffle=False)
# Fetch the first item as a smoke test; presumably (image batch, targets) —
# confirm against generators.py.
X, y = datagen[0]

In [10]:
def calc_mean(data: Iterable[Tuple[np.ndarray, Any]]):
    """Compute the per-channel mean of an image dataset one batch at a time.

    Only one batch is held in memory: the running mean is merged with each
    batch mean, weighted by the number of images seen so far and in the batch.

    Args:
        data: Iterable of ``(image_batch, _)`` pairs. Each batch is reduced over
            its first three axes and must leave 3 values, i.e. it is expected
            to be ``(N, H, W, 3)``. The weighting is by image count, so it is
            exact only if all images have the same spatial size.

    Returns:
        ``np.ndarray`` of shape ``(3,)`` with the per-channel mean (all zeros
        if ``data`` yields no images).
    """
    # The progress bar needs a total, but plain iterables/generators have no len().
    try:
        total = len(data)
    except TypeError:
        total = None

    denominator = 0
    mean = np.zeros(3)
    for imgbatch, __ in tqdm(data, total=total):
        imgbatch = np.asarray(imgbatch)
        n = len(imgbatch)
        if n == 0:
            # An empty batch carries no information and would make the
            # weights sum to zero on the first iteration.
            continue
        mean = np.average((mean, imgbatch.mean((0,1,2))), 0, weights=(denominator, n))
        denominator += n
    return mean

def calc_var(data: Iterable[Tuple[np.ndarray, Any]], mean: np.ndarray):
    """Compute the per-channel (population) variance of an image dataset.

    Second pass of a two-pass algorithm: squared deviations from the already
    known ``mean`` are averaged per batch and merged into a running value,
    weighted by image count.

    Args:
        data: Iterable of ``(image_batch, _)`` pairs; each batch is reduced
            over its first three axes, so it is expected to be channels-last
            with the same number of channels as ``mean``.
        mean: Per-channel mean of the whole dataset (e.g. from ``calc_mean``).

    Returns:
        ``np.ndarray`` shaped like ``mean`` holding the per-channel variance
        (all zeros if ``data`` yields no images).
    """
    # The progress bar needs a total, but plain iterables/generators have no len().
    try:
        total = len(data)
    except TypeError:
        total = None

    denominator = 0
    var = np.zeros_like(mean)
    for imgbatch, __ in tqdm(data, total=total):
        imgbatch = np.asarray(imgbatch)
        n = len(imgbatch)
        if n == 0:
            # An empty batch carries no information and would make the
            # weights sum to zero on the first iteration.
            continue

        batchvar = ((imgbatch - mean)**2).mean((0,1,2))
        var = np.average((var, batchvar), axis=0, weights=(denominator, n))
        denominator += n
    return var
    
def calc_stats(data: Iterable):
    """Print and return the per-channel mean and variance of ``data``.

    Makes two passes over ``data``: one through ``calc_mean`` and one through
    ``calc_var``. The standard deviation is printed but not returned.

    Args:
        data: Iterable of ``(image_batch, _)`` pairs accepted by ``calc_mean``
            and ``calc_var``.

    Returns:
        Tuple ``(mean, variance)``.
    """
    print('Calculating mean:')
    channel_mean = calc_mean(data)

    print('Calculating variance')
    channel_var = calc_var(data, channel_mean)

    print(f'Mean: {channel_mean}')
    print(f'Variance: {channel_var}')
    channel_std = np.sqrt(channel_var)
    print(f'Std: {channel_std}')

    return channel_mean, channel_var

# Two full passes over the dataset (one for the mean, one for the variance).
m, v = calc_stats(datagen)

Calculating mean:


HBox(children=(FloatProgress(value=0.0, max=749.0), HTML(value='')))


Calculating variance


HBox(children=(FloatProgress(value=0.0, max=749.0), HTML(value='')))


Mean: [0.64817397 0.75178422 0.43881263]
Variance: [0.00467012 0.00596115 0.00344419]
Std: [0.06833826 0.07720845 0.05868721]


In [14]:
# NOTE(review): `v2` is not assigned in any cell visible in this notebook; this
# relies on leftover kernel state and would raise NameError on Restart & Run All.
v2 / 255

array([1.19087988, 1.52009207, 0.87826803])

In [7]:
# Per-channel standard deviation from the variance `v`.
# NOTE(review): the saved output of this cell does not match the Std printed by
# calc_stats above, so it looks like it comes from an earlier run — re-run to confirm.
np.sqrt(v)

array([17.4262552 , 19.68815575, 14.96523801])

## Sanity checks on synthetic data

The cells below build small constant-valued image batches and compare the streaming mean/variance against NumPy's direct `np.mean` / `np.std`.

In [8]:
def _constant_batch(scale, n_images=10, size=16):
    """Build a channels-last batch whose three channels are constant.

    Channel values are ``scale``, ``2 * scale`` and ``3 * scale``.

    Returns:
        Integer ``np.ndarray`` of shape ``(n_images, size, size, 3)``.
    """
    planes = np.array([np.full((size, size), scale * channel) for channel in (1, 2, 3)])
    # (3, H, W) -> (1, 3, H, W) -> (N, 3, H, W) -> (N, H, W, 3)
    return planes[None, ...].repeat(n_images, 0).transpose((0, 2, 3, 1))


# Five synthetic batches with scales 1..5.
# NOTE(review): `A` rebinds the name used for `albumentations` in the imports
# cell; the name is kept here because later cells refer to these batches as A..E.
A, B, C, D, E = (_constant_batch(scale) for scale in range(1, 6))

# Reference per-channel mean over all five batches.
np.mean([A, B, C, D, E], (0, 1, 2, 3))

array([3., 6., 9.])

In [9]:
# Reference value: std of five equally sized constant arrays (2, 4, 6, 8, 10),
# i.e. the same values as the second channel of A..E.
np.std([np.full(16*16*10, 2),
        np.full(16*16*10, 4),
        np.full(16*16*10, 6),
        np.full(16*16*10, 8),
        np.full(16*16*10, 10)
       ])

2.8284271247461903

In [10]:
# Reference per-channel std over all five batches: every axis except the last
# (channel) axis of the stacked (5, 10, 16, 16, 3) array is reduced.
np.std([A,B,C,D,E], (0,1,2,3))

array([1.41421356, 2.82842712, 4.24264069])

In [11]:
# Streaming per-channel mean: fold one batch at a time into the running value,
# weighting old vs. new by the number of images each represents.
mean = np.zeros(3)
denominator = 0
for batch in (A, B, C):
    batch_size = len(batch)
    batch_mean = batch.mean(axis=(0, 1, 2))
    mean = np.average([mean, batch_mean], axis=0, weights=[denominator, batch_size])
    denominator += batch_size

print('Mean:')
print(mean)

Mean:
[2. 4. 6.]


In [12]:
# Streaming per-channel variance around the previously computed `mean`,
# printing the running value and each batch's contribution along the way.
var = np.zeros_like(mean)
denominator_var = 0

for batch in (A, B, C):
    batch_size = len(batch)
    batch_var = np.square(batch - mean).mean(axis=(0, 1, 2))

    print(var)
    print(batch_var)
    print()

    var = np.average([var, batch_var], axis=0, weights=[denominator_var, batch_size])
    denominator_var += batch_size

print('Var: ')
print(var)

[0. 0. 0.]
[1. 4. 9.]

[1. 4. 9.]
[0. 0. 0.]

[0.5 2.  4.5]
[1. 4. 9.]

Var: 
[0.66666667 2.66666667 6.        ]


In [13]:
# Direct cross-check: average the per-batch mean squared deviations from `mean`.
np.mean([((batch - mean) ** 2).mean((0, 1, 2)) for batch in (A, B, C)], axis=0)

array([0.66666667, 2.66666667, 6.        ])

In [14]:
class Dummygen:
    """Minimal stand-in for the dataset: five items, each ``(batch, None)``.

    Items are the module-level batches A..E, looked up at access time.
    Iteration works through ``__getitem__`` and stops on the ``IndexError``
    raised past the last batch.
    """

    def __len__(self):
        return 5

    def __getitem__(self, index):
        batches = [A, B, C, D, E]
        selected = batches[index]
        return (selected, None)
        
# End-to-end check of calc_stats on the synthetic batches.
m, v = calc_stats(Dummygen())
print(np.sqrt(v))
# Fail loudly if the streaming result drifts from NumPy's direct computation,
# instead of relying on comparing printed values by eye.
np.testing.assert_allclose(np.sqrt(v), np.std([A, B, C, D, E], (0, 1, 2, 3)))

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


[1.41421356 2.82842712 4.24264069]
