In [4]:
import shutil, glob, os

for file in glob.glob('a/*'):
    shutil.copy(file, f'b/{file.split(os.sep)[-1]}')

In [7]:
from nosaveddata import *
import torch
from torch import nn

a=torch.arange(2,device='cuda').long()[:,None].repeat_interleave(15,0)

a,torch.zeros(6,1,device='cuda').long()

(tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1],
         [1]], device='cuda:0'),
 tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0]], device='cuda:0'))

In [42]:
import math
import torch

p1 = 0.6697
p2 = 0.6649
n = 10000

def statistical_difference(p1, p2, n):
    
    d=torch.tensor(p1-p2).abs()

    std = 1.65 * math.sqrt((p1*(1-p1) + p2*(1-p2))/n)
    
    difference = torch.tensor([d-std, d+std])
    
    return difference.sort()[0]

print(statistical_difference(0.834, 0.831, 100000))

tensor([0.0002, 0.0058])


In [13]:
from nosaveddata import *
import torch
from torch import nn

class IMPALA_Resnet(nn.Module):
    def __init__(self, first_channels=12, scale_width=1, norm=True, init=init_relu, act=nn.SiLU()):
        super().__init__()
        self.norm=norm
        self.init=init
        self.act =act
        
        self.cnn = nn.Sequential(self.get_block(first_channels, 16*scale_width),
                                 self.get_block(16*scale_width, 32*scale_width),
                                 self.get_block(32*scale_width, 32*scale_width, last_relu=True))
        params_count(self, 'IMPALA ResNet')
    def get_block(self, in_hiddens, out_hiddens, last_relu=False):
        
        blocks = nn.Sequential(DQN_Conv(in_hiddens, out_hiddens, 3, 1, 1, max_pool=True, act=self.act, norm=self.norm, init=self.init),
                               Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init),
                               Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init, out_act=self.act if last_relu else nn.Identity())
                              )
        
        return blocks
        
    def forward(self, X):
        return self.cnn(X)


class IMPALA_YY(nn.Module):
    def __init__(self, first_channels=12, scale_width=1, norm=True, init=init_relu, act=nn.SiLU()):
        super().__init__()
        self.norm=norm
        self.init=init
        self.act =act

        self.yin = self.get_yin(first_channels, 16*scale_width, 32*scale_width)
        
        self.yang = self.get_yang(first_channels, 16*scale_width)
                                 
        self.head = nn.Sequential(self.get_yang(16*scale_width, 32*scale_width),
                                  self.get_yang(32*scale_width, 32*scale_width, last_relu=True))
        
        params_count(self, 'IMPALA ResNet')

    def get_yin(self, in_hiddens, hiddens, out_hiddens):
        blocks = nn.Sequential(DQN_Conv(1, hiddens, 3, 1, 1, max_pool=True, act=self.act, norm=self.norm, init=self.init),
                               Residual_Block(hiddens, hiddens, norm=self.norm, act=self.act, init=self.init),
                               #DQN_Conv(hiddens, out_hiddens, 3, 1, 1, max_pool=True, act=self.act, norm=self.norm, init=self.init),
                               #Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init),
                               #Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init)
                              )
        return blocks          
        
    def get_yang(self, in_hiddens, out_hiddens, last_relu=False):
        
        blocks = nn.Sequential(DQN_Conv(in_hiddens, out_hiddens, 3, 1, 1, max_pool=True, act=self.act, norm=self.norm, init=self.init),
                               Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init),
                               Residual_Block(out_hiddens, out_hiddens, norm=self.norm, act=self.act, init=self.init, out_act=self.act if last_relu else nn.Identity())
                              )
        
        return blocks
    
    def forward(self, X):

        y = self.yin(X[:,-3:].mean(-3)[:,None])
        x = self.yang(X)
        
        X = x*(1-y) + x + y
        
        return self.head(X)

model = IMPALA_Resnet(scale_width=4)
x=torch.randn(32,12,96,72)
model2 = IMPALA_YY(scale_width=4)

model(x).shape, model2(x).shape

IMPALA ResNet Parameters: 1.56M
IMPALA ResNet Parameters: 1.63M


(torch.Size([32, 128, 12, 9]), torch.Size([32, 128, 12, 9]))

In [28]:
import torch
from torch import nn
import torch.nn.functional as F
from nosaveddata import *

seed_np_torch(42)

def network_ema(target_network, new_network, alpha=0.5):
    for (param_name, param_target), param_new  in zip(target_network.cuda().named_parameters(), new_network.parameters()):
        if 'ln' in param_name: #layer norm
            param_target.data = param_new.data.clone()
        else:
            param_target.data = alpha * param_target.data + (1 - alpha) * param_new.data.clone()


class Modeld(nsd_Module):
    def __init__(self):
        super().__init__()

        self.linear = nn.Linear(10,32)
        self.ln = nn.LayerNorm(32)

    def forward(self,X):
        return self.ln(self.linear(X))

m = Modeld().cuda()
m_rand= Modeld().cuda()


optim=torch.optim.AdamW(m.parameters(), lr=1e-4)

for i in range(4000):
    x=torch.randn(1,10).cuda()
    
    loss = m(x).sum()
    loss.backward()
    
    optim.step()
    optim.zero_grad()

network_ema(m,m_rand)

m.ln.weight, m.linear.weight

(Parameter containing:
 tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([[ 0.1788,  0.1215, -0.1596,  0.0556,  0.0823, -0.0129, -0.1189,  0.1854,
          -0.0022, -0.1650],
         [-0.1079, -0.0155,  0.0935,  0.0209,  0.0326, -0.1374, -0.1405,  0.0014,
           0.1486,  0.0473],
         [-0.1218, -0.0415, -0.1404, -0.0332, -0.0325,  0.0417,  0.1003, -0.1978,
           0.1183, -0.2110],
         [ 0.1376,  0.0622,  0.0658,  0.1490, -0.1540, -0.0291,  0.1021,  0.0194,
          -0.0155, -0.1166],
         [ 0.1413,  0.0467,  0.0852, -0.0416, -0.0986, -0.0094,  0.0798, -0.0597,
          -0.0080,  0.0361],
         [-0.0403, -0.0299, -0.0763, -0.1011, -0.1358, -0.0595, -0.0660,  0.0495,
           0.0058, -0.1400],
         [ 0.1676, -0.0036,  0.1435, -0.1102, -0.0544,  0.0415, -0.0507, -0.1388,
          -0.

In [2]:
from nosaveddata import *
import torch
from torch import nn

model = nn.Linear(10,2).cuda()
model.apply(init_xavier)
model2 = nn.Linear(10,2).cuda()
network_ema(model, model2, 0)
model.apply(init_xavier)

model.weight.data==model2.weight.data

  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(


tensor([[False, False, False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False]],
       device='cuda:0')

<h1>Preprocessing</h1>

In [11]:
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import os, glob
from nosaveddata import *


import torchvision
from torchvision import transforms

paths = glob.glob('C:/Users/Augusto/Python/PyTorch/RL/mc_data/4/2023_01_09_14_48_09_100636/*.jpg')
path = 'C:/Users/Augusto/Python/PyTorch/RL/mc_data/4/2023_01_09_14_48_09_100636/7,0,0,0,0,0,0,0,0,0,0,0,0,3,0,.jpg'



tfms = transforms.Compose([
                           transforms.Resize((96, 72)),
                           transforms.ToTensor()
                        ])

img = Image.open(path)
imgs=[]
for p in paths:
    imgs.append(tfms(Image.open(p)))
imgs=torch.stack(imgs)

print(imgs.shape)



imgs, augments_applied = preprocess_iwm_no_solarize(imgs)
    


#plt.imshow(img_tfms)
plot_imgs(imgs.permute(0,2,3,1))
augments_applied

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Augusto/Python/PyTorch/RL/mc_data/4/2023_01_09_14_48_09_100636/7,0,0,0,0,0,0,0,0,0,0,0,0,3,0,.jpg'

In [8]:
import torch
from torch import nn
import torch.nn.functional as F

from nosaveddata import *



def gray_scale_stacked(X, p=0.2, stacks=4):
    # Input: Tensor T e (B,C,T,D)
    
    probs = get_img_preprocessing_prob(X.shape[0], p, X.device)
    stacked_probs = probs.repeat_interleave(stacks,0)
    X = X.view(-1,X.shape[1]//stacks,*X.shape[-2:])
    
    gray_img = X.mean(1,keepdim=True).expand(-1,3,-1,-1)
    
    X = (1-stacked_probs)*X + stacked_probs*gray_img
    
    return X.view(X.shape[0]//stacks, -1, *X.shape[-2:]), probs.squeeze()

def gaussian_blur(X, p=0.2, stacks=4, sigma_min=0.1, sigma_max=2):
    # Input: Tensor T e (B,C,T,D)
    
    probs = get_img_preprocessing_prob(X.shape[0], p, X.device)
    tfms = transforms.GaussianBlur(3, (sigma_min, sigma_max))
    
    blurred = tfms(X)
    X = (1-probs)*X + probs*blurred
    
    return X, probs.squeeze()

def solarization_stacked(X, p=0.2, stacks=4):
    # Input: Tensor T e (B,C,T,D)

    probs = get_img_preprocessing_prob(X.shape[0], p, X.device)
    stacked_probs = probs.repeat_interleave(stacks,0)
    
    X = X.view(-1,X.shape[1]//stacks,*X.shape[-2:])
    
    tfms = transforms.RandomSolarize(0,p=1) # This prob is applied over all the batch or no image at all
    
    solarized = tfms(X)
    X = (1-stacked_probs)*X + stacked_probs*solarized
    
    return X.view(X.shape[0]//stacks, -1, *X.shape[-2:]), probs.squeeze()


def preprocess_iwm_stacked(imgs, p=0.2, stacks=4):
    # Applies the same preprocessing for all images in the sequence, but separated by each beach
    augments_applied=[]
    
    imgs, augmented = gray_scale_stacked(imgs, p, stacks)
    augments_applied.append(augmented)
    
    imgs, augmented = gaussian_blur_stacked(imgs, p, stacks)
    augments_applied.append(augmented)
    
    imgs, augmented = solarization_stacked(imgs, p, stacks)
    augments_applied.append(augmented)
    
    augments_applied = torch.stack(augments_applied,1)
    return imgs, augments_applied

preprocess_iwm_stacked(torch.randn(32,12,96,72, device='cuda'))[0].shape

torch.Size([32, 12, 96, 72])

In [None]:
plot_img(imgs[-1].permute(1,2,0))

<h1>DiT</h1>

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

from nosaveddata import *

unet = UNet_DiT_S_4(in_channels=4).cuda()
x=torch.randn(32,4,32,32).cuda()
c=torch.randn(32,384).cuda()
t=torch.randint(0,1000,(32,)).cuda()
unet(x,t).shape

GPT Transformer Parameters: 31.91M


torch.Size([32, 4, 32, 32])

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

from nosaveddata import *


model = DiT_Transformer(128, 8, 8, 108).cuda()

X = torch.randn(16,108,128).cuda()
c = torch.randn(16,128).cuda()

model(X,c).shape

DiT Transformer Parameters: 2.38M


torch.Size([16, 108, 128])

In [None]:
model = DiT_Transformer(512, 8, 8, 128).cuda()

X = torch.randn(16,128,512).cuda()
c = torch.randn(16,512).cuda()

model(X,c).shape