## 0.1 nn.Sequantial()

In [26]:
import torch 
from torch import nn 


class MyModel(nn.Module):

    def __init__(self,
                 in_channels: int,
                    out_channels: int):
        super().__init__()

        self.layer1 = nn.Linear(in_channels, out_channels)

        self.linear_layer = nn.Sequential(
            nn.Linear(in_channels, 2*out_channels),
            nn.Linear(2*out_channels, 3*out_channels),
            nn.Linear(3*out_channels, 4*out_channels),
            nn.Linear(4*out_channels, 5*out_channels),
            nn.Linear(5*out_channels, 6*out_channels),
            nn.Linear(6*out_channels, 7*out_channels),
            nn.Linear(7*out_channels, 8*out_channels),
            nn.Linear(8*out_channels,9*out_channels),
            nn.Linear(9*out_channels,10*out_channels),
            nn.Linear(10*out_channels,10*out_channels),
        )
        

        self.relu = nn.ReLU()

    def forward(self, x):

        x = self.layer1(x)

        torch.utils.checkpoint.checkpoint(
            self.linear_layer,
            x,
            use_reentrant=True
        )
        # x = self.linear_layer(x)
        x = self.relu(x)
        return x
    
model = MyModel(in_channels=100,
                out_channels=100)
model

MyModel(
  (layer1): Linear(in_features=100, out_features=100, bias=True)
  (linear_layer): Sequential(
    (0): Linear(in_features=100, out_features=200, bias=True)
    (1): Linear(in_features=200, out_features=300, bias=True)
    (2): Linear(in_features=300, out_features=400, bias=True)
    (3): Linear(in_features=400, out_features=500, bias=True)
    (4): Linear(in_features=500, out_features=600, bias=True)
    (5): Linear(in_features=600, out_features=700, bias=True)
    (6): Linear(in_features=700, out_features=800, bias=True)
    (7): Linear(in_features=800, out_features=900, bias=True)
    (8): Linear(in_features=900, out_features=1000, bias=True)
    (9): Linear(in_features=1000, out_features=1000, bias=True)
  )
  (relu): ReLU()
)

In [27]:


data = torch.randn(100)
target = torch.randn(100)
# output = model(data)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)
scaler = torch.amp.GradScaler(device="cpu", enabled=True)

for epoch in range(100):

    optimizer.zero_grad()
    with torch.autocast(device_type="cpu", enabled=True):
        output = model(data)
        loss = loss_fn(output, target)
        print(f"{loss.item():.3f}")

    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()


1.112
0.744
0.721
0.720
0.714
0.711
0.711
0.710
0.705
0.699
0.697
0.698
0.700
0.700
0.699
0.697
0.696
0.695
0.694
0.693
0.693
0.694
0.694
0.694
0.693
0.692
0.691
0.691
0.692
0.692
0.692
0.692
0.692
0.691
0.691
0.691
0.691
0.691
0.691
0.691
0.691
0.691
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690
0.690


## 0.2 nn.ModuleList()

In [None]:
import torch 
from torch import nn 


class MyModel(nn.Module):

    def __init__(self,
                 in_channels: int,
                    out_channels: int):
        super().__init__()

        self.layer1 = nn.Linear(in_channels, 2*out_channels)


        self.linear_layers = nn.ModuleList([])
        for i in range(3):
            
            self.linear_layers.append(
                nn.Linear(2*out_channels, 2*out_channels),
            )



        self.relu = nn.ReLU()

    def forward(self, x):

        x = self.layer1(x)

        def create_custom_forward(module):
            def custom_forward(*inputs):
                return module(*inputs)
                
            return custom_forward

        for layer in self.linear_layers:
           sample = torch.utils.checkpoint.checkpoint(create_custom_forward(layer),
                                                      x,
                                                      use_reentrant=False)
        
        # activation function
        x = self.relu(sample)
        return x 
    
model = MyModel(in_channels=100,
                out_channels=100)

x = torch.randn(100)
out = model(x)
out



tensor([0.0000e+00, 2.3260e-01, 0.0000e+00, 3.1295e-01, 6.2666e-02, 8.1320e-01,
        0.0000e+00, 4.5498e-02, 9.0034e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        7.0397e-02, 0.0000e+00, 3.4555e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 6.8577e-01, 2.4133e-01, 2.1677e-02, 1.4320e-01, 1.0488e-01,
        9.0700e-02, 0.0000e+00, 2.2227e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.8234e-01, 0.0000e+00, 0.0000e+00, 1.4475e-01, 0.0000e+00,
        7.2062e-02, 5.5174e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.2355e-02,
        3.5442e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 7.6579e-02,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.3682e-01, 5.5449e-01,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        2.6910e-01, 0.0000e+00, 7.0966e-02, 3.2540e-02, 0.0000e+00, 0.0000e+00,
        4.1872e-01, 0.0000e+00, 0.0000e+00, 4.6128e-01, 2.9216e-01, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 4.7815e-

In [4]:


data = torch.randn(100)
target = torch.randn(200)
# output = model(data)

loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)
scaler = torch.amp.GradScaler(device="cpu", enabled=True)

for epoch in range(100):

    optimizer.zero_grad()
    with torch.autocast(device_type="cpu", enabled=True):
        output = model(data)
        loss = loss_fn(output, target)
        print(f"{loss.item():.3f}")

    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()


0.976
0.774
0.685
0.677
0.666
0.650
0.655
0.658
0.651
0.630
0.626
0.641
0.630
0.614
0.621
0.624
0.618
0.616
0.612
0.611
0.615
0.612
0.608
0.608
0.608
0.608
0.608
0.606
0.606
0.607
0.605
0.604
0.606
0.603
0.604
0.601
0.600
0.603
0.602
0.600
0.601
0.601
0.600
0.600
0.601
0.600
0.599
0.583
0.587
0.594
0.587
0.582
0.584
0.587
0.586
0.582
0.581
0.583
0.585
0.583
0.581
0.581
0.582
0.582
0.582
0.581
0.581
0.581
0.581
0.581
0.581
0.580
0.581
0.581
0.581
0.581
0.581
0.581
0.581
0.581
0.581
0.580
0.580
0.580
0.581
0.581
0.581
0.581
0.582
0.583
0.586
0.590
0.593
0.591
0.584
0.580
0.583
0.586
0.582
0.580


In [2]:
from torch import nn 
import torch 

conv = nn.Conv3d(in_channels=3,
                 out_channels=3,
                 kernel_size=3,
                 padding=1,
                 )

x = torch.randn(2, 3, 1, 16, 16)
out = conv(x)
out.shape

torch.Size([2, 3, 1, 16, 16])

In [6]:
from torch import nn 
import torch 


transpose = nn.ConvTranspose3d(in_channels=3,
                               out_channels=32,
                               kernel_size=4,
                               padding=1)

x = torch.randn(2, 3, 1, 16, 16)

out = transpose(x)
out.shape

torch.Size([2, 32, 2, 17, 17])

In [8]:
x = torch.randn(2, 3, 1, 16, 16)

from torch.nn import functional as F 

model = F.interpolate(input=x,
                      scale_factor=(1, 2, 2),
                      mode="nearest")

model.shape

torch.Size([2, 3, 1, 32, 32])

In [1]:
print(1e6)

1000000.0


In [1]:
channel = [64, 128, 256, 512, 512]
len(channel)

5

In [14]:
import torch 

t = torch.randn(2, 3, 32, 32)

total = 0
for i in range(5):
    # print(sum(t.shape))
    out = t
    # print(out.shape)
    total += out
    print(total.shape)

torch.Size([2, 3, 32, 32])
torch.Size([2, 3, 32, 32])
torch.Size([2, 3, 32, 32])
torch.Size([2, 3, 32, 32])
torch.Size([2, 3, 32, 32])


In [19]:
import torch 

t = torch.randn(2, 3, 32, 32)
n = ~torch.isnan(t)
n


tensor([[[[True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          ...,
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True]],

         [[True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          ...,
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True]],

         [[True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          ...,
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ..., True, True, True],
          [True, True, True,  ...

In [20]:
print(1e-3)

0.001


In [1]:
for i, j in range(4), range(5):
    print(i), print("....", j)

ValueError: too many values to unpack (expected 2)

In [4]:
import torch 
# generator = torch.Generator(device='cpu').manual_seed(42)
# generator.device.type

line = torch.linspace(0, 1573, 1573)
line

tensor([0.0000e+00, 1.0006e+00, 2.0013e+00,  ..., 1.5710e+03, 1.5720e+03,
        1.5730e+03])

In [7]:
if 1e-5 > 5e-5:
    print("hello")
elif 1e-5 < 5e-5:
    print("world")

world


In [4]:
scale = max(256/512, 256/512)
new_width = round(512 * 0.5)
new_width

256

In [1]:
def adopt_weight(weight, global_step, threshold=0, value=0.0):
    if global_step < threshold:
        weight = value
    return weight

In [3]:
disc_factor = adopt_weight(weight=1.0,
                           global_step=0,
                           threshold=250000)
disc_factor

0.0

In [10]:
import torch 

img1 = (torch.randn(10, 3, 100, 100) * 2) - 1
img1.shape

torch.Size([10, 3, 100, 100])

In [14]:
_  = torch.manual_seed(123)

In [7]:
import torch 
from einops import repeat, reduce


x = torch.randn(2, 128, 8, 256, 256)
temb = torch.randn(2, 128)

t = x.shape[2]
temb = repeat(temb, 'b c -> (b t) c', t=t) 
print(temb.shape)
# temb = repeat(temb, '(b t) c -> b c', t=t)
temb = reduce(temb, '(b t) c -> b c', t=t, reduction="max")
print(temb.shape)

torch.Size([16, 128])
torch.Size([2, 128])


In [1]:
import torch
from torch import nn 


class Model(nn.Module):

    def __init__(self, 
                 in_channels,
                 out_channels):
        super().__init__()
        
        self.layer = nn.Sequential(
            nn.Conv3d(in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=0),
        
            nn.GroupNorm(num_groups=2,
                                    num_channels=in_channels),
            nn.ReLU(),
            
            nn.Conv3d(in_channels=out_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    ),
            nn.GroupNorm(num_groups=2,
                        num_channels=out_channels),
            nn.ReLU()
        
        )


    def forward(self, x):
        x = self.layer(x)
        print(f"what is the dtype: {x.dtype}")
        return x 
    





In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Model(in_channels=128,
              out_channels=128).to(device)

optim = torch.optim.Adam(params=model.parameters())
loss_fn = nn.MSELoss()
x = torch.randn(2, 128, 8, 256, 256, requires_grad=True).to(device)
target = torch.randn(2, 128, 4, 252, 252, requires_grad=True).to(device)
scaler = torch.GradScaler(device="cuda")

for epoch in range(100):
    optim.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        output = model(x)
        loss = loss_fn(output, target)
        print(f"loss: {loss} and dtype: {loss.dtype}")
    scaler.scale(loss).backward()
    scaler.step(optim)
    scaler.update()



    








what is the dtype: torch.float32
loss: 1.4965813159942627 and dtype: torch.float32
what is the dtype: torch.float32
loss: 1.3918112516403198 and dtype: torch.float32
what is the dtype: torch.float32
loss: 1.3645098209381104 and dtype: torch.float32
what is the dtype: torch.float32
loss: 1.3362582921981812 and dtype: torch.float32


KeyboardInterrupt: 