In [None]:
pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting einops
  Downloading einops-0.6.0-py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 169 kB/s 
[?25hInstalling collected packages: einops
Successfully installed einops-0.6.0


In [None]:
import torch, torchvision
from torch import nn
from torch.nn import init
import torch.nn.functional as F
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
from torchvision.datasets import MNIST,FashionMNIST,CIFAR10

from einops import rearrange, repeat
from tqdm.notebook import tqdm
from functools import partial
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import math, os, copy

In [None]:
"""
    Define U-net Architecture:
    Approximate reverse diffusion process by using U-net
    U-net of SR3 : U-net backbone + Positional Encoding of time + Multihead Self-Attention
"""

class PositionalEncoding(nn.Module):
    """Sinusoidal embedding of a continuous noise level.

    Args:
        dim: Target embedding width. ``dim // 2`` sine features are concatenated
            with ``dim // 2`` cosine features along the last axis.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, noise_level):
        """Embed ``noise_level`` (e.g. ``torch.Tensor([0.03])``) as sin/cos features.

        A new axis is inserted at position 1 of ``noise_level`` and broadcast
        against ``dim // 2`` frequencies; the last axis of the result holds the
        sine features followed by the cosine features.
        """
        half = self.dim // 2
        # Exponents are spaced evenly over [0, 1), so the frequencies decay from 1 towards 1e-4.
        exponents = torch.arange(half, dtype=noise_level.dtype, device=noise_level.device) / half
        frequencies = torch.exp(-math.log(1e4) * exponents.unsqueeze(0))
        angles = noise_level.unsqueeze(1) * frequencies
        return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)

class FeatureWiseAffine(nn.Module):
    """Inject the noise embedding into a feature map through a learned linear projection.

    Args:
        in_channels: Width of the noise embedding fed to the linear layer.
        out_channels: Channel count of the feature map being modulated.
        use_affine_level: If true, the projection yields a per-channel scale and
            shift; otherwise it yields a per-channel additive bias only.
    """

    def __init__(self, in_channels, out_channels, use_affine_level=False):
        super().__init__()
        self.use_affine_level = use_affine_level
        # Affine mode needs twice the outputs: one half for the scale, one half for the shift.
        projected_channels = out_channels * (1 + self.use_affine_level)
        self.noise_func = nn.Sequential(nn.Linear(in_channels, projected_channels))

    def forward(self, x, noise_embed):
        """Return ``x`` modulated by the projected ``noise_embed``.

        The projection is reshaped to ``(x.shape[0], -1, 1, 1)`` so that it
        broadcasts over the two trailing axes of ``x``.
        """
        batch = x.shape[0]
        modulation = self.noise_func(noise_embed).view(batch, -1, 1, 1)
        if not self.use_affine_level:
            return x + modulation
        gamma, beta = modulation.chunk(2, dim=1)
        return (1 + gamma) * x + beta

class Swish(nn.Module):
    """Swish activation: each element is multiplied by its own sigmoid."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate

class Upsample(nn.Module):
    """Double the spatial size with nearest-neighbour interpolation, then apply a 3x3 convolution.

    Args:
        dim: Number of input channels; the convolution keeps the same count.
    """

    def __init__(self, dim):
        super().__init__()
        self.up = nn.Upsample(scale_factor=2, mode="nearest")
        self.conv = nn.Conv2d(in_channels=dim, out_channels=dim, kernel_size=3, padding=1)

    def forward(self, x):
        enlarged = self.up(x)
        return self.conv(enlarged)

class Downsample(nn.Module):
    """Halve the spatial size with a stride-2, 3x3 convolution (padding 1).

    Args:
        dim: Number of input channels; the convolution keeps the same count.
    """

    def __init__(self, dim):
        super().__init__()
        self.conv = nn.Conv2d(dim, dim, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        reduced = self.conv(x)
        return reduced

class Block(nn.Module):
    """GroupNorm -> Swish -> (optional Dropout) -> 3x3 convolution.

    Args:
        dim: Input channel count (must be divisible by ``groups``, as GroupNorm requires).
        dim_out: Output channel count of the convolution.
        groups: Number of groups for GroupNorm.
        dropout: Dropout probability; ``0`` inserts an identity layer instead, so
            the convolution always sits at index 3 of ``self.block``.
    """

    def __init__(self, dim, dim_out, groups=32, dropout=0):
        super().__init__()
        regulariser = nn.Identity() if dropout == 0 else nn.Dropout(dropout)
        layers = [
            nn.GroupNorm(groups, dim),
            Swish(),
            regulariser,
            nn.Conv2d(dim, dim_out, 3, padding=1),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        out = self.block(x)
        return out

In [None]:
# Linear Multi-head Self-attention
class SelfAtt(nn.Module):
    """Linear multi-head self-attention over the spatial positions of a feature map.

    Args:
        channel_dim: Channel count of the input; it is split evenly across heads.
        num_heads: Number of attention heads.
        norm_groups: Number of groups for the GroupNorm applied before attention.

    Note:
        ``forward`` returns only the projected attention output; the input is
        not added back to it.
    """

    def __init__(self, channel_dim, num_heads, norm_groups=32):
        super().__init__()
        self.groupnorm = nn.GroupNorm(norm_groups, channel_dim)
        self.num_heads = num_heads
        # A single bias-free 1x1 convolution produces queries, keys and values at once.
        self.qkv = nn.Conv2d(channel_dim, channel_dim * 3, kernel_size=1, bias=False)
        self.proj = nn.Conv2d(channel_dim, channel_dim, kernel_size=1)

    def forward(self, x):
        _, _, height, width = x.shape
        normed = self.groupnorm(x)
        stacked = rearrange(
            self.qkv(normed),
            "b (qkv heads c) h w -> (qkv) b heads c (h w)",
            heads=self.num_heads, qkv=3,
        )
        queries, keys, values = stacked[0], stacked[1], stacked[2]

        # Keys are normalised over positions and contracted with the values first,
        # so the intermediate is channel-by-channel rather than position-by-position.
        key_weights = F.softmax(keys, dim=-1)
        context = torch.einsum('bhdn,bhen->bhde', key_weights, values)
        attended = torch.einsum('bhde,bhdn->bhen', context, queries)
        merged = rearrange(
            attended, 'b heads c (h w) -> b (heads c) h w',
            heads=self.num_heads, h=height, w=width,
        )
        return self.proj(merged)


class ResBlock(nn.Module):
    """Noise-conditioned residual block, optionally followed by self-attention.

    Args:
        dim: Input channel count.
        dim_out: Output channel count.
        noise_level_emb_dim: Width of the noise embedding passed to ``forward``.
        dropout: Dropout probability used in the second convolution block.
        num_heads: Number of heads of the attention layer.
        use_affine_level: Forwarded to ``FeatureWiseAffine`` (scale+shift vs. shift only).
        norm_groups: Number of groups for every GroupNorm in the block.
        att: Whether ``forward`` applies the attention layer. The layer is
            constructed either way, so its parameters are always present.
    """

    def __init__(self, dim, dim_out, noise_level_emb_dim=None, dropout=0, 
                    num_heads=1, use_affine_level=False, norm_groups=32, att=True):
        super().__init__()
        self.noise_func = FeatureWiseAffine(noise_level_emb_dim, dim_out, use_affine_level)
        self.block1 = Block(dim, dim_out, groups=norm_groups)
        self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout)
        # The skip path needs a 1x1 projection only when the channel count changes.
        if dim == dim_out:
            self.res_conv = nn.Identity()
        else:
            self.res_conv = nn.Conv2d(dim, dim_out, 1)
        self.att = att
        self.attn = SelfAtt(dim_out, num_heads=num_heads, norm_groups=norm_groups)

    def forward(self, x, time_emb):
        """Apply conv -> noise conditioning -> conv, add the skip path, then optional attention."""
        hidden = self.block1(x)
        hidden = self.noise_func(hidden, time_emb)
        hidden = self.block2(hidden)
        out = hidden + self.res_conv(x)
        return self.attn(out) if self.att else out

In [None]:
class UNet(nn.Module):
    """Noise-conditioned U-Net used as the denoiser of the diffusion model.

    The encoder stacks ``res_blocks`` ResBlocks per resolution level with a
    Downsample between levels; the decoder mirrors it with ``res_blocks + 1``
    ResBlocks per level, each consuming one stored encoder feature map through
    channel concatenation. Every ResBlock except the second middle block
    applies self-attention.

    Args:
        in_channel: Channel count of the tensor passed to ``forward``.
        out_channel: Channel count of the returned tensor.
        inner_channel: Base width; also the size of the noise-level embedding.
        norm_groups: Number of groups for every GroupNorm.
        channel_mults: Per-level multipliers of ``inner_channel``. Any sequence
            works; the default is a tuple so it cannot be mutated between instances.
        res_blocks: ResBlocks per encoder level.
        dropout: Dropout probability forwarded to every ResBlock.
        img_size: Accepted for interface compatibility; the layers built here
            do not depend on it.
    """

    def __init__(self, in_channel=6, out_channel=3, inner_channel=32, norm_groups=32,
        channel_mults=(1, 2, 4, 8, 8), res_blocks=3, dropout=0, img_size=128):
        super().__init__()

        noise_level_channel = inner_channel
        self.noise_level_mlp = nn.Sequential(
            PositionalEncoding(inner_channel),
            nn.Linear(inner_channel, inner_channel * 4),
            Swish(), 
            nn.Linear(inner_channel * 4, inner_channel)
        )

        num_mults = len(channel_mults)
        pre_channel = inner_channel
        # Channel count of every feature map the encoder will emit, in order;
        # the decoder pops from the end to size its concatenated inputs.
        feat_channels = [pre_channel]

        # Downsampling stage of U-net
        downs = [nn.Conv2d(in_channel, inner_channel, kernel_size=3, padding=1)]
        for ind in range(num_mults):
            is_last = (ind == num_mults - 1)
            channel_mult = inner_channel * channel_mults[ind]
            for _ in range(0, res_blocks):
                downs.append(ResBlock(
                    pre_channel, channel_mult, noise_level_emb_dim=noise_level_channel, 
                    norm_groups=norm_groups, dropout=dropout))
                feat_channels.append(channel_mult)
                pre_channel = channel_mult
            if not is_last:
                downs.append(Downsample(pre_channel))
                feat_channels.append(pre_channel)
        self.downs = nn.ModuleList(downs)

        self.mid = nn.ModuleList([
            ResBlock(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, 
                            norm_groups=norm_groups, dropout=dropout),
            ResBlock(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, 
                        norm_groups=norm_groups, dropout=dropout, att=False)
        ])

        # Upsampling stage of U-net
        ups = []
        for ind in reversed(range(num_mults)):
            is_last = (ind < 1)
            channel_mult = inner_channel * channel_mults[ind]
            # One more block than the encoder level so that every stored feature map is consumed.
            for _ in range(0, res_blocks+1):
                ups.append(ResBlock(
                    pre_channel+feat_channels.pop(), channel_mult, noise_level_emb_dim=noise_level_channel, 
                    norm_groups=norm_groups, dropout=dropout))
                pre_channel = channel_mult
            if not is_last:
                ups.append(Upsample(pre_channel))

        self.ups = nn.ModuleList(ups)

        self.final_conv = Block(pre_channel, out_channel, groups=norm_groups)

    def forward(self, x, noise_level):
        """Run the U-Net on ``x`` conditioned on ``noise_level``.

        Args:
            x: Input feature map with ``in_channel`` channels.
            noise_level: Tensor fed to the noise-level MLP (positional encoding
                followed by two linear layers).

        Returns:
            The output of the final convolution block, with ``out_channel`` channels.
        """
        # Embedding of time step with noise coefficient alpha
        t = self.noise_level_mlp(noise_level)

        # Encoder: keep the output of every layer for the skip connections.
        feats = []
        for layer in self.downs:
            if isinstance(layer, ResBlock):
                x = layer(x, t)
            else:
                x = layer(x)
            feats.append(x)

        for layer in self.mid:
            x = layer(x, t)

        # Decoder: each ResBlock consumes the most recently stored encoder output.
        for layer in self.ups:
            if isinstance(layer, ResBlock):
                x = layer(torch.cat((x, feats.pop()), dim=1), t)
            else:
                x = layer(x)

        return self.final_conv(x)

In [None]:
"""
    Define Diffusion process framework to train desired model:
    Forward Diffusion process:
        Given original image x_0, apply Gaussian noise ε_t for each time step t
        After sufficiently many time steps, the image x_T reaches pure Gaussian noise
    Objective of model f :
        model f is trained to predict actual added noise ε_t for each time step t
"""

class Diffusion(nn.Module):
    """Conditional Gaussian diffusion process wrapped around a noise-predicting model.

    Training (``forward`` / ``p_losses``) perturbs a clean image with Gaussian
    noise at a random noise level and asks ``model`` to predict that noise,
    given the perturbed image concatenated with a conditioning image.
    Sampling (``super_resolution``) runs the reverse chain from pure Gaussian
    noise, conditioned on the given image.

    ``set_loss`` and ``set_new_noise_schedule`` must be called before training
    or sampling; they create ``loss_func`` and the schedule buffers.

    Args:
        model: Module called as ``model(x, noise_level)``, where ``x`` is the
            conditioning image and the noisy image concatenated on dim 1.
        device: Device the model and the schedule buffers are placed on.
        img_size: Stored on the instance; not read by any method of this class.
        LR_size: Stored on the instance; not read by any method of this class.
        channels: Stored on the instance; not read by any method of this class.
    """

    def __init__(self, model, device, img_size, LR_size, channels=3):
        super().__init__()
        self.channels = channels
        self.model = model.to(device)
        self.img_size = img_size
        self.LR_size = LR_size
        self.device = device

    def set_loss(self, loss_type):
        """Select the training loss: ``'l1'`` or ``'l2'``, both summed over all elements.

        Raises:
            NotImplementedError: For any other ``loss_type``.
        """
        if loss_type == 'l1':
            self.loss_func = nn.L1Loss(reduction='sum')
        elif loss_type == 'l2':
            self.loss_func = nn.MSELoss(reduction='sum')
        else:
            raise NotImplementedError()

    def make_beta_schedule(self, schedule, n_timestep, linear_start=1e-4, linear_end=2e-2):
        """Build the per-step noise variances (betas).

        Args:
            schedule: ``'linear'``, ``'warmup'`` (linear ramp over the first 10%
                of steps, then constant ``linear_end``) or ``'cosine'``.
            n_timestep: Number of diffusion steps.
            linear_start: First beta of the linear / warmup ramp.
            linear_end: Last beta of the linear / warmup ramp.

        Returns:
            A float64 NumPy array for ``'linear'`` and ``'warmup'``, or a float64
            torch tensor for ``'cosine'``, of length ``n_timestep``.

        Raises:
            NotImplementedError: For an unknown ``schedule``.
        """
        if schedule == 'linear':
            betas = np.linspace(linear_start, linear_end, n_timestep, dtype=np.float64)
        elif schedule == 'warmup':
            warmup_frac=0.1
            betas = linear_end * np.ones(n_timestep, dtype=np.float64)
            warmup_time = int(n_timestep * warmup_frac)
            betas[:warmup_time] = np.linspace(linear_start, linear_end, warmup_time, dtype=np.float64)
        elif schedule == "cosine":
            cosine_s = 8e-3
            timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
            alphas = timesteps / (1 + cosine_s) * math.pi / 2
            alphas = torch.cos(alphas).pow(2)
            alphas = alphas / alphas[0]
            betas = 1 - alphas[1:] / alphas[:-1]
            betas = betas.clamp(max=0.999)
        else:
            raise NotImplementedError(schedule)
        return betas

    def set_new_noise_schedule(self, schedule_opt):
        """Compute the schedule and register every derived coefficient as a buffer.

        Args:
            schedule_opt: Mapping with keys ``'schedule'`` and ``'n_timestep'``,
                and optionally ``'linear_start'`` / ``'linear_end'`` (defaulting
                to the defaults of ``make_beta_schedule``).
        """
        to_torch = partial(torch.tensor, dtype=torch.float32, device=self.device)

        betas = self.make_beta_schedule(
            schedule=schedule_opt['schedule'],
            n_timestep=schedule_opt['n_timestep'],
            linear_start=schedule_opt.get('linear_start', 1e-4),
            linear_end=schedule_opt.get('linear_end', 2e-2))
        betas = betas.detach().cpu().numpy() if isinstance(betas, torch.Tensor) else betas
        alphas = 1. - betas
        alphas_cumprod = np.cumprod(alphas, axis=0)
        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
        # Length n_timestep + 1: entry 0 is 1 (no noise), entry t is sqrt(alphas_cumprod[t-1]).
        # Kept as a NumPy array because it is indexed with Python ints and fed to np.random.
        self.sqrt_alphas_cumprod_prev = np.sqrt(np.append(1., alphas_cumprod))

        self.num_timesteps = int(len(betas))
        # Coefficient for forward diffusion q(x_t | x_{t-1}) and others
        self.register_buffer('betas', to_torch(betas))
        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
        self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))
        self.register_buffer('pred_coef1', to_torch(np.sqrt(1. / alphas_cumprod)))
        self.register_buffer('pred_coef2', to_torch(np.sqrt(1. / alphas_cumprod - 1)))

        # Coefficient for reverse diffusion posterior q(x_{t-1} | x_t, x_0)
        variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
        self.register_buffer('variance', to_torch(variance))
        # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
        self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(variance, 1e-20))))
        self.register_buffer('posterior_mean_coef1', to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)))
        self.register_buffer('posterior_mean_coef2', to_torch((1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)))

    def predict_start(self, x_t, t, noise):
        """Estimate the clean image x_0 from ``x_t`` and the predicted ``noise`` at step ``t``."""
        return self.pred_coef1[t] * x_t - self.pred_coef2[t] * noise

    def q_posterior(self, x_start, x_t, t):
        """Return mean and clipped log-variance of the posterior q(x_{t-1} | x_t, x_0)."""
        posterior_mean = self.posterior_mean_coef1[t] * x_start + self.posterior_mean_coef2[t] * x_t
        posterior_log_variance_clipped = self.posterior_log_variance_clipped[t]
        return posterior_mean, posterior_log_variance_clipped

    def p_mean_variance(self, x, t, clip_denoised: bool, condition_x=None):
        """Mean and log-variance of one reverse step, using the model's estimate of x_0.

        The true posterior q(x_{t-1} | x_t, x_0) needs the clean image x_0, which
        is unknown while sampling, so x_0 is reconstructed from the model's noise
        prediction and plugged into the posterior instead.

        Args:
            x: Current noisy image x_t.
            t: Integer step index in ``[0, num_timesteps)``.
            clip_denoised: Clamp the reconstructed x_0 to ``[-1, 1]``.
            condition_x: Conditioning image, concatenated before ``x`` on dim 1.
        """
        batch_size = x.shape[0]
        # Index t + 1 because sqrt_alphas_cumprod_prev carries a leading 1 for the noise-free state.
        noise_level = torch.FloatTensor([self.sqrt_alphas_cumprod_prev[t+1]]).repeat(batch_size, 1).to(x.device)
        x_recon = self.predict_start(x, t, noise=self.model(torch.cat([condition_x, x], dim=1), noise_level))

        if clip_denoised:
            x_recon.clamp_(-1., 1.)

        mean, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
        return mean, posterior_log_variance

    @torch.no_grad()
    def p_sample(self, x, t, clip_denoised=True, condition_x=None):
        """Draw x_{t-1} from the reverse-step distribution; no noise is added at ``t == 0``."""
        mean, log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised, condition_x=condition_x)
        noise = torch.randn_like(x) if t > 0 else torch.zeros_like(x)
        return mean + noise * (0.5 * log_variance).exp()

    @torch.no_grad()
    def super_resolution(self, x_in):
        """Run the whole reverse chain conditioned on ``x_in`` and return the final sample.

        The chain starts from standard Gaussian noise, matching the end point of
        the forward process. (It previously started from uniform ``[0, 1)``
        samples, a distribution the model is never trained on.)
        """
        img = torch.randn_like(x_in)
        for i in reversed(range(0, self.num_timesteps)):
            img = self.p_sample(img, i, condition_x=x_in)
        return img

    def p_losses(self, x_in):
        """Compute the noise-prediction loss for a batch of clean images ``x_in``.

        One step ``t`` is drawn for the whole batch; each sample then gets its
        own noise level drawn uniformly between the cumulative sqrt-alpha values
        of steps ``t - 1`` and ``t``.

        Returns:
            The summed loss between the true and the predicted noise (not yet
            normalised by the number of elements).
        """
        x_start = x_in
        # Conditioning image: the clean input corrupted with N(0, 0.1^2) noise,
        # drawn directly on x_in's device so it also works when replicas live on different devices.
        lr_imgs = x_in + 0.1 * torch.randn_like(x_in)
        b = x_start.shape[0]
        t = np.random.randint(1, self.num_timesteps + 1)
        sqrt_alpha = torch.FloatTensor(
            np.random.uniform(self.sqrt_alphas_cumprod_prev[t-1], self.sqrt_alphas_cumprod_prev[t], size=b)
        ).to(x_start.device)
        sqrt_alpha = sqrt_alpha.view(-1, 1, 1, 1)

        noise = torch.randn_like(x_start)
        # Perturbed image obtained by forward diffusion process at random time step t
        x_noisy = sqrt_alpha * x_start + (1 - sqrt_alpha**2).sqrt() * noise
        # The model predict actual noise added at time step t
        pred_noise = self.model(torch.cat([lr_imgs, x_noisy], dim=1), noise_level=sqrt_alpha)
        return self.loss_func(noise, pred_noise)

    def forward(self, x, *args, **kwargs):
        """Alias for ``p_losses`` so the module can be called directly during training."""
        return self.p_losses(x, *args, **kwargs)

In [None]:
# Class to train & test desired model
class SR3():
    """Builds, trains, samples from, and checkpoints the conditional diffusion model.

    Args:
        device: Device used for the model and for every batch.
        img_size: Forwarded to ``UNet`` and ``Diffusion``.
        LR_size: Forwarded to ``Diffusion``.
        loss_type: ``'l1'`` or ``'l2'`` (see ``Diffusion.set_loss``).
        dataloader: Training loader; batches are indexed with ``[0]`` to get the images.
        testloader: Loader used for the validation batch and the preview images.
        schedule_opt: Noise-schedule options (see ``Diffusion.set_new_noise_schedule``).
        save_path: Where ``train`` writes the checkpoint.
        load_path: Checkpoint to restore when ``load`` is true.
        load: Whether to restore ``load_path`` at construction time.
        in_channel, out_channel, inner_channel, norm_groups, channel_mults,
        res_blocks, dropout: Forwarded to ``UNet``.
        lr: Adam learning rate.
        distributed: Wrap the model in ``nn.DataParallel`` (requires CUDA).

    Raises:
        ValueError: If ``load`` is true but ``load_path`` is ``None``.
    """

    def __init__(self, device, img_size, LR_size, loss_type, dataloader, testloader, 
                    schedule_opt, save_path, load_path=None, load=False, 
                    in_channel=6, out_channel=3, inner_channel=32, norm_groups=8, 
                    channel_mults=(1, 2, 4, 8, 8), res_blocks=3, dropout=0, lr=1e-5, distributed=False):
        self.dataloader = dataloader
        self.testloader = testloader
        self.device = device
        self.save_path = save_path
        self.img_size = img_size
        self.LR_size = LR_size

        model = UNet(in_channel, out_channel, inner_channel, norm_groups, channel_mults, res_blocks, dropout, img_size)
        self.sr3 = Diffusion(model, device, img_size, LR_size, out_channel)

        # Apply weight initialization & set loss & set noise schedule
        self.sr3.apply(self.weights_init_orthogonal)
        self.sr3.set_loss(loss_type)
        self.sr3.set_new_noise_schedule(schedule_opt)

        if distributed:
            assert torch.cuda.is_available()
            self.sr3 = nn.DataParallel(self.sr3)

        self.optimizer = torch.optim.Adam(self.sr3.parameters(), lr=lr)

        params = sum(p.numel() for p in self.sr3.parameters())
        print(f"Number of model parameters : {params}")

        if load:
            if load_path is None:
                raise ValueError("load=True requires load_path")
            self.load(load_path)

    def weights_init_orthogonal(self, m):
        """Initialiser for ``Module.apply``: orthogonal weights and zero biases.

        Modules are matched by class name: anything containing ``Conv`` or
        ``Linear`` gets orthogonal weights; ``BatchNorm2d`` gets weight 1, bias 0.
        """
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            init.orthogonal_(m.weight.data, gain=1)
            if m.bias is not None:
                m.bias.data.zero_()
        elif classname.find('Linear') != -1:
            init.orthogonal_(m.weight.data, gain=1)
            if m.bias is not None:
                m.bias.data.zero_()
        elif classname.find('BatchNorm2d') != -1:
            init.constant_(m.weight.data, 1.0)
            init.constant_(m.bias.data, 0.0)

    def train(self, epoch, verbose):
        """Train for ``epoch`` epochs; every ``verbose`` epochs report, preview and checkpoint.

        Args:
            epoch: Number of passes over ``self.dataloader``.
            verbose: Reporting period in epochs. On those epochs the method
                prints the losses, saves a preview figure and writes the
                checkpoint. A falsy value disables reporting.
        """
        # One batch is drawn up front and reused for every preview of this call.
        fixed_batch = copy.deepcopy(next(iter(self.testloader)))
        fixed_clean = fixed_batch[0].to(self.device)
        # Conditioning version of the preview batch (same corruption as in training).
        fixed_cond = fixed_clean + 0.1 * torch.randn_like(fixed_clean)

        for i in tqdm(range(epoch)):
            running_loss = 0.0
            num_samples = 0
            for _, imgs in enumerate(self.dataloader):
                # Initial imgs are high-resolution
                imgs = imgs[0].to(self.device)
                b, c, h, w = imgs.shape

                self.optimizer.zero_grad()
                loss = self.sr3(imgs)
                # The model returns a summed loss; normalise to a per-element mean.
                loss = loss.sum() / int(b*c*h*w)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item() * b
                num_samples += b

            if verbose and (i+1) % verbose == 0:
                self.sr3.eval()
                test_imgs = next(iter(self.testloader))
                test_imgs = test_imgs[0].to(self.device)
                b, c, h, w = test_imgs.shape

                with torch.no_grad():
                    val_loss = self.sr3(test_imgs)
                    val_loss = val_loss.sum() / int(b*c*h*w)
                self.sr3.train()

                # Sample-weighted mean, so it is on the same per-element scale as val_loss.
                # (Dividing the batch-size-weighted sum by the number of batches inflated
                # the reported value by roughly the batch size.)
                train_loss = running_loss / max(num_samples, 1)
                print(f'Epoch: {i+1} / loss:{train_loss:.3f} / val_loss:{val_loss.item():.3f}')

                # Save example of test images to check training
                self._save_preview(i, fixed_cond, fixed_clean)

                # Save model weight
                self.save(self.save_path)

    def _save_preview(self, epoch_index, cond_imgs, clean_imgs):
        """Save a three-panel figure: conditioning inputs, clean originals, sampled results."""
        # The batch is already corrupted, so it is used as the condition without adding noise again.
        results = self.test(cond_imgs, add_noise=False).detach().cpu()
        panels = (
            ("Conditional Inputs", cond_imgs.cpu()),
            ("Original Inputs", clean_imgs.cpu()),
            ("Results", results),
        )
        plt.figure(figsize=(15,10))
        for position, (title, batch) in enumerate(panels, start=1):
            plt.subplot(1,3,position)
            plt.axis("off")
            plt.title(title)
            grid = torchvision.utils.make_grid(batch, nrow=2, padding=1, normalize=True)
            plt.imshow(np.transpose(grid,(1,2,0)))
        plt.savefig('SuperResolution_Result'+'28lr'+str(epoch_index)+'.jpg')
        plt.close()

    def test(self, imgs, add_noise=True):
        """Sample from the model conditioned on ``imgs``.

        Args:
            imgs: Batch of images on ``self.device``.
            add_noise: If true (default), ``imgs`` are treated as clean and are
                corrupted with N(0, 0.1^2) noise before being used as the
                condition. Pass ``False`` when ``imgs`` are already the
                conditioning images.

        Returns:
            The sampled batch, same shape as ``imgs``. The model is switched back
            to training mode before returning.
        """
        imgs_lr = imgs + 0.1 * torch.randn_like(imgs) if add_noise else imgs
        self.sr3.eval()
        with torch.no_grad():
            if isinstance(self.sr3, nn.DataParallel):
                result_SR = self.sr3.module.super_resolution(imgs_lr)
            else:
                result_SR = self.sr3.super_resolution(imgs_lr)
        self.sr3.train()
        return result_SR

    def save(self, save_path):
        """Write the model's state dict (moved to CPU) to ``save_path``."""
        network = self.sr3
        if isinstance(self.sr3, nn.DataParallel):
            network = network.module
        state_dict = network.state_dict()
        for key, param in state_dict.items():
            state_dict[key] = param.cpu()
        torch.save(state_dict, save_path)

    def load(self, load_path):
        """Restore the model's state dict from ``load_path`` onto ``self.device``."""
        network = self.sr3
        if isinstance(self.sr3, nn.DataParallel):
            network = network.module
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        network.load_state_dict(torch.load(load_path, map_location=self.device))
        print("Model loaded successfully")

In [None]:
# --- Configuration ---
batch_size = 100
LR_size = 32
img_size = 32
# NOTE(review): these two paths are not read by the CIFAR-10 pipeline below;
# kept in case another cell still refers to them - confirm and remove.
root = './data/ffhq_thumb'
testroot = './data/celeba_hq'
# CIFAR-10 label ids kept for this experiment: 5 = dog, 7 = horse.
KEEP_CLASSES = (5, 7)

# --- Data: CIFAR-10 scaled to [-1, 1], restricted to KEEP_CLASSES ---
transforms_ = transforms.Compose([transforms.Resize(img_size), transforms.ToTensor(), 
                                        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))])
trainset = CIFAR10("./data", train=True, download=True,transform=transforms_)
testset = CIFAR10("./data", train=False, download=True,transform=transforms_)

# Indices of the samples whose label is one of KEEP_CLASSES.
train1 = np.flatnonzero(np.isin(trainset.targets, KEEP_CLASSES))
test1 = np.flatnonzero(np.isin(testset.targets, KEEP_CLASSES))

print(train1.shape)
train1_Set = torch.utils.data.Subset(trainset, train1)
test1_Set = torch.utils.data.Subset(testset, test1)

dataloader = DataLoader(train1_Set, batch_size=batch_size, shuffle=True, num_workers=2)
# Small batches here: the first batch drawn doubles as the preview grid during training.
testloader = DataLoader(test1_Set, batch_size=10, shuffle=True, num_workers=2)

# --- Model ---
cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")
schedule_opt = {'schedule':'linear', 'n_timestep':2000, 'linear_start':1e-4, 'linear_end':0.05}

sr3 = SR3(device, img_size=img_size, LR_size=LR_size, loss_type='l1', 
            dataloader=dataloader, testloader=testloader, schedule_opt=schedule_opt, 
            save_path='./SR3.pt', load_path='./SR3.pt', load=False, inner_channel=96, 
            norm_groups=16, channel_mults=(1, 2, 2, 2), dropout=0.2, res_blocks=2, lr=1e-4, distributed=False)


Files already downloaded and verified
Files already downloaded and verified
(10000,)
Number of model parameters : 20743011


In [None]:
1e-1

0.1

In [None]:
#dataloader.dataset

<__main__.DatasetMaker at 0x7f3fb0f36940>

In [None]:
# gaussian noise added
sr3.train(epoch=10, verbose=1)
# Epoch: 1 / loss:62.339 / val_loss:0.464
# Epoch: 2 / loss:43.200 / val_loss:0.351
# Epoch: 3 / loss:37.751 / val_loss:0.304
# Epoch: 4 / loss:24.045 / val_loss:0.272
# Epoch: 5 / loss:17.456 / val_loss:0.113
# Epoch: 6 / loss:13.156 / val_loss:0.113
# Epoch: 7 / loss:12.487 / val_loss:0.563
# Epoch: 8 / loss:11.274 / val_loss:0.350
# Epoch: 9 / loss:12.383 / val_loss:0.070
# Epoch: 10 / loss:9.990 / val_loss:0.072

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:62.339 / val_loss:0.464
Epoch: 2 / loss:43.200 / val_loss:0.351
Epoch: 3 / loss:37.751 / val_loss:0.304
Epoch: 4 / loss:24.045 / val_loss:0.272
Epoch: 5 / loss:17.456 / val_loss:0.113
Epoch: 6 / loss:13.156 / val_loss:0.113
Epoch: 7 / loss:12.487 / val_loss:0.563
Epoch: 8 / loss:11.274 / val_loss:0.350
Epoch: 9 / loss:12.383 / val_loss:0.070
Epoch: 10 / loss:9.990 / val_loss:0.072


In [None]:
# gaussian noise added
sr3.train(epoch=10, verbose=1)
# Epoch: 1 / loss:9.171 / val_loss:0.056
# Epoch: 2 / loss:9.068 / val_loss:0.129
# Epoch: 3 / loss:8.605 / val_loss:0.060
# Epoch: 4 / loss:8.855 / val_loss:0.048
# Epoch: 5 / loss:7.183 / val_loss:0.047
# Epoch: 6 / loss:7.596 / val_loss:0.048
# Epoch: 7 / loss:9.189 / val_loss:0.040
# Epoch: 8 / loss:6.730 / val_loss:0.050
# Epoch: 9 / loss:6.930 / val_loss:0.042
# Epoch: 10 / loss:6.161 / val_loss:0.676

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:9.171 / val_loss:0.056
Epoch: 2 / loss:9.068 / val_loss:0.129
Epoch: 3 / loss:8.605 / val_loss:0.060
Epoch: 4 / loss:8.855 / val_loss:0.048
Epoch: 5 / loss:7.183 / val_loss:0.047
Epoch: 6 / loss:7.596 / val_loss:0.048
Epoch: 7 / loss:9.189 / val_loss:0.040
Epoch: 8 / loss:6.730 / val_loss:0.050
Epoch: 9 / loss:6.930 / val_loss:0.042
Epoch: 10 / loss:6.161 / val_loss:0.676


In [None]:
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:67.812 / val_loss:0.501
Epoch: 2 / loss:41.299 / val_loss:0.271
Epoch: 3 / loss:26.303 / val_loss:0.477
Epoch: 4 / loss:26.237 / val_loss:0.172
Epoch: 5 / loss:21.394 / val_loss:0.141
Epoch: 6 / loss:16.497 / val_loss:0.107
Epoch: 7 / loss:15.043 / val_loss:0.096
Epoch: 8 / loss:14.290 / val_loss:0.276
Epoch: 9 / loss:15.649 / val_loss:0.281
Epoch: 10 / loss:13.772 / val_loss:0.099


In [None]:
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:80.132 / val_loss:0.707
Epoch: 2 / loss:70.288 / val_loss:0.650
Epoch: 3 / loss:65.007 / val_loss:0.590
Epoch: 4 / loss:58.850 / val_loss:0.745
Epoch: 5 / loss:54.118 / val_loss:0.468
Epoch: 6 / loss:48.001 / val_loss:0.413
Epoch: 7 / loss:43.963 / val_loss:0.373
Epoch: 8 / loss:41.843 / val_loss:0.342
Epoch: 9 / loss:37.064 / val_loss:0.312
Epoch: 10 / loss:36.538 / val_loss:0.538


In [None]:
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:66.786 / val_loss:0.475
Epoch: 2 / loss:40.649 / val_loss:0.271
Epoch: 3 / loss:30.632 / val_loss:0.207
Epoch: 4 / loss:25.355 / val_loss:0.176
Epoch: 5 / loss:27.632 / val_loss:0.380
Epoch: 6 / loss:21.768 / val_loss:0.154
Epoch: 7 / loss:18.169 / val_loss:0.211
Epoch: 8 / loss:17.559 / val_loss:0.763
Epoch: 9 / loss:16.206 / val_loss:0.103
Epoch: 10 / loss:15.060 / val_loss:0.098


In [None]:
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:15.102 / val_loss:0.271
Epoch: 2 / loss:16.580 / val_loss:0.133
Epoch: 3 / loss:12.683 / val_loss:0.087
Epoch: 4 / loss:11.894 / val_loss:0.076
Epoch: 5 / loss:11.713 / val_loss:0.078
Epoch: 6 / loss:11.556 / val_loss:0.070
Epoch: 7 / loss:12.807 / val_loss:0.084
Epoch: 8 / loss:12.420 / val_loss:0.080
Epoch: 9 / loss:13.183 / val_loss:0.078
Epoch: 10 / loss:14.902 / val_loss:0.094


In [None]:
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:314.543 / val_loss:0.928
Epoch: 2 / loss:81.836 / val_loss:0.804
Epoch: 3 / loss:79.839 / val_loss:0.795
Epoch: 4 / loss:79.814 / val_loss:0.797
Epoch: 5 / loss:79.804 / val_loss:0.793
Epoch: 6 / loss:79.775 / val_loss:0.798
Epoch: 7 / loss:79.806 / val_loss:0.795
Epoch: 8 / loss:80.116 / val_loss:0.809
Epoch: 9 / loss:79.983 / val_loss:0.800
Epoch: 10 / loss:80.423 / val_loss:0.803


In [None]:
# two classes
sr3.train(epoch=10, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 / loss:65.676 / val_loss:0.489
Epoch: 2 / loss:45.809 / val_loss:0.378
Epoch: 3 / loss:37.824 / val_loss:0.295
Epoch: 4 / loss:21.237 / val_loss:0.113
Epoch: 5 / loss:13.945 / val_loss:0.091
Epoch: 6 / loss:12.647 / val_loss:0.091
Epoch: 7 / loss:12.292 / val_loss:0.077
Epoch: 8 / loss:13.431 / val_loss:0.081
Epoch: 9 / loss:13.600 / val_loss:0.100
Epoch: 10 / loss:9.848 / val_loss:0.275


In [None]:
sr3.train(epoch=10, verbose=1)