In [1]:
import timm

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from __future__ import print_function, division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import os
import sys

from __future__ import print_function 
from __future__ import division
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import matplotlib.pyplot as plt
from PIL import Image
import time
import copy
import pandas as pd
from tqdm import tqdm, tqdm_notebook
from scipy import stats

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error 

print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)



PyTorch Version:  1.12.1
Torchvision Version:  0.13.1


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model

class Block(nn.Module):
    r""" ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch
    
    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """
    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 
                                    requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x

class ConvNeXt(nn.Module):
    r""" ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf

    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """
    def __init__(self, in_chans=3, num_classes=1000, 
                 depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        super().__init__()

        self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                    LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                    nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
        dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 
        cur = 0
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 
                layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x):
        x = self.forward_features(x)
#         x = self.head(x)
        return x

class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError 
        self.normalized_shape = (normalized_shape, )
    
    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x

In [3]:
from torchvision import datasets, transforms

from timm.data.constants import \
    IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
from timm.data import create_transform


def build_transform(is_train, args):
    resize_im = args.input_size > 32
    imagenet_default_mean_and_std = args.imagenet_default_mean_and_std
    mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN
    std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD

    if is_train:
        # this should always dispatch to transforms_imagenet_train
        transform = create_transform(
            input_size=args.input_size,
            is_training=True,
            color_jitter=args.color_jitter,
            auto_augment=args.aa,
            interpolation=args.train_interpolation,
            re_prob=args.reprob,
            re_mode=args.remode,
            re_count=args.recount,
            mean=mean,
            std=std,
        )
        if not resize_im:
            transform.transforms[0] = transforms.RandomCrop(
                args.input_size, padding=4)
        return transform

    t = []
    if resize_im:
        # warping (no cropping) when evaluated at 384 or larger
        if args.input_size >= 384:  
            t.append(
            transforms.Resize((args.input_size, args.input_size), 
                            interpolation=transforms.InterpolationMode.BICUBIC), 
        )
            print(f"Warping {args.input_size} size input images...")
        else:
            if args.crop_pct is None:
                args.crop_pct = 224 / 256
            size = int(args.input_size / args.crop_pct)
            t.append(
                # to maintain same ratio w.r.t. 224 images
                transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC),  
            )
            t.append(transforms.CenterCrop(args.input_size))

    t.append(transforms.ToTensor())
    t.append(transforms.Normalize(mean, std))
    return transforms.Compose(t)

In [4]:
import sys
sys.path.append("../CLIP/")

In [5]:
import torch.utils.data as data
import clip

class CustomDataSet(data.Dataset):
    def __init__(self, root, transform):
        self.root = root
        self.samples = []
        self.label = []
        cnt = [0 for i in range(5)]
        for i in range(5):
            self.samples.extend([root + str(i) + '/' + path_image for path_image in os.listdir(root + '/' + str(i))])
            self.label.extend([i for j in range(len(os.listdir(root + '/' +str(i))))])
            cnt[i] += len(os.listdir(root + '/' + str(i)))
        print(cnt)
        
        self.transform = transform

    def __getitem__(self, index):
        target = self.label[index]
        sample = Image.open(self.samples[index]).convert('RGB')
        if(self.transform):
            sample = self.transform(sample)
        
        feature = X_train[self.samples[index]]
            
        return sample, feature, target

    def __len__(self):
        length = len(self.samples)
        return length
    def get_labels(self):
        return self.label

In [6]:
Model, Preprocess = clip.load("ViT-B/32", device="cuda:1")

In [None]:
text = ["food", "drink", "dirty", "woman", "man", 
                "face", "messy", "untidy", "watermark", "blur", 
                "paper", "page", "person", "behavior", "road", 
                "house", "food", "drink", "woman", "man", 
                "body", "crowd", "vehicle", "poster", "food", 
                "drink", "sharpness", "natural scene", "organized", "sharpness", 
                "tidiness", "food", "drink", "natural scene", "garden, field", 
                "very organized", "sharpness", "tidiness", "food, drink", 
                "natural scene", "garden field"]

In [7]:
data_train = CustomDataSet('../Data/data70k/train/', False)
data_test = CustomDataSet('../Data/data70k/test/', False)

# data_train = CustomDataSet1('../Data/data70k/train/', [], isTrain = True)
# data_test = CustomDataSet1('../Data/data70k/test/', [], isTrain = False)

X_train, X_test = {}, {}

device = "cuda:1"

# text=["human",
#      "human body parts",
#      "nice picture",
#      "bad image",
#      "natural landscape",
#      "leftovers",
#      "delicious food",
#      "animal"]
text = torch.cat([clip.tokenize(f"{c}") for c in text]).to(device)

# image_features = Model.encode_image(inputs.unsqueeze(0).to(device))
with torch.no_grad():
    text_features = Model.encode_text(text)
text_features /= text_features.norm(dim=-1, keepdim=True)

[7977, 20927, 11643, 1418, 33]
[2660, 6976, 3882, 473, 11]


In [8]:
train = pd.read_csv("vector_train_naver2.csv")

In [9]:
X_train, X_test = {}, {}

for i in tqdm(range(train.shape[0])):
    X_train[train.loc[i]["Unnamed: 0"]] = np.array(train.loc[i][1:9]).astype(np.float64)

100%|█████████████████████████████████████| 56000/56000 [00:11<00:00, 4952.15it/s]


In [10]:
class DataModule(pl.LightningDataModule):

    def setup(self, stage):
        # transforms for images
        class args:
            input_size = 224
            imagenet_default_mean_and_std = True
            color_jitter = 0.4
            aa = "rand-m9-mstd0.5-inc1"
            train_interpolation = "bicubic"
            reprob = 0.25
            remode = "pixel"
            recount = 1
            crop_pct = None


        data_transforms = {
            'train': build_transform(is_train = True, args=args),
            'val': build_transform(is_train = False, args=args),
        }
        
        data_train = '../Data/data70k/train/'
        data_test = '../Data/data70k/test/'
        data_val = '../Data/data70k/valid/'
        
        self.batch_size = 128
        self.data_train = CustomDataSet(data_train, data_transforms['train'])
        self.data_val = CustomDataSet(data_test, data_transforms['val'])
                               
    def train_dataloader(self):
        return DataLoader(self.data_train, 
#                           sampler=ImbalancedDatasetSampler(self.data_train),
                          batch_size=self.batch_size, num_workers=8, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.data_val, batch_size=self.batch_size, num_workers=8)

In [11]:
class ConvNext(pl.LightningModule):
    def __init__(self, num_classes):
        super(ConvNext, self).__init__()
        self.training_step_outputs = np.array([])
        self.training_step_label = np.array([])
        self.validation_step_outputs = np.array([])
        self.validation_step_label = np.array([])
        
#         self.base_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

#         for param in self.base_model.parameters():
#             param.requires_grad = False
        checkpoint = torch.load("./model_ckpt_1k_tiny/checkpoint-best.pth")
        
        self.base_model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], num_classes=5)
        self.base_model.load_state_dict(checkpoint["model"])
#         self.base_model = torch.nn.Sequential(*list(self.base.children())[:-1])
#         self.base.eval()
#         self.base_model
        for param in self.base_model.parameters():
            param.requires_grad = False
        
        self.convNext_clip = 776
        
        self.fc_au = nn.Sequential(
            # nn.Dropout(p = 0.4),
            nn.LayerNorm(self.convNext_clip),
            nn.Linear(self.convNext_clip, self.convNext_clip),
            nn.GELU(),
            nn.Dropout(p = 0.5),
        )
        
        self.reg_head = nn.Sequential(
            nn.LayerNorm(self.convNext_clip),
            nn.Linear(self.convNext_clip, 256),
            nn.GELU(),
            nn.Dropout(p = 0.3),
            nn.LayerNorm(256),
            nn.Linear(256, num_classes),
            nn.GELU(),
#             nn.Linear(128, 1),
#             nn.GELU(),
        )
        for param in self.reg_head.parameters():
            param.requires_grad = True
        
#     def MSEloss(self, outputs, labels):
#         return torch.nn.MSELoss()(outputs, labels)
    
    def MAEloss(self, outputs, labels):
        return nn.L1Loss()(outputs, labels)
    
    def MAE_3_4(self, outputs, labels):
        pred_scores_3_4 = []
        gt_scores_3_4 = []
        
        pred_scores_3 = []
        gt_scores_3 = []
        
        pred_scores_4 = []
        gt_scores_4 = []
        test_mae_3_4 = 0
        test_mse_3_4 = 0
        test_mae_3 = 0
        test_mse_3 = 0
        test_mae_4 = 0
        test_mse_4 = 0
        
        for idx, gt in enumerate(labels):
            if(gt == 3 or gt == 4):
                gt_scores_3_4.append(labels[idx])
                pred_scores_3_4.append(outputs[idx])
            if(gt == 3):
                gt_scores_3.append(labels[idx])
                pred_scores_3.append(outputs[idx])
            if(gt == 4):
                gt_scores_4.append(labels[idx])
                pred_scores_4.append(outputs[idx])
        
        if(len(pred_scores_3_4) != 0):
            test_mae_3_4 = mean_absolute_error(pred_scores_3_4, gt_scores_3_4)
            test_mse_3_4 = mean_squared_error(pred_scores_3_4, gt_scores_3_4)
        
        if(len(pred_scores_3) != 0):
            test_mae_3 = mean_absolute_error(pred_scores_3, gt_scores_3)
            test_mse_3 = mean_squared_error(pred_scores_3, gt_scores_3)
        
        if(len(pred_scores_4) != 0):
            test_mae_4 = mean_absolute_error(pred_scores_4, gt_scores_4)
            test_mse_4 = mean_squared_error(pred_scores_4, gt_scores_4)
        
        return test_mae_3_4, test_mse_3_4, test_mae_3, test_mse_3, test_mae_4, test_mse_4

    
    def forward(self, x, x_clip):        
        x1 = self.base_model(x)
#         print("x", x.size())

        c = torch.cat((x_clip.squeeze(dim=1), x1), dim=1)
#         print(c.size())

#         print(x.size())
        x = self.fc_au(c) + c
#         print(x.size())
        x = self.reg_head(x)
        x = F.log_softmax(x, dim=1)
        return x    
    
    def training_step(self, train_batch, batch_idx):
        x, x_clip, y = train_batch
        x_clip = x_clip.float()
#         x = x.float()
#         y = y.float()
    
    
        logits = self.forward(x, x_clip)
#         outputs = logits
        outputs = torch.argmax(logits, -1)
#         loss = self.MAEloss(logits, y)
        loss = F.nll_loss(logits, y)
        
        
        self.training_step_outputs = np.append(self.training_step_outputs, 
                                               outputs.detach().cpu().numpy()).flatten()
        self.training_step_label = np.append(self.training_step_label, 
                                               y.detach().cpu().numpy()).flatten()
        return loss
        
    
    def validation_step(self, val_batch, batch_idx):
        x, x_clip, y = val_batch
        x_clip = x_clip.float()
#         x = x.float()
#         y = y.float()
        
        logits = self.forward(x, x_clip)
#         outputs = logits
        outputs = torch.argmax(logits, -1)
#         loss = self.MAEloss(logits, y)
        loss = F.nll_loss(logits, y)
        
        self.validation_step_outputs = np.append(self.validation_step_outputs, 
                                               outputs.detach().cpu().numpy()).flatten()
        self.validation_step_label = np.append(self.validation_step_label, 
                                               y.detach().cpu().numpy()).flatten()
    
        
    def training_epoch_end(self, training_step_outputs):
        if(self.training_step_outputs.shape[0] != 0):
            mse_loss = mean_squared_error(self.training_step_outputs, self.training_step_label)
            mae_loss = mean_absolute_error(self.training_step_outputs, self.training_step_label)

            print('training loss mse', mse_loss)
            print('training loss mae', mae_loss)
            self.training_step_outputs = np.array([])  # free memory
            self.training_step_label = np.array([])  # free memory
        
    def validation_epoch_end(self, validation_step_outputs):
        if(self.validation_step_outputs.shape[0] != 0):
            mse_loss = mean_squared_error(self.validation_step_outputs, self.validation_step_label)
            mae_loss = mean_absolute_error(self.validation_step_outputs, self.validation_step_label)
            
            test_mae_3_4, test_mse_3_4, test_mae_3, \
            test_mse_3, test_mae_4, test_mse_4 = self.MAE_3_4(self.validation_step_outputs, 
                                                         self.validation_step_label)
            score = [test_mae_3_4, test_mse_3_4, test_mae_3, test_mse_3, test_mae_4, test_mse_4]

            label = ["test_mae_3_4", 
                     "test_mse_3_4", 
                     "test_mae_3", 
                     "test_mse_3", 
                     "test_mae_4", 
                     "test_mse_4"]

            for a,b in zip(label, score):
                print(a, b)

            print('validation loss mse', mse_loss)
            print('validation loss mae', mae_loss)
            
            df = pd.DataFrame(self.validation_step_outputs, columns=['float'])
            df.to_csv("./validation.csv", index=False)
            
            self.validation_step_outputs = np.array([])  # free memory
            self.validation_step_label = np.array([])  # free memory
        
    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-2)
        return optimizer

In [12]:
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
save_dir = './'
data_module = DataModule()
model = ConvNext(num_classes=5)

checkpoint_callback = ModelCheckpoint(dirpath=save_dir, save_weights_only=True)

trainer = pl.Trainer(accelerator="gpu", devices=[2], max_epochs=20)
# trainer = pl.Trainer(accelerator="cpu", max_epochs=10)
trainer.fit(model, data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[7977, 20927, 11643, 1418, 33]
[2660, 6976, 3882, 473, 11]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name       | Type       | Params
------------------------------------------
0 | base_model | ConvNeXt   | 27.8 M
1 | fc_au      | Sequential | 604 K 
2 | reg_head   | Sequential | 234 K 
------------------------------------------
28.7 M    Trainable params
0         Non-trainable params
28.7 M    Total params
114.650   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

test_mae_3_4 0
test_mse_3_4 0
test_mae_3 0
test_mse_3 0
test_mae_4 0
test_mse_4 0
validation loss mse 0.007547393281927215
validation loss mae 0.06994104675322887


  return F.l1_loss(input, target, reduction=self.reduction)


Training: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 1.986750062339562
test_mse_3_4 3.969386542613895
test_mae_3 1.964022795443303
test_mse_3 3.857385541023857
test_mae_4 2.964022538878701
test_mse_4 8.785429610985519
validation loss mse 0.5993970000021884
validation loss mae 0.5506789675271098
training loss mse 0.6454377996735388
training loss mae 0.5724767986400167


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 1.9755366602219826
test_mse_3_4 3.924955830347901
test_mae_3 1.952809392271788
test_mse_3 3.813464522547124
test_mae_4 2.9528091820803555
test_mse_4 8.71908206578131
validation loss mse 0.5968042562795153
validation loss mae 0.5548993377936873
training loss mse 0.6073928597837984
training loss mae 0.5455547407318441


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0127400492341065
test_mse_3_4 4.073333248081294
test_mae_3 1.9900127772801017
test_mse_3 3.9601508537381784
test_mae_4 2.9900127432563086
test_mse_4 8.94017620483528
validation loss mse 0.6063729477816016
validation loss mae 0.5408966439125487
training loss mse 0.6080986445438029
training loss mae 0.5414372305270234


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0211305344892927
test_mse_3_4 4.107179381011557
test_mae_3 1.9984032618823062
test_mse_3 3.9936155971018747
test_mae_4 2.9984032565897163
test_mse_4 8.990422089127863
validation loss mse 0.6089135342535661
validation loss mae 0.5377385911473613
training loss mse 0.6086846992207606
training loss mae 0.5394180403762706


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0251623608849267
test_mse_3_4 4.123493331746662
test_mae_3 2.002435088157654
test_mse_3 4.009746282284951
test_mae_4 3.002435088157654
test_mse_4 9.014616458600258
validation loss mse 0.6101844225889157
validation loss mae 0.538647438607681
training loss mse 0.608901952441439
training loss mae 0.5383868249585342


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.021141669966958
test_mse_3_4 4.107224393878476
test_mae_3 1.998414397239685
test_mse_3 3.9936601030948538
test_mae_4 2.998414397239685
test_mse_4 8.990488897574224
validation loss mse 0.6089169675462759
validation loss mae 0.5377343327058177
training loss mse 0.6090654877346343
training loss mae 0.5379898447148306


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0206400372765283
test_mse_3_4 4.105196904046542
test_mae_3 1.9979127645492554
test_mse_3 3.9916554147488483
test_mae_4 2.9979127645492554
test_mse_4 8.987480943847359
validation loss mse 0.6087611048410586
validation loss mae 0.5379231346111573
training loss mse 0.6089959995774309
training loss mae 0.5381919276181264


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0206240632317285
test_mse_3_4 4.105132348712752
test_mae_3 1.9978967905044556
test_mse_3 3.9915915855080044
test_mae_4 2.9978967905044556
test_mse_4 8.987385166516916
validation loss mse 0.608756149800753
validation loss mae 0.5379291468391315
training loss mse 0.6090298080184803
training loss mae 0.5379139073088406


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.019583962180398
test_mse_3_4 4.1009301240979275
test_mae_3 1.996856689453125
test_mse_3 3.987436638213694
test_mae_4 2.996856689453125
test_mse_4 8.981150017119944
validation loss mse 0.6084346159370215
validation loss mae 0.5383206146680497
training loss mse 0.6090625443060319
training loss mae 0.5378076734216538


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0208095528862695
test_mse_3_4 4.105881992838056
test_mae_3 1.9980822801589966
test_mse_3 3.9923327982853745
test_mae_4 2.9980822801589966
test_mse_4 8.988497358603368
validation loss mse 0.6088137188763493
validation loss mae 0.5378593332068339
training loss mse 0.6089778461479868
training loss mae 0.5379267096721


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0197897174141626
test_mse_3_4 4.101761246373634
test_mae_3 1.9970624446868896
test_mse_3 3.988258407978776
test_mae_4 2.9970624446868896
test_mse_4 8.982383297352555
validation loss mse 0.6084980508503927
validation loss mae 0.538243173582352
training loss mse 0.6090195524709873
training loss mae 0.5379455453103779


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0201965787193994
test_mse_3_4 4.103404960471218
test_mae_3 1.9974693059921265
test_mse_3 3.9898836283806673
test_mae_4 2.9974693059921265
test_mse_4 8.98482224036492
validation loss mse 0.6086237365773765
validation loss mae 0.5380900412385011
training loss mse 0.6090139554241634
training loss mae 0.5379105963373849


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.024895034053109
test_mse_3_4 4.122410642734592
test_mae_3 2.002167761325836
test_mse_3 4.008675744492509
test_mae_4 3.002167761325836
test_mse_4 9.013011267144181
validation loss mse 0.6100991489360826
validation loss mae 0.5384816814619989
training loss mse 0.6090088160027008
training loss mae 0.5379075740208438


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0211845853111963
test_mse_3_4 4.107397871701244
test_mae_3 1.9984573125839233
test_mse_3 3.993831630220157
test_mae_4 2.9984573125839233
test_mse_4 8.990746255388004
validation loss mse 0.6089303251775955
validation loss mae 0.5377181804515586
training loss mse 0.6090370064481071
training loss mae 0.5378897007397058


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.020923040129922
test_mse_3_4 4.106340677929618
test_mae_3 1.998195767402649
test_mse_3 3.992786324865861
test_mae_4 2.998195767402649
test_mse_4 8.989177859671159
validation loss mse 0.6088489750171895
validation loss mae 0.5378166194677931
training loss mse 0.6090297141894092
training loss mae 0.5378688623737259


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.020790360190652
test_mse_3_4 4.105804423641117
test_mae_3 1.998063087463379
test_mse_3 3.99225610148369
test_mae_4 2.998063087463379
test_mse_4 8.988382276410448
validation loss mse 0.6088077589872517
validation loss mae 0.5378665568538775
training loss mse 0.6090797407205901
training loss mae 0.5378263631706164


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.023805699565194
test_mse_3_4 4.118000253394214
test_mae_3 2.001078426837921
test_mse_3 4.0043148703561275
test_mae_4 3.001078426837921
test_mse_4 9.00647172403197
validation loss mse 0.6097531437886562
validation loss mae 0.5378062349526376
training loss mse 0.6090656948919702
training loss mae 0.537932361209442


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.024211964823983
test_mse_3_4 4.119644822338221
test_mae_3 2.00148469209671
test_mse_3 4.005940972697461
test_mae_4 3.00148469209671
test_mse_4 9.008910356890881
validation loss mse 0.60988190825361
validation loss mae 0.5380581414643364
training loss mse 0.6090589397711532
training loss mae 0.5378546777838916


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.020243070342324
test_mse_3_4 4.103592807067833
test_mae_3 1.9975157976150513
test_mse_3 3.990069361721694
test_mae_4 2.9975157976150513
test_mse_4 8.985100956951797
validation loss mse 0.6086381196332545
validation loss mae 0.5380725429630538
training loss mse 0.6090118039745465
training loss mae 0.5378536814433903


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Validation: 0it [00:00, ?it/s]

  return F.l1_loss(input, target, reduction=self.reduction)


test_mae_3_4 2.0218575217507104
test_mse_3_4 4.110118582061577
test_mae_3 1.9991302490234375
test_mse_3 3.9965217525605112
test_mae_4 2.9991302490234375
test_mse_4 8.994782250607386
validation loss mse 0.609140261980343
validation loss mae 0.5374649041313015
training loss mse 0.6090244031356338
training loss mae 0.5379098813300122


`Trainer.fit` stopped: `max_epochs=20` reached.
