In [1]:
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
from torch.autograd import Function
import torch.optim as optim

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.metrics.functional import classification

import cv2
import geffnet
import math

from sklearn.manifold import TSNE
from scipy.spatial import ConvexHull
import inspect

import warnings
warnings.filterwarnings('ignore')

#model save
import pickle
from pickle import dump, load

import tidalUtl.PrpUtl as prp
import tidalUtl.EdaUtl as eda

In [2]:
#import sys
#sys.path.append('../input/iterative-stratification/iterative-stratification-master')
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

https://www.kaggle.com/tidalryoku/new-baseline-pytorch-moa/

# Version

__ver1__<br>
baseline：CV:0.01465 LB:0.01874<br>
__ver2__<br>
Hyperopt, 2Layer：CV:0.01460 LB:0.01869<br>
__ver3__<br>
3Layer：CV:0.01464 LB:0.01868<br>
__ver4__<br>
MLSMOTE baseline：CV:0.01476 LB:0.01978<br>
__ver5__<br>
2Layer,refactoring：CV:0.01476 LB:0.01869<br>
__ver6__<br>
rankGauss：CV:0.01456 LB:0.01865<br>
__ver7__<br>
labelSmoothing：CV:0.01502 LB:0.01859<br>

# Config

In [3]:
INPUT = "/home/tidal/ML_Data/MoA/lish-moa"
OUTPUT = "/home/tidal/ML_Data/MoA/output"
#INPUT = "/Users/hfuis/ML_Data/MoA/lish-moa"
#OUTPUT = "/Users/hfuis/ML_Data/MoA/output"

SUBMIT = OUTPUT + "/submittion/"
SAVEMODEL = OUTPUT + "/model/pytorchLightning_Efficientnet"

SAVEDEEPINSIGHT = OUTPUT + "/DeepInsightModel/"
SAVELOGSCALE = OUTPUT + "/LogScaler/"

In [4]:
#Loading
trainFeature = pd.read_csv(INPUT + '/train_features.csv')
testFeature = pd.read_csv(INPUT + '/test_features.csv')
trainTargetScored = pd.read_csv(INPUT + '/train_targets_scored.csv')
sample_submission = pd.read_csv(INPUT + '/sample_submission.csv')
drug = pd.read_csv(INPUT + '/train_drug.csv')

In [5]:
GENES = [col for col in trainFeature.columns if col.startswith('g-')] #gから始まる列名のセット
CELLS = [col for col in trainFeature.columns if col.startswith('c-')] #cから始まる列名のセット
category_features = ["cp_type", "cp_dose"]
numeric_features = [c for c in trainFeature.columns if c != "sig_id" and c not in category_features]
all_features = category_features + numeric_features

#efficientnet
model_type = "b3"
pretrained_model = f"tf_efficientnet_{model_type}_ns"
#experiment_name = f"deepinsight_efficientnet_v4_{model_type}"
num_workers = 2
gpus = [0]

if model_type == "b0":
    batch_size =  128#128
    infer_batch_size =  128#256
    image_size = 224  # B0
    drop_rate = 0.2  # B0
    resolution = 224
elif model_type == "b3":
    batch_size = 32
    infer_batch_size = 64
    image_size = 300  # B3
    drop_rate = 0.3  # B3
    resolution = 300
elif model_type == "b5":
    batch_size = 8
    infer_batch_size = 16
    image_size = 456  # B5
    drop_rate = 0.4  # B5
    resolution = 456
elif model_type == "b7":
    batch_size = 2
    infer_batch_size = 4
    # image_size = 800  # B7
    image_size = 772  # B7
    drop_rate = 0.5  # B7
    resolution = 772
    
# DeepInsight Transform
perplexity = 5

drop_connect_rate = 0.2
fc_size = 512

# Swap Noise
swap_prob = 0.15
swap_portion = 0.1

SAVEOOF = f"{OUTPUT}/OOF/Efficientnet{model_type}/"

In [6]:
#Seed固定
def seed_everything(seed=42):
    #data取得についてのランダム性固定
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    #cudnnによる演算の安定化(評価値の安定)
    torch.backends.cudnn.deterministic = True
    
    #os.environ['PYTHONHASHSEED'] = str(seed)
    
seed_everything(seed=42)

In [7]:
#HyperParameter
param_space = {'hidden_size1': 512, 
               'hidden_size2': 512, 
               'dropOutRate1': 0.20393004966355735, 
               'dropOutRate2': 0.39170486751620137,
               'rankGauss_n_quantiles': 488.0393350201078,
               'leakyReluSlope': 0.01973893854348531,
              }

# Preprocessing

## Category Feature Encoding

In [8]:
def categoryFeatureEnc(trainFeature, testFeature):
    train = trainFeature.copy()
    test = testFeature.copy()
    for df in [train, test]:
        df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
        df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
        df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})
    
    return train, test

## Log scaler

In [9]:
# Modified from DeepInsight Transform
# https://github.com/alok-ai-lab/DeepInsight/blob/master/pyDeepInsight/image_transformer.py


class LogScaler:
    """Log normalize and scale data

    Log normalization and scaling procedure as described as norm-2 in the
    DeepInsight paper supplementary information.
    
    Note: The dimensions of input matrix is (N samples, d features)
    """
    def __init__(self):
        self._min0 = None
        self._max = None

    """
    Use this as a preprocessing step in inference mode.
    """
    def fit(self, X, y=None):
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Global max. of training set from X_norm
        self._max = X_norm.max()

    """
    For training set only.
    """
    def fit_transform(self, X, y=None):
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Global max. of training set from X_norm
        self._max = X_norm.max()

        # Normalized again by global max. of training set
        return (X_norm / self._max).clip(0, 1)

    """
    For validation and test set only.
    """
    def transform(self, X, y=None):
        # Adjust min. of each feature of X by _min0
        for i in range(X.shape[1]):
            X[:, i] = X[:, i].clip(min=self._min0[i], max=None)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Normalized again by global max. of training set
        return (X_norm / self._max).clip(0, 1)

## DeepInsight Transform

### テーブルデータをCNN用の画像に変換する

In [10]:
# Modified from DeepInsight Transform
# https://github.com/alok-ai-lab/DeepInsight/blob/master/pyDeepInsight/image_transformer.py

#詳細な説明は以下
# https://www.kaggle.com/markpeng/deepinsight-transforming-non-image-data-to-images

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE
from scipy.spatial import ConvexHull
from matplotlib import pyplot as plt
import inspect


class DeepInsightTransformer:
    """Transform features to an image matrix using dimensionality reduction

    This class takes in data normalized between 0 and 1 and converts it to a
    CNN compatible 'image' matrix

    """
    def __init__(self,
                 feature_extractor='tsne',
                 perplexity=30,
                 pixels=100,
                 random_state=None,
                 n_jobs=None):
        """Generate an ImageTransformer instance

        Args:
            feature_extractor: string of value ('tsne', 'pca', 'kpca') or a
                class instance with method `fit_transform` that returns a
                2-dimensional array of extracted features.
            pixels: int (square matrix) or tuple of ints (height, width) that
                defines the size of the image matrix.
            random_state: int or RandomState. Determines the random number
                generator, if present, of a string defined feature_extractor.
            n_jobs: The number of parallel jobs to run for a string defined
                feature_extractor.
        """
        self.random_state = random_state
        self.n_jobs = n_jobs

        if isinstance(feature_extractor, str):
            fe = feature_extractor.casefold()
            if fe == 'tsne_exact'.casefold():
                fe = TSNE(n_components=2,
                          metric='cosine',
                          perplexity=perplexity,
                          n_iter=1000,
                          method='exact',
                          random_state=self.random_state,
                          n_jobs=self.n_jobs)
            elif fe == 'tsne'.casefold():
                fe = TSNE(n_components=2,
                          metric='cosine',
                          perplexity=perplexity,
                          n_iter=1000,
                          method='barnes_hut',
                          random_state=self.random_state,
                          n_jobs=self.n_jobs)
            elif fe == 'pca'.casefold():
                fe = PCA(n_components=2, random_state=self.random_state)
            elif fe == 'kpca'.casefold():
                fe = KernelPCA(n_components=2,
                               kernel='rbf',
                               random_state=self.random_state,
                               n_jobs=self.n_jobs)
            else:
                raise ValueError(("Feature extraction method '{}' not accepted"
                                  ).format(feature_extractor))
            self._fe = fe
        elif hasattr(feature_extractor, 'fit_transform') and \
                inspect.ismethod(feature_extractor.fit_transform):
            self._fe = feature_extractor
        else:
            raise TypeError('Parameter feature_extractor is not a '
                            'string nor has method "fit_transform"')

        if isinstance(pixels, int):
            pixels = (pixels, pixels)

        # The resolution of transformed image
        self._pixels = pixels
        self._xrot = None

    def fit(self, X, y=None, plot=False):
        """Train the image transformer from the training set (X)

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            y: Ignored. Present for continuity with scikit-learn
            plot: boolean of whether to produce a scatter plot showing the
                feature reduction, hull points, and minimum bounding rectangle

        Returns:
            self: object
        """
        # Transpose to get (n_features, n_samples)
        X = X.T

        # Perform dimensionality reduction
        x_new = self._fe.fit_transform(X)

        # Get the convex hull for the points
        chvertices = ConvexHull(x_new).vertices
        hull_points = x_new[chvertices]

        # Determine the minimum bounding rectangle
        mbr, mbr_rot = self._minimum_bounding_rectangle(hull_points)

        # Rotate the matrix
        # Save the rotated matrix in case user wants to change the pixel size
        self._xrot = np.dot(mbr_rot, x_new.T).T

        # Determine feature coordinates based on pixel dimension
        self._calculate_coords()

        # plot rotation diagram if requested
        if plot is True:
            # Create subplots
            fig, ax = plt.subplots(1, 1, figsize=(10, 7), squeeze=False)
            ax[0, 0].scatter(x_new[:, 0],
                             x_new[:, 1],
                             cmap=plt.cm.get_cmap("jet", 10),
                             marker="x",
                             alpha=1.0)
            ax[0, 0].fill(x_new[chvertices, 0],
                          x_new[chvertices, 1],
                          edgecolor='r',
                          fill=False)
            ax[0, 0].fill(mbr[:, 0], mbr[:, 1], edgecolor='g', fill=False)
            plt.gca().set_aspect('equal', adjustable='box')
            plt.show()
        return self

    @property
    def pixels(self):
        """The image matrix dimensions

        Returns:
            tuple: the image matrix dimensions (height, width)

        """
        return self._pixels

    @pixels.setter
    def pixels(self, pixels):
        """Set the image matrix dimension

        Args:
            pixels: int or tuple with the dimensions (height, width)
            of the image matrix

        """
        if isinstance(pixels, int):
            pixels = (pixels, pixels)
        self._pixels = pixels
        # recalculate coordinates if already fit
        if hasattr(self, '_coords'):
            self._calculate_coords()

    def _calculate_coords(self):
        """Calculate the matrix coordinates of each feature based on the
        pixel dimensions.
        """
        ax0_coord = np.digitize(self._xrot[:, 0],
                                bins=np.linspace(min(self._xrot[:, 0]),
                                                 max(self._xrot[:, 0]),
                                                 self._pixels[0])) - 1
        ax1_coord = np.digitize(self._xrot[:, 1],
                                bins=np.linspace(min(self._xrot[:, 1]),
                                                 max(self._xrot[:, 1]),
                                                 self._pixels[1])) - 1
        self._coords = np.stack((ax0_coord, ax1_coord))

    def transform(self, X, empty_value=0):
        """Transform the input matrix into image matrices

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                where n_features matches the training set.
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """

        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        img_coords = pd.DataFrame(np.vstack(
            (self._coords, X.clip(0, 1))).T).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).mean()

        img_matrices = []
        blank_mat = np.zeros(self._pixels)
        if empty_value != 0:
            blank_mat[:] = empty_value
        for z in range(2, img_coords.shape[1]):
            img_matrix = blank_mat.copy()
            img_matrix[img_coords[0].astype(int),
                       img_coords[1].astype(int)] = img_coords[z]
            img_matrices.append(img_matrix)

        return img_matrices

    def fit_transform(self, X, empty_value=0):
        """Train the image transformer from the training set (X) and return
        the transformed data.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """
        self.fit(X)
        return self.transform(X, empty_value=empty_value)

    def feature_density_matrix(self):
        """Generate image matrix with feature counts per pixel

        Returns:
            img_matrix (ndarray): matrix with feature counts per pixel
        """
        fdmat = np.zeros(self._pixels)
        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        coord_cnt = (
            pd.DataFrame(self._coords.T).assign(count=1).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).count())
        fdmat[coord_cnt[0].astype(int),
              coord_cnt[1].astype(int)] = coord_cnt['count']
        return fdmat

    @staticmethod
    def _minimum_bounding_rectangle(hull_points):
        """Find the smallest bounding rectangle for a set of points.

        Modified from JesseBuesking at https://stackoverflow.com/a/33619018
        Returns a set of points representing the corners of the bounding box.

        Args:
            hull_points : an nx2 matrix of hull coordinates

        Returns:
            (tuple): tuple containing
                coords (ndarray): coordinates of the corners of the rectangle
                rotmat (ndarray): rotation matrix to align edges of rectangle
                    to x and y
        """

        pi2 = np.pi / 2.

        # Calculate edge angles
        edges = hull_points[1:] - hull_points[:-1]
        angles = np.arctan2(edges[:, 1], edges[:, 0])
        angles = np.abs(np.mod(angles, pi2))
        angles = np.unique(angles)

        # Find rotation matrices
        rotations = np.vstack([
            np.cos(angles),
            np.cos(angles - pi2),
            np.cos(angles + pi2),
            np.cos(angles)
        ]).T
        rotations = rotations.reshape((-1, 2, 2))

        # Apply rotations to the hull
        rot_points = np.dot(rotations, hull_points.T)

        # Find the bounding points
        min_x = np.nanmin(rot_points[:, 0], axis=1)
        max_x = np.nanmax(rot_points[:, 0], axis=1)
        min_y = np.nanmin(rot_points[:, 1], axis=1)
        max_y = np.nanmax(rot_points[:, 1], axis=1)

        # Find the box with the best area
        areas = (max_x - min_x) * (max_y - min_y)
        best_idx = np.argmin(areas)

        # Return the best box
        x1 = max_x[best_idx]
        x2 = min_x[best_idx]
        y1 = max_y[best_idx]
        y2 = min_y[best_idx]
        rotmat = rotations[best_idx]

        # Generate coordinates
        coords = np.zeros((4, 2))
        coords[0] = np.dot([x1, y2], rotmat)
        coords[1] = np.dot([x2, y2], rotmat)
        coords[2] = np.dot([x2, y1], rotmat)
        coords[3] = np.dot([x1, y1], rotmat)

        return coords, rotmat

## Collecting

__train,testにターゲット値も連結__

In [11]:
def Collecting(trainFeature, testFeature, trainTargetScored):
    #Pkey(sig_id)でfeatureとtargetを内部結合。
    train = trainFeature.merge(trainTargetScored, on='sig_id')
    test = testFeature.merge(sample_submission, on='sig_id')
    
    return train, test

## Preprocessing Summary

In [12]:
def preprocessing(param, trainFeature, testFeature, trainTargetScored):
    
    train, test = categoryFeatureEnc(trainFeature, testFeature)
    
    train, test = Collecting(train, test, trainTargetScored)
    
    return train, test

## Work

## Visualization

In [13]:
%%time
trainVsl, testVsl = preprocessing(param_space, trainFeature, testFeature, trainTargetScored)

CPU times: user 53.4 ms, sys: 44 ms, total: 97.4 ms
Wall time: 96.7 ms


In [14]:
trainVsl

Unnamed: 0,sig_id,cp_type,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_000644bb2,1,0.0,0,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,...,0,0,0,0,0,0,0,0,0,0
1,id_000779bfc,1,1.0,0,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,...,0,0,0,0,0,0,0,0,0,0
2,id_000a6266a,1,0.5,0,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,...,0,0,0,0,0,0,0,0,0,0
3,id_0015fd391,1,0.5,0,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,...,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,1,1.0,1,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23809,id_fffb1ceed,1,0.0,1,0.1394,-0.0636,-0.1112,-0.5080,-0.4713,0.7201,...,0,0,0,0,0,0,0,0,0,0
23810,id_fffb70c0c,1,0.0,1,-1.3260,0.3478,-0.3743,0.9905,-0.7178,0.6621,...,0,0,0,0,0,0,0,0,0,0
23811,id_fffc1c3f4,0,0.5,1,0.3942,0.3756,0.3109,-0.7389,0.5505,-0.0159,...,0,0,0,0,0,0,0,0,0,0
23812,id_fffcb9e7c,1,0.0,0,0.6660,0.2324,0.4392,0.2044,0.8531,-0.0343,...,0,0,0,0,0,0,0,0,0,0


In [15]:
testVsl

Unnamed: 0,sig_id,cp_type,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,1,0.0,0,-0.5458,0.1306,-0.5135,0.4408,1.5500,-0.1644,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
1,id_001897cda,1,1.0,0,-0.1829,0.2320,1.2080,-0.4522,-0.3652,-0.3319,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
2,id_002429b5b,0,0.0,0,0.1852,-0.1404,-0.3911,0.1310,-1.4380,0.2455,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3,id_00276f245,1,0.0,1,0.4828,0.1955,0.3825,0.4244,-0.5855,-1.2020,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
4,id_0027f1083,1,0.5,0,-0.3979,-1.2680,1.9130,0.2057,-0.5864,-0.0166,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,1,0.0,0,0.4571,-0.5743,3.3930,-0.6202,0.8557,1.6240,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3978,id_ff925dd0d,1,0.0,0,-0.5885,-0.2548,2.5850,0.3456,0.4401,0.3107,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3979,id_ffb710450,1,1.0,0,-0.3985,-0.1554,0.2677,-0.6813,0.0152,0.4791,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3980,id_ffbb869f2,1,0.5,1,-1.0960,-1.7750,-0.3977,1.0160,-1.3350,-0.2207,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5


In [16]:
trainFeature

Unnamed: 0,sig_id,cp_type,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,id_000644bb2,trt_cp,24,D1,1.0620,0.5577,-0.2479,-0.6208,-0.1944,-1.0120,...,0.2862,0.2584,0.8076,0.5523,-0.1912,0.6584,-0.3981,0.2139,0.3801,0.4176
1,id_000779bfc,trt_cp,72,D1,0.0743,0.4087,0.2991,0.0604,1.0190,0.5207,...,-0.4265,0.7543,0.4708,0.0230,0.2957,0.4899,0.1522,0.1241,0.6077,0.7371
2,id_000a6266a,trt_cp,48,D1,0.6280,0.5817,1.5540,-0.0764,-0.0323,1.2390,...,-0.7250,-0.6297,0.6103,0.0223,-1.3240,-0.3174,-0.6417,-0.2187,-1.4080,0.6931
3,id_0015fd391,trt_cp,48,D1,-0.5138,-0.2491,-0.2656,0.5288,4.0620,-0.8095,...,-2.0990,-0.6441,-5.6300,-1.3780,-0.8632,-1.2880,-1.6210,-0.8784,-0.3876,-0.8154
4,id_001626bd3,trt_cp,72,D2,-0.3254,-0.4009,0.9700,0.6919,1.4180,-0.8244,...,0.0042,0.0048,0.6670,1.0690,0.5523,-0.3031,0.1094,0.2885,-0.3786,0.7125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23809,id_fffb1ceed,trt_cp,24,D2,0.1394,-0.0636,-0.1112,-0.5080,-0.4713,0.7201,...,0.1969,0.0262,-0.8121,0.3434,0.5372,-0.3246,0.0631,0.9171,0.5258,0.4680
23810,id_fffb70c0c,trt_cp,24,D2,-1.3260,0.3478,-0.3743,0.9905,-0.7178,0.6621,...,0.4286,0.4426,0.0423,-0.3195,-0.8086,-0.9798,-0.2084,-0.1224,-0.2715,0.3689
23811,id_fffc1c3f4,ctl_vehicle,48,D2,0.3942,0.3756,0.3109,-0.7389,0.5505,-0.0159,...,0.5409,0.3755,0.7343,0.2807,0.4116,0.6422,0.2256,0.7592,0.6656,0.3808
23812,id_fffcb9e7c,trt_cp,24,D1,0.6660,0.2324,0.4392,0.2044,0.8531,-0.0343,...,-0.1105,0.4258,-0.2012,0.1506,1.5230,0.7101,0.1732,0.7015,-0.6290,0.0740


In [17]:
testFeature

Unnamed: 0,sig_id,cp_type,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,...,c-90,c-91,c-92,c-93,c-94,c-95,c-96,c-97,c-98,c-99
0,id_0004d9e33,trt_cp,24,D1,-0.5458,0.1306,-0.5135,0.4408,1.5500,-0.1644,...,0.0981,0.7978,-0.1430,-0.2067,-0.2303,-0.1193,0.0210,-0.0502,0.1510,-0.7750
1,id_001897cda,trt_cp,72,D1,-0.1829,0.2320,1.2080,-0.4522,-0.3652,-0.3319,...,-0.1190,-0.1852,-1.0310,-1.3670,-0.3690,-0.5382,0.0359,-0.4764,-1.3810,-0.7300
2,id_002429b5b,ctl_vehicle,24,D1,0.1852,-0.1404,-0.3911,0.1310,-1.4380,0.2455,...,-0.2261,0.3370,-1.3840,0.8604,-1.9530,-1.0140,0.8662,1.0160,0.4924,-0.1942
3,id_00276f245,trt_cp,24,D2,0.4828,0.1955,0.3825,0.4244,-0.5855,-1.2020,...,0.1260,0.1570,-0.1784,-1.1200,-0.4325,-0.9005,0.8131,-0.1305,0.5645,-0.5809
4,id_0027f1083,trt_cp,48,D1,-0.3979,-1.2680,1.9130,0.2057,-0.5864,-0.0166,...,0.4965,0.7578,-0.1580,1.0510,0.5742,1.0900,-0.2962,-0.5313,0.9931,1.8380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977,id_ff7004b87,trt_cp,24,D1,0.4571,-0.5743,3.3930,-0.6202,0.8557,1.6240,...,-1.1790,-0.6422,-0.4367,0.0159,-0.6539,-0.4791,-1.2680,-1.1280,-0.4167,-0.6600
3978,id_ff925dd0d,trt_cp,24,D1,-0.5885,-0.2548,2.5850,0.3456,0.4401,0.3107,...,0.0210,0.5780,-0.5888,0.8057,0.9312,1.2730,0.2614,-0.2790,-0.0131,-0.0934
3979,id_ffb710450,trt_cp,72,D1,-0.3985,-0.1554,0.2677,-0.6813,0.0152,0.4791,...,0.4418,0.9153,-0.1862,0.4049,0.9568,0.4666,0.0461,0.5888,-0.4205,-0.1504
3980,id_ffbb869f2,trt_cp,48,D2,-1.0960,-1.7750,-0.3977,1.0160,-1.3350,-0.2207,...,0.3079,-0.4473,-0.8192,0.7785,0.3133,0.1286,-0.2618,0.5074,0.7430,-0.0484


# Fitting

## Config about Fitting

In [18]:
#configは辞書化しておく。
def Config_about_Fitting(train, test, target, folds):
    confFitting = {}
    
    #Fitするときに"y"として使う列の列名配列
    confFitting["target_cols"] = target.drop('sig_id', axis=1).columns.values.tolist()
    #Fitするときに"X"として使う列の列名配列
    #kfold, id等はここで削除。
    feature_cols = [c for c in folds.columns if c not in confFitting["target_cols"]]
    confFitting["feature_cols"] = [c for c in feature_cols if c not in ['kfold','sig_id']]
    #特徴量、ターゲットのサイズ
    confFitting["num_features"]=len(confFitting["feature_cols"])
    confFitting["num_targets"]=len(confFitting["target_cols"])
    
    return confFitting

## fitTransformer

In [19]:
def fitPreprocessingModel(param, train, test, valid, fold, seed):
    
    #LOG SCALER
    train_all_features = train[all_features].copy().values
    valid_all_features = valid[all_features].copy().values
    test_all_features = test[all_features].copy().values
    
    all_scaler = LogScaler()
    train_all_features = all_scaler.fit_transform(train_all_features)
    test_all_features = all_scaler.transform(test_all_features)
    valid_all_features = all_scaler.transform(valid_all_features)
    
    train[all_features] = train_all_features
    test[all_features] = test_all_features
    valid[all_features] = valid_all_features
    
    dump(all_scaler, open(f"{SAVELOGSCALE}/seed{seed}_fold{fold}_LogScaleTransformer.pkl", 'wb'), pickle.HIGHEST_PROTOCOL)
    
    #DeepInsight
    all_it = DeepInsightTransformer(feature_extractor='tsne_exact',
                                    pixels=resolution,
                                    perplexity=5,
                                    random_state=1120,
                                    n_jobs=-1)
    all_it.fit(train_all_features, plot=False)
    
    dump(all_it, open(f"{SAVEDEEPINSIGHT}/seed{seed}_fold{fold}_DeepInsightTransformer.pkl", 'wb'), pickle.HIGHEST_PROTOCOL)
    
    return all_scaler, all_it, train, test, valid

In [20]:
def PreprocessingLoadTransform(param, test, fold, seed):
    test_all_features = test[all_features].copy().values
    
    #LOG SCALER
    all_scaler = load(open(f"{SAVELOGSCALE}/seed{seed}_fold{fold}_LogScaleTransformer.pkl", 'rb'))
    test_all_features = all_scaler.transform(test_all_features)
    
    #DeepInsight
    all_it = load(open(f"{SAVEDEEPINSIGHT}/seed{seed}_fold{fold}_DeepInsightTransformer.pkl", 'rb'))
    
    return all_scaler, all_it, test

## Dataset Classes

In [21]:
class MoAImageSwapDataset(torch.utils.data.Dataset):
    def __init__(self,
                 features,
                 labels,
                 transformer,
                 swap_prob=0.15,
                 swap_portion=0.1):
        self.features = features
        self.labels = labels
        self.transformer = transformer
        self.swap_prob = swap_prob
        self.swap_portion = swap_portion

    def __getitem__(self, index):
        normalized = self.features[index, :]

        # Swap row features randomly
        normalized = self.add_swap_noise(index, normalized)

        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": self.labels[index, :]}

    def add_swap_noise(self, index, X):
        if np.random.rand() < self.swap_prob:
            swap_index = np.random.randint(self.features.shape[0], size=1)[0]
            # Select only gene expression and cell viability features
            swap_features = np.random.choice(
                np.array(range(3, self.features.shape[1])),
                size=int(self.features.shape[1] * self.swap_portion),
                replace=False)
            X[swap_features] = self.features[swap_index, swap_features]

        return X

    def __len__(self):
        return self.features.shape[0]
    
class MoAImageDataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, transformer):
        self.features = features
        self.labels = labels
        self.transformer = transformer

    def __getitem__(self, index):
        normalized = self.features[index, :]
        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": self.labels[index, :]}

    def __len__(self):
        return self.features.shape[0]


class TestDataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, transformer):
        self.features = features
        self.labels = labels
        self.transformer = transformer

    def __getitem__(self, index):
        normalized = self.features[index, :]
        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": -1}

    def __len__(self):
        return self.features.shape[0]

## weight initialization

In [22]:
# Reference: https://github.com/rwightman/gen-efficientnet-pytorch/blob/master/geffnet/efficientnet_builder.py#L672
def initialize_weight_goog(m, n='', fix_group_fanout=True):
    # weight init as per Tensorflow Official impl
    # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
    if isinstance(m, nn.Conv2d):
        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        if fix_group_fanout:
            fan_out //= m.groups
        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        fan_out = m.weight.size(0)  # fan-out
        fan_in = 0
        if 'routing_fn' in n:
            fan_in = m.weight.size(1)
        init_range = 1.0 / math.sqrt(fan_in + fan_out)
        m.weight.data.uniform_(-init_range, init_range)
        m.bias.data.zero_()


def initialize_weight_default(m, n=''):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight,
                                 mode='fan_in',
                                 nonlinearity='linear')

## Loss, Metric

In [23]:
#loss
class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self):
        super(LabelSmoothingCrossEntropy, self).__init__()
    def forward(self, x, target, smoothing=0.001):
        confidence = 1. - smoothing
        logprobs = F.log_softmax(x, dim=-1)
        bcs_loss = nn.BCEWithLogitsLoss()(x, target)
        smooth_loss = -logprobs.mean(dim=-1)
        loss = confidence * bcs_loss + smoothing * smooth_loss
        return loss.mean()

In [24]:
#metric
#nn.BCEWithLogitsLoss()

## Model architect

In [26]:
class MoAEfficientNet(pl.LightningModule):
    def __init__(
            self,
            pretrained_model_name,
            training_set=(None, None),  # tuple
            valid_set=(None, None),  # tuple
            test_set=None,
            transformer=None,
            num_classes=206,
            in_chans=3,
            drop_rate=0.,
            drop_connect_rate=0.,
            fc_size=512,
            learning_rate=1e-3,
            weight_init='goog'):
        super(MoAEfficientNet, self).__init__()

        self.train_data, self.train_labels = training_set
        self.valid_data, self.valid_labels = valid_set
        self.test_data = test_set
        self.transformer = transformer

        self.backbone = getattr(geffnet, pretrained_model)(
            pretrained=True,
            in_chans=in_chans,
            drop_rate=drop_rate,
            drop_connect_rate=drop_connect_rate,
            weight_init=weight_init)

        self.backbone.classifier = nn.Sequential(
            nn.Linear(self.backbone.classifier.in_features, fc_size,
                      bias=True), nn.ELU(),
            nn.Linear(fc_size, num_classes, bias=True))

        if self.training:
            for m in self.backbone.classifier.modules():
                initialize_weight_goog(m)

        # Save passed hyperparameters
        self.save_hyperparameters("pretrained_model_name", "num_classes",
                                  "in_chans", "drop_rate", "drop_connect_rate",
                                  "weight_init", "fc_size", "learning_rate")

    def forward(self, x):
        return self.backbone(x)

    def training_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        loss = F.binary_cross_entropy_with_logits(logits, y, reduction="mean")
        #loss = LabelSmoothingCrossEntropy()

        self.log('train_loss',
                 loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        val_loss = F.binary_cross_entropy_with_logits(logits,
                                                      y,
                                                      reduction="mean")

        self.log('val_loss',
                 val_loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return val_loss

    def test_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)
        return {"pred_logits": logits}

    def test_epoch_end(self, output_results):
        all_outputs = torch.cat([out["pred_logits"] for out in output_results],
                                dim=0)
        print("Logits:", all_outputs)
        pred_probs = F.sigmoid(all_outputs).detach().cpu().numpy()
        print("Predictions: ", pred_probs)
        return {"pred_probs": pred_probs}

    def setup(self, stage=None):
        #         self.train_dataset = MoAImageDataset(self.train_data,
        #                                              self.train_labels,
        #                                              self.transformer)
        self.train_dataset = MoAImageSwapDataset(self.train_data,
                                                 self.train_labels,
                                                 self.transformer,
                                                 swap_prob=swap_prob,
                                                 swap_portion=swap_portion)

        self.val_dataset = MoAImageDataset(self.valid_data, self.valid_labels,
                                           self.transformer)

        self.test_dataset = TestDataset(self.test_data, None, self.transformer)

    def train_dataloader(self):
        train_dataloader = DataLoader(self.train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      pin_memory=True,
                                      drop_last=False)
        print(f"Train iterations: {len(train_dataloader)}")
        return train_dataloader

    def val_dataloader(self):
        val_dataloader = DataLoader(self.val_dataset,
                                    batch_size=infer_batch_size,
                                    shuffle=False,
                                    num_workers=num_workers,
                                    pin_memory=True,
                                    drop_last=False)
        print(f"Validate iterations: {len(val_dataloader)}")
        return val_dataloader

    def test_dataloader(self):
        test_dataloader = DataLoader(self.test_dataset,
                                     batch_size=infer_batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     pin_memory=True,
                                     drop_last=False)
        print(f"Test iterations: {len(test_dataloader)}")
        return test_dataloader

    def configure_optimizers(self):
        print(f"Initial Learning Rate: {self.hparams.learning_rate:.6f}")
        optimizer = optim.Adam(self.parameters(),
                               lr=self.hparams.learning_rate,
                               weight_decay=1e-5)

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=20,
                                                         eta_min=0,
                                                         last_epoch=-1)

        return [optimizer], [scheduler]

# Run

## HyperParameter

In [27]:
# HyperParameters
DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 25
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
NFOLDS = 5
EARLY_STOPPING_STEPS = 10
EARLY_STOP = True

## CV folds

In [28]:
def CV_folds(train, target):
    folds = train.copy()
    
    mskf = MultilabelStratifiedKFold(n_splits=NFOLDS)
    
    for f, (t_idx, v_idx) in enumerate(mskf.split(X=train, y=target)):
        folds.loc[v_idx, 'kfold'] = int(f)
    
    folds['kfold'] = folds['kfold'].astype(int)
    
    return folds

In [29]:
def CV_folds_drug_id(train, target):
    ###drug_idを考慮####
    
    targets = target.columns[1:]
    
    # foldsにdrug_id付与
    folds = train.copy()
    folds = folds.merge(drug, on='sig_id', how='left') 
    
    # LOCATE DRUGS
    vc = folds.drug_id.value_counts()
    vc1 = vc.loc[vc<=18].index.sort_values()
    vc2 = vc.loc[vc>18].index.sort_values()
    
    # STRATIFY DRUGS 18X OR LESS
    dct1 = {}; dct2 = {}
    skf = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, 
              random_state=42)
    tmp = folds.groupby('drug_id')[targets].mean().loc[vc1]
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp.index[idxV].values}
        dct1.update(dd)
    
    # STRATIFY DRUGS MORE THAN 18X
    skf = MultilabelStratifiedKFold(n_splits=NFOLDS, shuffle=True, 
              random_state=42)
    tmp = folds.loc[folds.drug_id.isin(vc2)].reset_index(drop=True)
    for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
        dd = {k:fold for k in tmp.sig_id[idxV].values}
        dct2.update(dd)
    
    # ASSIGN NFOLDS
    folds['kfold'] = folds.drug_id.map(dct1)
    folds.loc[folds.kfold.isna(),'kfold'] =\
        folds.loc[folds.kfold.isna(),'sig_id'].map(dct2)
    folds.kfold = folds.kfold.astype('int8')
    
    folds = folds.drop('drug_id', axis=1)
    
    return folds

In [30]:
%%time
#Preprocessing Data
trainVsl, testVsl = preprocessing(param_space, trainFeature, testFeature, trainTargetScored)
#CV folds
foldsVsl = CV_folds_drug_id(trainVsl, trainTargetScored)

foldsVsl.head(5)

CPU times: user 474 ms, sys: 137 ms, total: 611 ms
Wall time: 609 ms


Unnamed: 0,sig_id,cp_type,cp_time,cp_dose,g-0,g-1,g-2,g-3,g-4,g-5,...,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor,kfold
0,id_000644bb2,1,0.0,0,1.062,0.5577,-0.2479,-0.6208,-0.1944,-1.012,...,0,0,0,0,0,0,0,0,0,4
1,id_000779bfc,1,1.0,0,0.0743,0.4087,0.2991,0.0604,1.019,0.5207,...,0,0,0,0,0,0,0,0,0,2
2,id_000a6266a,1,0.5,0,0.628,0.5817,1.554,-0.0764,-0.0323,1.239,...,0,0,0,0,0,0,0,0,0,4
3,id_0015fd391,1,0.5,0,-0.5138,-0.2491,-0.2656,0.5288,4.062,-0.8095,...,0,0,0,0,0,0,0,0,0,0
4,id_001626bd3,1,1.0,1,-0.3254,-0.4009,0.97,0.6919,1.418,-0.8244,...,0,0,0,0,0,0,0,0,0,1


## Single Fold Running

In [31]:
def run_training(confFitting, Tester, fold, seed, param,
                 folds, train, test, target):
    
    seed_everything(seed)
    
    train = folds
    
    trn_idx = train[train['kfold'] != fold].index
    val_idx = train[train['kfold'] == fold].index
    
    train_df = train[train['kfold'] != fold].reset_index(drop=True)
    valid_df = train[train['kfold'] == fold].reset_index(drop=True)
    
    x_train, y_train  = train_df[confFitting["feature_cols"]], train_df[confFitting["target_cols"]].values
    x_valid, y_valid =  valid_df[confFitting["feature_cols"]], valid_df[confFitting["target_cols"]].values
    x_test = test[confFitting["feature_cols"]]
    
    #データセットをイメージ化するトランスフォーマー。
    #ここでLogScaleも実施。
    all_scaler, all_it, x_train, x_test, x_valid = fitPreprocessingModel(param_space, x_train, x_test, x_valid, fold, seed)
    x_train = x_train.values
    x_valid = x_valid.values
    x_test = x_test.values
    
    #model class 定義
    model = MoAEfficientNet(
        pretrained_model_name=pretrained_model,
        training_set=(x_train, y_train),  # tuple
        valid_set=(x_valid, y_valid),  # tuple
        test_set=np.concatenate([x_valid, x_test], 0), #予測用のデータセット
        transformer=all_it,
        drop_rate=drop_rate,
        drop_connect_rate=drop_connect_rate,
        fc_size=fc_size,
        weight_init='goog')
    
    #データセット定義(データセットをイメージ化)
    model.setup()
    
    #学習クラス
    trainer = Trainer(
        default_root_dir=SAVEMODEL,
        max_epochs=EPOCHS,
        logger=False,
        gpus=gpus,
        distributed_backend="dp",  # multiple-gpus, 1 machine
        precision=16)
    
    #学習
    trainer.fit(model)
    
    #save model
    trainer.save_checkpoint(f"{SAVEMODEL}/model{model_type}_SEED{seed}_FOLD{fold}.ckpt")
    
    #予測
    output = trainer.test(model, verbose=False)[0]
    allPred = output["pred_probs"]
    
    #out of fold
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))
    oof[val_idx] = allPred[0:x_valid.shape[0]]
    
    #Predictions
    predictions = allPred[x_valid.shape[0]:]
    
    
    return oof, predictions


## K-Fold Running

In [32]:
def run_k_fold(Tester, NFOLDS, seed, param,
               folds, train, test, target, confFitting):
    oof = np.zeros((len(train), confFitting["num_targets"]))
    predictions = np.zeros((len(test), confFitting["num_targets"]))
    
    for fold in range(NFOLDS):
        if Tester:
            print('=' * 20, 'Fold', fold, '=' * 20)
        oof_, pred_ = run_training(confFitting, Tester, fold, seed, param,
                                   folds, train, test, target)
        
        predictions += pred_ / NFOLDS
        oof += oof_
        
    return oof, predictions

## CV Evaluation

In [33]:
 def CV_Evaluation(confFitting, oof, train, target):
    #CV score : OOFの評価結果。
    #OOF(学習モデルによるtrain dataの予測)
    train[confFitting["target_cols"]] = oof
    #target(予測結果)：ここで処理「cp_type = ctl_vehicleのレコードを削除」で抜けたところに0を入れている。
    valid_results = trainTargetScored.drop(columns=confFitting["target_cols"]).merge(train[['sig_id']+confFitting["target_cols"]], on='sig_id', how='left').fillna(0)
    
    y_true = trainTargetScored[confFitting["target_cols"]].values
    y_pred = valid_results[confFitting["target_cols"]].values
    
    score = 0
    for i in range(confFitting["num_targets"]):
        score_ = log_loss(y_true[:, i], y_pred[:, i]) #問題の評価指標によって変わる。
        score += score_ / target.shape[1]
        
    print("CV log_loss: ", score)
    
    #OOF save
    np.save(SAVEOOF + 'oof', y_pred)
    
    return score

## Postprocessing

In [34]:
# 特になし

## Submit

In [35]:
def Submit(confFitting, predictions, test):
    test[confFitting["target_cols"]] = predictions
    sub = sample_submission.drop(columns=confFitting["target_cols"]).merge(test[['sig_id']+confFitting["target_cols"]], on='sig_id', how='left').fillna(0)
    sub.to_csv(f'{SUBMIT}submission.csv', index=False)

    print("sub.shape" + str(sub.shape))
    
    return

# Execute

In [36]:
def Exec(param):
    
    #Tester(True/False)
    Tester = True
    
    #Preprocessing Data
    train, test = preprocessing(param, trainFeature, testFeature, trainTargetScored)
    
    #CV folds
    folds = CV_folds_drug_id(train, trainTargetScored)
    
    #Config about Fitting
    confFitting = Config_about_Fitting(train, test, trainTargetScored, folds)
    
    # Averaging on multiple SEEDS
    SEED = [42]
    oof = np.zeros((len(train), confFitting["num_targets"]))
    predictions = np.zeros((len(test), confFitting["num_targets"]))
    
    ### RUN ###
    for seed in SEED:
        if Tester:
            print('~' * 20, 'SEED', seed, '~' * 20)
        oof_, predictions_ = run_k_fold(Tester, NFOLDS, seed, param,
                                       folds, train, test, trainTargetScored, confFitting)
        oof += oof_ / len(SEED)
        predictions += predictions_ / len(SEED)
    
    #CV 評価
    score = CV_Evaluation(confFitting, oof, train, trainTargetScored)
    
    # 課題提出
    Submit(confFitting, predictions, test)
    
    return score


In [36]:
%%time
#b3
score= Exec(param_space)
print("score: " + str(score))

~~~~~~~~~~~~~~~~~~~~ SEED 42 ~~~~~~~~~~~~~~~~~~~~


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 11 M  


Initial Learning Rate: 0.001000
Validate iterations: 76
Train iterations: 594                                                 
Epoch 0:  89%|████████▊ | 594/670 [03:37<00:27,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  89%|████████▉ | 595/670 [03:37<00:27,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  89%|████████▉ | 596/670 [03:37<00:27,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  89%|████████▉ | 597/670 [03:37<00:26,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  89%|████████▉ | 598/670 [03:38<00:26,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  89%|████████▉ | 599/670 [03:38<00:25,  2.75it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  90%|████████▉ | 600/670 [03:38<00:25,  2.75it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  90%|████████▉ | 6

Epoch 0:  99%|█████████▉| 663/670 [03:49<00:02,  2.89it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  99%|█████████▉| 664/670 [03:49<00:02,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  99%|█████████▉| 665/670 [03:49<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0:  99%|█████████▉| 666/670 [03:49<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0: 100%|█████████▉| 667/670 [03:49<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0: 100%|█████████▉| 668/670 [03:49<00:00,  2.91it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0: 100%|█████████▉| 669/670 [03:50<00:00,  2.91it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0192]
Epoch 0: 100%|██████████| 670/670 [03:50<00:00,  2.91it/s, loss=0.020, val_loss_step=0.0209, train_loss_step=0.0192, val_loss_epoch=0.0206]
Epoch 1:  89%|████████▊ | 594/670 [03:37<00:27, 

Epoch 1:  95%|█████████▌| 637/670 [03:44<00:11,  2.84it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  95%|█████████▌| 638/670 [03:44<00:11,  2.84it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  95%|█████████▌| 639/670 [03:45<00:10,  2.84it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  96%|█████████▌| 640/670 [03:45<00:10,  2.84it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  96%|█████████▌| 641/670 [03:45<00:10,  2.84it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  96%|█████████▌| 642/670 [03:45<00:09,  2.85it/s, loss=0.019, val_loss_step=0.0209, train_loss_step=0.0172, val_loss_epoch=0.0206, train_loss_epoch=0.0263]
Epoch 1:  

Epoch 2:  91%|█████████ | 609/670 [03:39<00:22,  2.77it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  91%|█████████ | 610/670 [03:40<00:21,  2.77it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  91%|█████████ | 611/670 [03:40<00:21,  2.77it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  91%|█████████▏| 612/670 [03:40<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  91%|█████████▏| 613/670 [03:40<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  92%|█████████▏| 614/670 [03:40<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  

Epoch 2:  98%|█████████▊| 658/670 [03:48<00:04,  2.88it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  98%|█████████▊| 659/670 [03:48<00:03,  2.89it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  99%|█████████▊| 660/670 [03:48<00:03,  2.89it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  99%|█████████▊| 661/670 [03:48<00:03,  2.89it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  99%|█████████▉| 662/670 [03:48<00:02,  2.89it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  99%|█████████▉| 663/670 [03:49<00:02,  2.89it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0184, val_loss_epoch=0.0205, train_loss_epoch=0.0194]
Epoch 2:  

Epoch 3:  94%|█████████▍| 630/670 [03:43<00:14,  2.82it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  94%|█████████▍| 631/670 [03:43<00:13,  2.82it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  94%|█████████▍| 632/670 [03:43<00:13,  2.82it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  94%|█████████▍| 633/670 [03:44<00:13,  2.82it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  95%|█████████▍| 634/670 [03:44<00:12,  2.83it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  95%|█████████▍| 635/670 [03:44<00:12,  2.83it/s, loss=0.019, val_loss_step=0.025, train_loss_step=0.0193, val_loss_epoch=0.0228, train_loss_epoch=0.0189]
Epoch 3:  95%|██

Epoch 4:  90%|████████▉ | 602/670 [03:38<00:24,  2.76it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  90%|█████████ | 603/670 [03:38<00:24,  2.76it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  90%|█████████ | 604/670 [03:38<00:23,  2.76it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  90%|█████████ | 605/670 [03:38<00:23,  2.76it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  90%|█████████ | 606/670 [03:39<00:23,  2.77it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  91%|█████████ | 607/670 [03:39<00:22,  2.77it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  

Epoch 4:  97%|█████████▋| 651/670 [03:46<00:06,  2.87it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  97%|█████████▋| 652/670 [03:46<00:06,  2.87it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  97%|█████████▋| 653/670 [03:47<00:05,  2.88it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  98%|█████████▊| 654/670 [03:47<00:05,  2.88it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  98%|█████████▊| 655/670 [03:47<00:05,  2.88it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  98%|█████████▊| 656/670 [03:47<00:04,  2.88it/s, loss=0.018, val_loss_step=0.0218, train_loss_step=0.0235, val_loss_epoch=0.0202, train_loss_epoch=0.0186]
Epoch 4:  

Epoch 5:  93%|█████████▎| 623/670 [03:41<00:16,  2.81it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  93%|█████████▎| 624/670 [03:42<00:16,  2.81it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  93%|█████████▎| 625/670 [03:42<00:16,  2.81it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  93%|█████████▎| 626/670 [03:42<00:15,  2.81it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  94%|█████████▎| 627/670 [03:42<00:15,  2.82it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  94%|█████████▎| 628/670 [03:42<00:14,  2.82it/s, loss=0.019, val_loss_step=0.0222, train_loss_step=0.0203, val_loss_epoch=0.0214, train_loss_epoch=0.0183]
Epoch 5:  

Epoch 6:  89%|████████▉ | 595/670 [03:37<00:27,  2.74it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  89%|████████▉ | 596/670 [03:37<00:26,  2.74it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  89%|████████▉ | 597/670 [03:37<00:26,  2.74it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  89%|████████▉ | 598/670 [03:37<00:26,  2.75it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  89%|████████▉ | 599/670 [03:37<00:25,  2.75it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  90%|████████▉ | 600/670 [03:38<00:25,  2.75it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  

Epoch 6:  96%|█████████▌| 644/670 [03:45<00:09,  2.86it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  96%|█████████▋| 645/670 [03:45<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  96%|█████████▋| 646/670 [03:45<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  97%|█████████▋| 647/670 [03:45<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  97%|█████████▋| 648/670 [03:46<00:07,  2.87it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  97%|█████████▋| 649/670 [03:46<00:07,  2.87it/s, loss=0.017, val_loss_step=0.0878, train_loss_step=0.0175, val_loss_epoch=0.0784, train_loss_epoch=0.0181]
Epoch 6:  

Epoch 7:  92%|█████████▏| 616/670 [03:40<00:19,  2.80it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  92%|█████████▏| 617/670 [03:40<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  92%|█████████▏| 618/670 [03:40<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  92%|█████████▏| 619/670 [03:40<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  93%|█████████▎| 620/670 [03:41<00:17,  2.81it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  93%|█████████▎| 621/670 [03:41<00:17,  2.81it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  

Epoch 7:  99%|█████████▉| 665/670 [03:48<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7:  99%|█████████▉| 666/670 [03:48<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7: 100%|█████████▉| 667/670 [03:48<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7: 100%|█████████▉| 668/670 [03:49<00:00,  2.92it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7: 100%|█████████▉| 669/670 [03:49<00:00,  2.92it/s, loss=0.018, val_loss_step=0.0195, train_loss_step=0.0161, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 7: 100%|██████████| 670/670 [03:49<00:00,  2.92it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0161, val_loss_epoch=0.0205, train_loss_epoch=0.0177]
Epoch 8:  

Epoch 8:  95%|█████████▌| 637/670 [03:43<00:11,  2.85it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  95%|█████████▌| 638/670 [03:43<00:11,  2.85it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  95%|█████████▌| 639/670 [03:44<00:10,  2.85it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  96%|█████████▌| 640/670 [03:44<00:10,  2.85it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  96%|█████████▌| 641/670 [03:44<00:10,  2.86it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  96%|█████████▌| 642/670 [03:44<00:09,  2.86it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0205, val_loss_epoch=0.0205, train_loss_epoch=0.0174]
Epoch 8:  

Epoch 9:  91%|█████████ | 609/670 [03:39<00:21,  2.78it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  91%|█████████ | 610/670 [03:39<00:21,  2.78it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  91%|█████████ | 611/670 [03:39<00:21,  2.79it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  91%|█████████▏| 612/670 [03:39<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  91%|█████████▏| 613/670 [03:39<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  92%|█████████▏| 614/670 [03:39<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  

Epoch 9:  98%|█████████▊| 658/670 [03:47<00:04,  2.89it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  98%|█████████▊| 659/670 [03:47<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  99%|█████████▊| 660/670 [03:47<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  99%|█████████▊| 661/670 [03:47<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  99%|█████████▉| 662/670 [03:47<00:02,  2.90it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  99%|█████████▉| 663/670 [03:48<00:02,  2.91it/s, loss=0.017, val_loss_step=0.0254, train_loss_step=0.0164, val_loss_epoch=0.0245, train_loss_epoch=0.0172]
Epoch 9:  

Epoch 10:  94%|█████████▍| 630/670 [03:42<00:14,  2.83it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  94%|█████████▍| 631/670 [03:42<00:13,  2.84it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  94%|█████████▍| 632/670 [03:42<00:13,  2.84it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  94%|█████████▍| 633/670 [03:42<00:13,  2.84it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  95%|█████████▍| 634/670 [03:42<00:12,  2.84it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  95%|█████████▍| 635/670 [03:43<00:12,  2.85it/s, loss=0.017, val_loss_step=0.149, train_loss_step=0.0135, val_loss_epoch=0.146, train_loss_epoch=0.017]
Epoch 10:  95%|███████

Epoch 11:  90%|████████▉ | 602/670 [03:37<00:24,  2.77it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  90%|█████████ | 603/670 [03:37<00:24,  2.77it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  90%|█████████ | 604/670 [03:37<00:23,  2.77it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  90%|█████████ | 605/670 [03:38<00:23,  2.77it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  90%|█████████ | 606/670 [03:38<00:23,  2.78it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  91%|█████████ | 607/670 [03:38<00:22,  2.78it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoc

Epoch 11:  97%|█████████▋| 651/670 [03:45<00:06,  2.88it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  97%|█████████▋| 652/670 [03:45<00:06,  2.89it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  97%|█████████▋| 653/670 [03:46<00:05,  2.89it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  98%|█████████▊| 654/670 [03:46<00:05,  2.89it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  98%|█████████▊| 655/670 [03:46<00:05,  2.89it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoch 11:  98%|█████████▊| 656/670 [03:46<00:04,  2.89it/s, loss=0.017, val_loss_step=0.0256, train_loss_step=0.0183, val_loss_epoch=0.0279, train_loss_epoch=0.0169]
Epoc

Epoch 12:  93%|█████████▎| 623/670 [03:41<00:16,  2.82it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  93%|█████████▎| 624/670 [03:41<00:16,  2.82it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  93%|█████████▎| 625/670 [03:41<00:15,  2.82it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  93%|█████████▎| 626/670 [03:41<00:15,  2.83it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  94%|█████████▎| 627/670 [03:41<00:15,  2.83it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  94%|█████████▎| 628/670 [03:41<00:14,  2.83it/s, loss=0.017, val_loss_step=0.161, train_loss_step=0.0159, val_loss_epoch=0.149, train_loss_epoch=0.0166]
Epoch 12:  94%|█

Epoch 13:  89%|████████▉ | 595/670 [03:36<00:27,  2.75it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  89%|████████▉ | 596/670 [03:36<00:26,  2.76it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  89%|████████▉ | 597/670 [03:36<00:26,  2.76it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  89%|████████▉ | 598/670 [03:36<00:26,  2.76it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  89%|████████▉ | 599/670 [03:36<00:25,  2.76it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  90%|████████▉ | 600/670 [03:36<00:25,  2.77it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  90%|███████

Epoch 13:  96%|█████████▋| 645/670 [03:44<00:08,  2.87it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  96%|█████████▋| 646/670 [03:44<00:08,  2.87it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  97%|█████████▋| 647/670 [03:44<00:07,  2.88it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  97%|█████████▋| 648/670 [03:45<00:07,  2.88it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  97%|█████████▋| 649/670 [03:45<00:07,  2.88it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  97%|█████████▋| 650/670 [03:45<00:06,  2.88it/s, loss=0.016, val_loss_step=0.095, train_loss_step=0.014, val_loss_epoch=0.105, train_loss_epoch=0.0165]
Epoch 13:  97%|███████

Epoch 14:  92%|█████████▏| 617/670 [03:39<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  92%|█████████▏| 618/670 [03:40<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  92%|█████████▏| 619/670 [03:40<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  93%|█████████▎| 620/670 [03:40<00:17,  2.81it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  93%|█████████▎| 621/670 [03:40<00:17,  2.82it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  93%|█████████▎| 622/670 [03:40<00:17,  2.82it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14:  93%|█

Epoch 14:  99%|█████████▉| 666/670 [03:48<00:01,  2.92it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14: 100%|█████████▉| 667/670 [03:48<00:01,  2.92it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14: 100%|█████████▉| 668/670 [03:48<00:00,  2.92it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14: 100%|█████████▉| 669/670 [03:48<00:00,  2.93it/s, loss=0.016, val_loss_step=0.0237, train_loss_step=0.014, val_loss_epoch=0.025, train_loss_epoch=0.0164]
Epoch 14: 100%|██████████| 670/670 [03:48<00:00,  2.93it/s, loss=0.016, val_loss_step=0.0552, train_loss_step=0.014, val_loss_epoch=0.0565, train_loss_epoch=0.0164]
Epoch 15:  89%|████████▊ | 594/670 [03:36<00:27,  2.75it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163] 
Validating: 

Epoch 15:  95%|█████████▌| 638/670 [03:43<00:11,  2.85it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoch 15:  95%|█████████▌| 639/670 [03:43<00:10,  2.85it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoch 15:  96%|█████████▌| 640/670 [03:43<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoch 15:  96%|█████████▌| 641/670 [03:44<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoch 15:  96%|█████████▌| 642/670 [03:44<00:09,  2.86it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoch 15:  96%|█████████▌| 643/670 [03:44<00:09,  2.86it/s, loss=0.017, val_loss_step=0.0552, train_loss_step=0.0173, val_loss_epoch=0.0565, train_loss_epoch=0.0163]
Epoc

Epoch 16:  91%|█████████ | 610/670 [03:38<00:21,  2.79it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  91%|█████████ | 611/670 [03:38<00:21,  2.79it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  91%|█████████▏| 612/670 [03:38<00:20,  2.80it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  91%|█████████▏| 613/670 [03:39<00:20,  2.80it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  92%|█████████▏| 614/670 [03:39<00:20,  2.80it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  92%|█████████▏| 615/670 [03:39<00:19,  2.80it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoc

Epoch 16:  98%|█████████▊| 659/670 [03:46<00:03,  2.91it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  99%|█████████▊| 660/670 [03:46<00:03,  2.91it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  99%|█████████▊| 661/670 [03:47<00:03,  2.91it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  99%|█████████▉| 662/670 [03:47<00:02,  2.91it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  99%|█████████▉| 663/670 [03:47<00:02,  2.91it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoch 16:  99%|█████████▉| 664/670 [03:47<00:02,  2.92it/s, loss=0.015, val_loss_step=0.0216, train_loss_step=0.0214, val_loss_epoch=0.0221, train_loss_epoch=0.0161]
Epoc

Epoch 17:  94%|█████████▍| 631/670 [03:42<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17:  94%|█████████▍| 632/670 [03:42<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17:  94%|█████████▍| 633/670 [03:42<00:13,  2.85it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17:  95%|█████████▍| 634/670 [03:42<00:12,  2.85it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17:  95%|█████████▍| 635/670 [03:42<00:12,  2.85it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17:  95%|█████████▍| 636/670 [03:42<00:11,  2.85it/s, loss=0.016, val_loss_step=0.0183, train_loss_step=0.0181, val_loss_epoch=0.0185, train_loss_epoch=0.016]
Epoch 17: 

Epoch 18:  90%|█████████ | 603/670 [03:37<00:24,  2.77it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  90%|█████████ | 604/670 [03:37<00:23,  2.77it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  90%|█████████ | 605/670 [03:37<00:23,  2.78it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  90%|█████████ | 606/670 [03:38<00:23,  2.78it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  91%|█████████ | 607/670 [03:38<00:22,  2.78it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  91%|█████████ | 608/670 [03:38<00:22,  2.78it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18: 

Epoch 18:  97%|█████████▋| 652/670 [03:45<00:06,  2.89it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  97%|█████████▋| 653/670 [03:45<00:05,  2.89it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  98%|█████████▊| 654/670 [03:46<00:05,  2.89it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  98%|█████████▊| 655/670 [03:46<00:05,  2.90it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  98%|█████████▊| 656/670 [03:46<00:04,  2.90it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18:  98%|█████████▊| 657/670 [03:46<00:04,  2.90it/s, loss=0.016, val_loss_step=0.0193, train_loss_step=0.0171, val_loss_epoch=0.0188, train_loss_epoch=0.016]
Epoch 18: 

Epoch 19:  93%|█████████▎| 624/670 [03:40<00:16,  2.83it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19:  93%|█████████▎| 625/670 [03:40<00:15,  2.83it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19:  93%|█████████▎| 626/670 [03:41<00:15,  2.83it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19:  94%|█████████▎| 627/670 [03:41<00:15,  2.83it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19:  94%|█████████▎| 628/670 [03:41<00:14,  2.84it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19:  94%|█████████▍| 629/670 [03:41<00:14,  2.84it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.014, val_loss_epoch=0.0176, train_loss_epoch=0.0159]
Epoch 19: 

Epoch 20:  89%|████████▉ | 596/670 [03:36<00:26,  2.76it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  89%|████████▉ | 597/670 [03:36<00:26,  2.76it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  89%|████████▉ | 598/670 [03:36<00:26,  2.76it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  89%|████████▉ | 599/670 [03:36<00:25,  2.76it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  90%|████████▉ | 600/670 [03:36<00:25,  2.77it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  90%|████████▉ | 601/670 [03:37<00:24,  2.77it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoc

Epoch 20:  96%|█████████▋| 645/670 [03:44<00:08,  2.87it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  96%|█████████▋| 646/670 [03:44<00:08,  2.88it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  97%|█████████▋| 647/670 [03:44<00:07,  2.88it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  97%|█████████▋| 648/670 [03:44<00:07,  2.88it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  97%|█████████▋| 649/670 [03:45<00:07,  2.88it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 20:  97%|█████████▋| 650/670 [03:45<00:06,  2.89it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0102, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoc

Epoch 21:  92%|█████████▏| 617/670 [03:39<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 618/670 [03:39<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 619/670 [03:40<00:18,  2.81it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21:  93%|█████████▎| 620/670 [03:40<00:17,  2.82it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21:  93%|█████████▎| 621/670 [03:40<00:17,  2.82it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21:  93%|█████████▎| 622/670 [03:40<00:17,  2.82it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoc

Epoch 21:  99%|█████████▉| 666/670 [03:47<00:01,  2.92it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21: 100%|█████████▉| 667/670 [03:48<00:01,  2.92it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21: 100%|█████████▉| 668/670 [03:48<00:00,  2.93it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21: 100%|█████████▉| 669/670 [03:48<00:00,  2.93it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 21: 100%|██████████| 670/670 [03:48<00:00,  2.93it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0164, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  89%|████████▊ | 594/670 [03:35<00:27,  2.76it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158] 
Val

Epoch 22:  95%|█████████▌| 638/670 [03:43<00:11,  2.86it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▌| 639/670 [03:43<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  96%|█████████▌| 640/670 [03:43<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  96%|█████████▌| 641/670 [03:43<00:10,  2.87it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  96%|█████████▌| 642/670 [03:43<00:09,  2.87it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 22:  96%|█████████▌| 643/670 [03:43<00:09,  2.87it/s, loss=0.017, val_loss_step=0.0169, train_loss_step=0.0204, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoc

Epoch 23:  91%|█████████ | 610/670 [03:38<00:21,  2.80it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 611/670 [03:38<00:21,  2.80it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████▏| 612/670 [03:38<00:20,  2.80it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████▏| 613/670 [03:38<00:20,  2.80it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  92%|█████████▏| 614/670 [03:38<00:19,  2.81it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  92%|█████████▏| 615/670 [03:39<00:19,  2.81it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoc

Epoch 23:  98%|█████████▊| 659/670 [03:46<00:03,  2.91it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 660/670 [03:46<00:03,  2.91it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 661/670 [03:46<00:03,  2.92it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▉| 662/670 [03:46<00:02,  2.92it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▉| 663/670 [03:47<00:02,  2.92it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▉| 664/670 [03:47<00:02,  2.92it/s, loss=0.016, val_loss_step=0.0172, train_loss_step=0.0148, val_loss_epoch=0.0173, train_loss_epoch=0.0159]
Epoc

Epoch 24:  94%|█████████▍| 631/670 [03:42<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 632/670 [03:42<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 633/670 [03:42<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 634/670 [03:42<00:12,  2.85it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 635/670 [03:42<00:12,  2.85it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 636/670 [03:43<00:11,  2.85it/s, loss=0.016, val_loss_step=0.0173, train_loss_step=0.0162, val_loss_epoch=0.0177, train_loss_epoch=0.0159]
Epoc

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 11 M  


Initial Learning Rate: 0.001000
Validate iterations: 75
Train iterations: 596                                                 
Epoch 0:  89%|████████▉ | 596/671 [03:37<00:27,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  89%|████████▉ | 597/671 [03:38<00:27,  2.73it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  89%|████████▉ | 598/671 [03:38<00:26,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  89%|████████▉ | 599/671 [03:38<00:26,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  89%|████████▉ | 600/671 [03:38<00:25,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  90%|████████▉ | 601/671 [03:39<00:25,  2.74it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  90%|████████▉ | 602/671 [03:39<00:25,  2.75it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  90%|████████▉ | 6

Epoch 0:  99%|█████████▉| 665/671 [03:49<00:02,  2.89it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  99%|█████████▉| 666/671 [03:50<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0:  99%|█████████▉| 667/671 [03:50<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0: 100%|█████████▉| 668/671 [03:50<00:01,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0: 100%|█████████▉| 669/671 [03:50<00:00,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0: 100%|█████████▉| 670/671 [03:50<00:00,  2.90it/s, loss=0.020, val_loss_step=0.698, train_loss_step=0.0165]
Epoch 0: 100%|██████████| 671/671 [03:51<00:00,  2.90it/s, loss=0.020, val_loss_step=0.0182, train_loss_step=0.0165, val_loss_epoch=0.0197]
Epoch 1:  89%|████████▉ | 596/671 [03:38<00:27,  2.73it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]

Epoch 1:  95%|█████████▌| 640/671 [03:46<00:10,  2.83it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 641/671 [03:46<00:10,  2.83it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 642/671 [03:46<00:10,  2.83it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 643/671 [03:46<00:09,  2.84it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 644/671 [03:46<00:09,  2.84it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 645/671 [03:46<00:09,  2.84it/s, loss=0.019, val_loss_step=0.0182, train_loss_step=0.0101, val_loss_epoch=0.0197, train_loss_epoch=0.0257]
Epoch 1:  

Epoch 2:  91%|█████████▏| 613/671 [03:41<00:20,  2.77it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 614/671 [03:41<00:20,  2.77it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 615/671 [03:41<00:20,  2.77it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 616/671 [03:41<00:19,  2.77it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 617/671 [03:42<00:19,  2.78it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 618/671 [03:42<00:19,  2.78it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  92%|██

Epoch 2:  99%|█████████▊| 662/671 [03:49<00:03,  2.88it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 663/671 [03:49<00:02,  2.88it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 664/671 [03:50<00:02,  2.88it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 665/671 [03:50<00:02,  2.89it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 666/671 [03:50<00:01,  2.89it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 667/671 [03:50<00:01,  2.89it/s, loss=0.019, val_loss_step=0.0208, train_loss_step=0.0274, val_loss_epoch=0.022, train_loss_epoch=0.0195]
Epoch 2: 100%|██

Epoch 3:  95%|█████████▍| 635/671 [03:44<00:12,  2.82it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoch 3:  95%|█████████▍| 636/671 [03:45<00:12,  2.82it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoch 3:  95%|█████████▍| 637/671 [03:45<00:12,  2.83it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoch 3:  95%|█████████▌| 638/671 [03:45<00:11,  2.83it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoch 3:  95%|█████████▌| 639/671 [03:45<00:11,  2.83it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoch 3:  95%|█████████▌| 640/671 [03:45<00:10,  2.83it/s, loss=0.017, val_loss_step=0.0448, train_loss_step=0.00259, val_loss_epoch=0.0459, train_loss_epoch=0.0186]
Epoc

Epoch 4:  91%|█████████ | 608/671 [03:40<00:22,  2.76it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  91%|█████████ | 609/671 [03:40<00:22,  2.76it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  91%|█████████ | 610/671 [03:40<00:22,  2.76it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  91%|█████████ | 611/671 [03:40<00:21,  2.77it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  91%|█████████ | 612/671 [03:41<00:21,  2.77it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  91%|█████████▏| 613/671 [03:41<00:20,  2.77it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  

Epoch 4:  98%|█████████▊| 657/671 [03:48<00:04,  2.87it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  98%|█████████▊| 658/671 [03:48<00:04,  2.87it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  98%|█████████▊| 659/671 [03:49<00:04,  2.88it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  98%|█████████▊| 660/671 [03:49<00:03,  2.88it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  99%|█████████▊| 661/671 [03:49<00:03,  2.88it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  99%|█████████▊| 662/671 [03:49<00:03,  2.88it/s, loss=0.017, val_loss_step=0.0237, train_loss_step=0.0067, val_loss_epoch=0.0233, train_loss_epoch=0.0182]
Epoch 4:  

Epoch 5:  94%|█████████▍| 630/671 [03:43<00:14,  2.82it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  94%|█████████▍| 631/671 [03:43<00:14,  2.82it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  94%|█████████▍| 632/671 [03:43<00:13,  2.82it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  94%|█████████▍| 633/671 [03:44<00:13,  2.83it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  94%|█████████▍| 634/671 [03:44<00:13,  2.83it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  95%|█████████▍| 635/671 [03:44<00:12,  2.83it/s, loss=0.018, val_loss_step=0.0326, train_loss_step=0.0121, val_loss_epoch=0.0325, train_loss_epoch=0.0178]
Epoch 5:  

Epoch 6:  90%|████████▉ | 603/671 [03:38<00:24,  2.75it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  90%|█████████ | 604/671 [03:39<00:24,  2.76it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  90%|█████████ | 605/671 [03:39<00:23,  2.76it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  90%|█████████ | 606/671 [03:39<00:23,  2.76it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  90%|█████████ | 607/671 [03:39<00:23,  2.76it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  91%|█████████ | 608/671 [03:39<00:22,  2.77it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  

Epoch 6:  97%|█████████▋| 652/671 [03:47<00:06,  2.87it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  97%|█████████▋| 653/671 [03:47<00:06,  2.87it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  97%|█████████▋| 654/671 [03:47<00:05,  2.87it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  98%|█████████▊| 655/671 [03:47<00:05,  2.88it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  98%|█████████▊| 656/671 [03:47<00:05,  2.88it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  98%|█████████▊| 657/671 [03:48<00:04,  2.88it/s, loss=0.018, val_loss_step=0.0174, train_loss_step=0.0194, val_loss_epoch=0.0195, train_loss_epoch=0.0177]
Epoch 6:  

Epoch 7:  93%|█████████▎| 625/671 [03:42<00:16,  2.81it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  93%|█████████▎| 626/671 [03:42<00:16,  2.81it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  93%|█████████▎| 627/671 [03:42<00:15,  2.81it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▎| 628/671 [03:43<00:15,  2.81it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▎| 629/671 [03:43<00:14,  2.82it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▍| 630/671 [03:43<00:14,  2.82it/s, loss=0.019, val_loss_step=0.0169, train_loss_step=0.0256, val_loss_epoch=0.018, train_loss_epoch=0.0176]
Epoch 7:  94%|██

Epoch 8:  89%|████████▉ | 598/671 [03:38<00:26,  2.74it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  89%|████████▉ | 599/671 [03:38<00:26,  2.74it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  89%|████████▉ | 600/671 [03:38<00:25,  2.75it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  90%|████████▉ | 601/671 [03:38<00:25,  2.75it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  90%|████████▉ | 602/671 [03:38<00:25,  2.75it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  90%|████████▉ | 603/671 [03:39<00:24,  2.75it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  

Epoch 8:  96%|█████████▋| 647/671 [03:46<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  97%|█████████▋| 648/671 [03:46<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  97%|█████████▋| 649/671 [03:46<00:07,  2.86it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  97%|█████████▋| 650/671 [03:46<00:07,  2.86it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  97%|█████████▋| 651/671 [03:47<00:06,  2.87it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  97%|█████████▋| 652/671 [03:47<00:06,  2.87it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0187, val_loss_epoch=0.0206, train_loss_epoch=0.0174]
Epoch 8:  

Epoch 9:  92%|█████████▏| 620/671 [03:41<00:18,  2.80it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  93%|█████████▎| 621/671 [03:41<00:17,  2.80it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  93%|█████████▎| 622/671 [03:41<00:17,  2.81it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  93%|█████████▎| 623/671 [03:41<00:17,  2.81it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  93%|█████████▎| 624/671 [03:42<00:16,  2.81it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  93%|█████████▎| 625/671 [03:42<00:16,  2.81it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9:  

Epoch 9: 100%|█████████▉| 669/671 [03:49<00:00,  2.91it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9: 100%|█████████▉| 670/671 [03:49<00:00,  2.92it/s, loss=0.017, val_loss_step=0.0281, train_loss_step=0.0091, val_loss_epoch=0.0281, train_loss_epoch=0.0173]
Epoch 9: 100%|██████████| 671/671 [03:50<00:00,  2.92it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0091, val_loss_epoch=0.0274, train_loss_epoch=0.0173]
Epoch 10:  89%|████████▉ | 596/671 [03:37<00:27,  2.74it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171] 
Validating: 0it [00:00, ?it/s][A
Epoch 10:  89%|████████▉ | 597/671 [03:37<00:26,  2.74it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  89%|████████▉ | 598/671 [03:37<00:26,  2.75it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.027

Epoch 10:  96%|█████████▌| 642/671 [03:45<00:10,  2.85it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  96%|█████████▌| 643/671 [03:45<00:09,  2.85it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  96%|█████████▌| 644/671 [03:45<00:09,  2.86it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  96%|█████████▌| 645/671 [03:45<00:09,  2.86it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  96%|█████████▋| 646/671 [03:45<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoch 10:  96%|█████████▋| 647/671 [03:45<00:08,  2.86it/s, loss=0.017, val_loss_step=0.0286, train_loss_step=0.0108, val_loss_epoch=0.0274, train_loss_epoch=0.0171]
Epoc

Epoch 11:  92%|█████████▏| 615/671 [03:40<00:20,  2.79it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  92%|█████████▏| 616/671 [03:40<00:19,  2.79it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  92%|█████████▏| 617/671 [03:40<00:19,  2.79it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  92%|█████████▏| 618/671 [03:41<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  92%|█████████▏| 619/671 [03:41<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  92%|█████████▏| 620/671 [03:41<00:18,  2.80it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11: 

Epoch 11:  99%|█████████▉| 664/671 [03:48<00:02,  2.90it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  99%|█████████▉| 665/671 [03:48<00:02,  2.90it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  99%|█████████▉| 666/671 [03:49<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11:  99%|█████████▉| 667/671 [03:49<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11: 100%|█████████▉| 668/671 [03:49<00:01,  2.91it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11: 100%|█████████▉| 669/671 [03:49<00:00,  2.91it/s, loss=0.018, val_loss_step=0.0224, train_loss_step=0.0255, val_loss_epoch=0.0238, train_loss_epoch=0.017]
Epoch 11: 

Epoch 12:  95%|█████████▍| 637/671 [03:44<00:11,  2.84it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12:  95%|█████████▌| 638/671 [03:44<00:11,  2.84it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12:  95%|█████████▌| 639/671 [03:44<00:11,  2.85it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12:  95%|█████████▌| 640/671 [03:44<00:10,  2.85it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12:  96%|█████████▌| 641/671 [03:44<00:10,  2.85it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12:  96%|█████████▌| 642/671 [03:45<00:10,  2.85it/s, loss=0.018, val_loss_step=0.0153, train_loss_step=0.029, val_loss_epoch=0.0176, train_loss_epoch=0.0168]
Epoch 12: 

Epoch 13:  91%|█████████ | 610/671 [03:39<00:21,  2.78it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  91%|█████████ | 611/671 [03:39<00:21,  2.78it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  91%|█████████ | 612/671 [03:39<00:21,  2.78it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  91%|█████████▏| 613/671 [03:39<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  92%|█████████▏| 614/671 [03:40<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  92%|█████████▏| 615/671 [03:40<00:20,  2.79it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoc

Epoch 13:  98%|█████████▊| 659/671 [03:47<00:04,  2.89it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  98%|█████████▊| 660/671 [03:47<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  99%|█████████▊| 661/671 [03:48<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  99%|█████████▊| 662/671 [03:48<00:03,  2.90it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  99%|█████████▉| 663/671 [03:48<00:02,  2.90it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoch 13:  99%|█████████▉| 664/671 [03:48<00:02,  2.90it/s, loss=0.017, val_loss_step=0.0193, train_loss_step=0.0211, val_loss_epoch=0.0237, train_loss_epoch=0.0167]
Epoc

Epoch 14:  94%|█████████▍| 632/671 [03:43<00:13,  2.83it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14:  94%|█████████▍| 633/671 [03:43<00:13,  2.83it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14:  94%|█████████▍| 634/671 [03:43<00:13,  2.83it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14:  95%|█████████▍| 635/671 [03:43<00:12,  2.84it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14:  95%|█████████▍| 636/671 [03:44<00:12,  2.84it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14:  95%|█████████▍| 637/671 [03:44<00:11,  2.84it/s, loss=0.016, val_loss_step=0.0165, train_loss_step=0.018, val_loss_epoch=0.0184, train_loss_epoch=0.0166]
Epoch 14: 

Epoch 15:  90%|█████████ | 605/671 [03:38<00:23,  2.77it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  90%|█████████ | 606/671 [03:38<00:23,  2.77it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  90%|█████████ | 607/671 [03:38<00:23,  2.77it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  91%|█████████ | 608/671 [03:39<00:22,  2.78it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  91%|█████████ | 609/671 [03:39<00:22,  2.78it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  91%|█████████ | 610/671 [03:39<00:21,  2.78it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164

Epoch 15:  97%|█████████▋| 654/671 [03:46<00:05,  2.88it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  98%|█████████▊| 655/671 [03:46<00:05,  2.89it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  98%|█████████▊| 656/671 [03:47<00:05,  2.89it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  98%|█████████▊| 657/671 [03:47<00:04,  2.89it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  98%|█████████▊| 658/671 [03:47<00:04,  2.89it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164]
Epoch 15:  98%|█████████▊| 659/671 [03:47<00:04,  2.89it/s, loss=0.015, val_loss_step=0.0153, train_loss_step=0.00995, val_loss_epoch=0.0181, train_loss_epoch=0.0164

Epoch 16:  93%|█████████▎| 627/671 [03:42<00:15,  2.82it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16:  94%|█████████▎| 628/671 [03:42<00:15,  2.82it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16:  94%|█████████▎| 629/671 [03:42<00:14,  2.82it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16:  94%|█████████▍| 630/671 [03:42<00:14,  2.83it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16:  94%|█████████▍| 631/671 [03:43<00:14,  2.83it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16:  94%|█████████▍| 632/671 [03:43<00:13,  2.83it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.015, val_loss_epoch=0.0166, train_loss_epoch=0.0163]
Epoch 16: 

Epoch 17:  89%|████████▉ | 600/671 [03:37<00:25,  2.76it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  90%|████████▉ | 601/671 [03:37<00:25,  2.76it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  90%|████████▉ | 602/671 [03:37<00:24,  2.76it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  90%|████████▉ | 603/671 [03:37<00:24,  2.77it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  90%|█████████ | 604/671 [03:38<00:24,  2.77it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  90%|█████████ | 605/671 [03:38<00:23,  2.77it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17: 

Epoch 17:  97%|█████████▋| 649/671 [03:45<00:07,  2.87it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  97%|█████████▋| 650/671 [03:45<00:07,  2.88it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  97%|█████████▋| 651/671 [03:46<00:06,  2.88it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  97%|█████████▋| 652/671 [03:46<00:06,  2.88it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  97%|█████████▋| 653/671 [03:46<00:06,  2.88it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17:  97%|█████████▋| 654/671 [03:46<00:05,  2.89it/s, loss=0.017, val_loss_step=0.0143, train_loss_step=0.0157, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 17: 

Epoch 18:  93%|█████████▎| 622/671 [03:41<00:17,  2.81it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoch 18:  93%|█████████▎| 623/671 [03:41<00:17,  2.81it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoch 18:  93%|█████████▎| 624/671 [03:41<00:16,  2.81it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoch 18:  93%|█████████▎| 625/671 [03:41<00:16,  2.82it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoch 18:  93%|█████████▎| 626/671 [03:42<00:15,  2.82it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoch 18:  93%|█████████▎| 627/671 [03:42<00:15,  2.82it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.0272, val_loss_epoch=0.0165, train_loss_epoch=0.0161]
Epoc

Epoch 18: 100%|██████████| 671/671 [03:50<00:00,  2.92it/s, loss=0.017, val_loss_step=0.0136, train_loss_step=0.0272, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  89%|████████▉ | 596/671 [03:37<00:27,  2.74it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161] 
Validating: 0it [00:00, ?it/s][A
Epoch 19:  89%|████████▉ | 597/671 [03:37<00:27,  2.74it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  89%|████████▉ | 598/671 [03:38<00:26,  2.74it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  89%|████████▉ | 599/671 [03:38<00:26,  2.74it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  89%|████████▉ | 600/671 [03:38<00:25,  2.75it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.

Epoch 19:  96%|█████████▌| 644/671 [03:46<00:09,  2.85it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  96%|█████████▌| 645/671 [03:46<00:09,  2.85it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  96%|█████████▋| 646/671 [03:46<00:08,  2.85it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  96%|█████████▋| 647/671 [03:46<00:08,  2.86it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  97%|█████████▋| 648/671 [03:46<00:08,  2.86it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoch 19:  97%|█████████▋| 649/671 [03:46<00:07,  2.86it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0125, val_loss_epoch=0.0164, train_loss_epoch=0.0161]
Epoc

Epoch 20:  92%|█████████▏| 617/671 [03:41<00:19,  2.79it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  92%|█████████▏| 618/671 [03:41<00:19,  2.79it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  92%|█████████▏| 619/671 [03:41<00:18,  2.79it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  92%|█████████▏| 620/671 [03:41<00:18,  2.79it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  93%|█████████▎| 621/671 [03:42<00:17,  2.80it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  93%|█████████▎| 622/671 [03:42<00:17,  2.80it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20: 

Epoch 20:  99%|█████████▉| 666/671 [03:49<00:01,  2.90it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20:  99%|█████████▉| 667/671 [03:49<00:01,  2.90it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20: 100%|█████████▉| 668/671 [03:49<00:01,  2.90it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20: 100%|█████████▉| 669/671 [03:50<00:00,  2.91it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20: 100%|█████████▉| 670/671 [03:50<00:00,  2.91it/s, loss=0.016, val_loss_step=0.0136, train_loss_step=0.0176, val_loss_epoch=0.0163, train_loss_epoch=0.016]
Epoch 20: 100%|██████████| 671/671 [03:50<00:00,  2.91it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0176, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21: 

Epoch 21:  95%|█████████▌| 639/671 [03:43<00:11,  2.86it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21:  95%|█████████▌| 640/671 [03:43<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21:  96%|█████████▌| 641/671 [03:44<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21:  96%|█████████▌| 642/671 [03:44<00:10,  2.86it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21:  96%|█████████▌| 643/671 [03:44<00:09,  2.87it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 21:  96%|█████████▌| 644/671 [03:44<00:09,  2.87it/s, loss=0.017, val_loss_step=0.0137, train_loss_step=0.00845, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoc

Epoch 22:  91%|█████████ | 612/671 [03:39<00:21,  2.79it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  91%|█████████▏| 613/671 [03:39<00:20,  2.79it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  92%|█████████▏| 614/671 [03:39<00:20,  2.79it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  92%|█████████▏| 615/671 [03:40<00:20,  2.79it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  92%|█████████▏| 616/671 [03:40<00:19,  2.80it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  92%|█████████▏| 617/671 [03:40<00:19,  2.80it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoc

Epoch 22:  99%|█████████▊| 661/671 [03:47<00:03,  2.90it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  99%|█████████▊| 662/671 [03:48<00:03,  2.90it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  99%|█████████▉| 663/671 [03:48<00:02,  2.90it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  99%|█████████▉| 664/671 [03:48<00:02,  2.91it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  99%|█████████▉| 665/671 [03:48<00:02,  2.91it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoch 22:  99%|█████████▉| 666/671 [03:48<00:01,  2.91it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.00996, val_loss_epoch=0.0164, train_loss_epoch=0.016]
Epoc

Epoch 23:  94%|█████████▍| 634/671 [03:43<00:13,  2.84it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23:  95%|█████████▍| 635/671 [03:43<00:12,  2.84it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23:  95%|█████████▍| 636/671 [03:43<00:12,  2.84it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23:  95%|█████████▍| 637/671 [03:43<00:11,  2.85it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23:  95%|█████████▌| 638/671 [03:44<00:11,  2.85it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23:  95%|█████████▌| 639/671 [03:44<00:11,  2.85it/s, loss=0.016, val_loss_step=0.0137, train_loss_step=0.0194, val_loss_epoch=0.0165, train_loss_epoch=0.016]
Epoch 23: 

Epoch 24:  90%|█████████ | 607/671 [03:38<00:23,  2.78it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  91%|█████████ | 608/671 [03:38<00:22,  2.78it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  91%|█████████ | 609/671 [03:38<00:22,  2.78it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  91%|█████████ | 610/671 [03:39<00:21,  2.78it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  91%|█████████ | 611/671 [03:39<00:21,  2.79it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  91%|█████████ | 612/671 [03:39<00:21,  2.79it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24: 

Epoch 24:  98%|█████████▊| 656/671 [03:46<00:05,  2.89it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  98%|█████████▊| 657/671 [03:46<00:04,  2.89it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  98%|█████████▊| 658/671 [03:47<00:04,  2.90it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  98%|█████████▊| 659/671 [03:47<00:04,  2.90it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  98%|█████████▊| 660/671 [03:47<00:03,  2.90it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24:  99%|█████████▊| 661/671 [03:47<00:03,  2.90it/s, loss=0.016, val_loss_step=0.0144, train_loss_step=0.0112, val_loss_epoch=0.017, train_loss_epoch=0.0161]
Epoch 24: 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 11 M  


Initial Learning Rate: 0.001000
Validate iterations: 74
Train iterations: 597                                                 
Epoch 0:  89%|████████▉ | 597/671 [03:38<00:27,  2.73it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  89%|████████▉ | 598/671 [03:38<00:26,  2.73it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  89%|████████▉ | 599/671 [03:39<00:26,  2.73it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  89%|████████▉ | 600/671 [03:39<00:25,  2.74it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  90%|████████▉ | 601/671 [03:39<00:25,  2.74it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  90%|████████▉ | 602/671 [03:39<00:25,  2.74it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  90%|████████▉ | 603/671 [03:39<00:24,  2.74it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  90%|█████████ | 6

Epoch 0:  99%|█████████▉| 666/671 [03:50<00:01,  2.89it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0:  99%|█████████▉| 667/671 [03:50<00:01,  2.89it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0: 100%|█████████▉| 668/671 [03:50<00:01,  2.89it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0: 100%|█████████▉| 669/671 [03:50<00:00,  2.90it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0: 100%|█████████▉| 670/671 [03:51<00:00,  2.90it/s, loss=0.021, val_loss_step=0.699, train_loss_step=0.0197]
Epoch 0: 100%|██████████| 671/671 [03:51<00:00,  2.90it/s, loss=0.021, val_loss_step=0.0273, train_loss_step=0.0197, val_loss_epoch=0.0304]
Epoch 1:  89%|████████▉ | 597/671 [03:38<00:27,  2.73it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  89%|████████▉ | 598/671 [03:39<00:26,  2.73it/s, loss=0.020, val_loss_s

Epoch 1:  96%|█████████▌| 642/671 [03:46<00:10,  2.83it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  96%|█████████▌| 643/671 [03:46<00:09,  2.83it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  96%|█████████▌| 644/671 [03:47<00:09,  2.84it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  96%|█████████▌| 645/671 [03:47<00:09,  2.84it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  96%|█████████▋| 646/671 [03:47<00:08,  2.84it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  96%|█████████▋| 647/671 [03:47<00:08,  2.84it/s, loss=0.020, val_loss_step=0.0273, train_loss_step=0.0181, val_loss_epoch=0.0304, train_loss_epoch=0.0246]
Epoch 1:  

Epoch 2:  92%|█████████▏| 616/671 [03:42<00:19,  2.77it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 617/671 [03:42<00:19,  2.77it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 618/671 [03:42<00:19,  2.78it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 619/671 [03:42<00:18,  2.78it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 620/671 [03:42<00:18,  2.78it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  93%|█████████▎| 621/671 [03:43<00:17,  2.78it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  

Epoch 2:  99%|█████████▉| 665/671 [03:50<00:02,  2.88it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 666/671 [03:50<00:01,  2.89it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 667/671 [03:50<00:01,  2.89it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 668/671 [03:51<00:01,  2.89it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 669/671 [03:51<00:00,  2.89it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 670/671 [03:51<00:00,  2.90it/s, loss=0.019, val_loss_step=0.0162, train_loss_step=0.0192, val_loss_epoch=0.0202, train_loss_epoch=0.0195]
Epoch 2: 1

Epoch 3:  95%|█████████▌| 639/671 [03:45<00:11,  2.83it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  95%|█████████▌| 640/671 [03:45<00:10,  2.83it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  96%|█████████▌| 641/671 [03:46<00:10,  2.84it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  96%|█████████▌| 642/671 [03:46<00:10,  2.84it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  96%|█████████▌| 643/671 [03:46<00:09,  2.84it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  96%|█████████▌| 644/671 [03:46<00:09,  2.84it/s, loss=0.018, val_loss_step=0.0175, train_loss_step=0.0185, val_loss_epoch=0.0219, train_loss_epoch=0.0189]
Epoch 3:  

Epoch 4:  91%|█████████▏| 613/671 [03:40<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  92%|█████████▏| 614/671 [03:40<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  92%|█████████▏| 615/671 [03:41<00:20,  2.78it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  92%|█████████▏| 616/671 [03:41<00:19,  2.78it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  92%|█████████▏| 617/671 [03:41<00:19,  2.79it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  92%|█████████▏| 618/671 [03:41<00:19,  2.79it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  

Epoch 4:  99%|█████████▊| 662/671 [03:48<00:03,  2.89it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  99%|█████████▉| 663/671 [03:49<00:02,  2.89it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  99%|█████████▉| 664/671 [03:49<00:02,  2.90it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  99%|█████████▉| 665/671 [03:49<00:02,  2.90it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  99%|█████████▉| 666/671 [03:49<00:01,  2.90it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4:  99%|█████████▉| 667/671 [03:49<00:01,  2.90it/s, loss=0.018, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0195, train_loss_epoch=0.0183]
Epoch 4: 1

Epoch 5:  95%|█████████▍| 636/671 [03:42<00:12,  2.85it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  95%|█████████▍| 637/671 [03:42<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  95%|█████████▌| 638/671 [03:43<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  95%|█████████▌| 639/671 [03:43<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  95%|█████████▌| 640/671 [03:43<00:10,  2.86it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  96%|█████████▌| 641/671 [03:43<00:10,  2.87it/s, loss=0.018, val_loss_step=0.0151, train_loss_step=0.0178, val_loss_epoch=0.0183, train_loss_epoch=0.0179]
Epoch 5:  

Epoch 6:  91%|█████████ | 610/671 [03:38<00:21,  2.79it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  91%|█████████ | 611/671 [03:38<00:21,  2.80it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  91%|█████████ | 612/671 [03:38<00:21,  2.80it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  91%|█████████▏| 613/671 [03:38<00:20,  2.80it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  92%|█████████▏| 614/671 [03:39<00:20,  2.80it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  92%|█████████▏| 615/671 [03:39<00:19,  2.81it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  

Epoch 6:  98%|█████████▊| 659/671 [03:46<00:04,  2.91it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  98%|█████████▊| 660/671 [03:46<00:03,  2.91it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  99%|█████████▊| 661/671 [03:46<00:03,  2.91it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  99%|█████████▊| 662/671 [03:47<00:03,  2.92it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  99%|█████████▉| 663/671 [03:47<00:02,  2.92it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  99%|█████████▉| 664/671 [03:47<00:02,  2.92it/s, loss=0.017, val_loss_step=0.0163, train_loss_step=0.0161, val_loss_epoch=0.0199, train_loss_epoch=0.0177]
Epoch 6:  

Epoch 7:  94%|█████████▍| 633/671 [03:41<00:13,  2.85it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  94%|█████████▍| 634/671 [03:42<00:12,  2.85it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  95%|█████████▍| 635/671 [03:42<00:12,  2.86it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  95%|█████████▍| 636/671 [03:42<00:12,  2.86it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  95%|█████████▍| 637/671 [03:42<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  95%|█████████▌| 638/671 [03:42<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0162, train_loss_step=0.0184, val_loss_epoch=0.0196, train_loss_epoch=0.0175]
Epoch 7:  

Epoch 8:  90%|█████████ | 607/671 [03:37<00:22,  2.79it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|█████████ | 608/671 [03:37<00:22,  2.79it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|█████████ | 609/671 [03:37<00:22,  2.80it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|█████████ | 610/671 [03:37<00:21,  2.80it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|█████████ | 611/671 [03:38<00:21,  2.80it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|█████████ | 612/671 [03:38<00:21,  2.80it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  91%|██

Epoch 8:  98%|█████████▊| 656/671 [03:45<00:05,  2.91it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  98%|█████████▊| 657/671 [03:45<00:04,  2.91it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  98%|█████████▊| 658/671 [03:45<00:04,  2.91it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  98%|█████████▊| 659/671 [03:46<00:04,  2.91it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  98%|█████████▊| 660/671 [03:46<00:03,  2.92it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  99%|█████████▊| 661/671 [03:46<00:03,  2.92it/s, loss=0.016, val_loss_step=0.016, train_loss_step=0.0149, val_loss_epoch=0.0208, train_loss_epoch=0.0173]
Epoch 8:  99%|██

Epoch 9:  94%|█████████▍| 630/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  94%|█████████▍| 631/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  94%|█████████▍| 632/671 [03:41<00:13,  2.85it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  94%|█████████▍| 633/671 [03:41<00:13,  2.86it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  94%|█████████▍| 634/671 [03:41<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  95%|█████████▍| 635/671 [03:41<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0212, train_loss_step=0.0191, val_loss_epoch=0.0243, train_loss_epoch=0.0171]
Epoch 9:  

Epoch 10:  90%|█████████ | 604/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  90%|█████████ | 605/671 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  90%|█████████ | 606/671 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  90%|█████████ | 607/671 [03:37<00:22,  2.80it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  91%|█████████ | 608/671 [03:37<00:22,  2.80it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  91%|█████████ | 609/671 [03:37<00:22,  2.80it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoc

Epoch 10:  97%|█████████▋| 653/671 [03:44<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  97%|█████████▋| 654/671 [03:44<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  98%|█████████▊| 655/671 [03:45<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  98%|█████████▊| 656/671 [03:45<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  98%|█████████▊| 657/671 [03:45<00:04,  2.92it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoch 10:  98%|█████████▊| 658/671 [03:45<00:04,  2.92it/s, loss=0.016, val_loss_step=0.0626, train_loss_step=0.0187, val_loss_epoch=0.0651, train_loss_epoch=0.0169]
Epoc

Epoch 11:  93%|█████████▎| 627/671 [03:40<00:15,  2.84it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█████████▎| 628/671 [03:40<00:15,  2.85it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█████████▎| 629/671 [03:40<00:14,  2.85it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█████████▍| 630/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█████████▍| 631/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█████████▍| 632/671 [03:41<00:13,  2.86it/s, loss=0.017, val_loss_step=0.109, train_loss_step=0.0182, val_loss_epoch=0.126, train_loss_epoch=0.0167]
Epoch 11:  94%|█

Epoch 12:  90%|████████▉ | 601/671 [03:36<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  90%|████████▉ | 602/671 [03:36<00:24,  2.78it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  90%|████████▉ | 603/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  90%|█████████ | 604/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  90%|█████████ | 605/671 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  90%|█████████ | 606/671 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoc

Epoch 12:  97%|█████████▋| 650/671 [03:44<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  97%|█████████▋| 651/671 [03:44<00:06,  2.90it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  97%|█████████▋| 652/671 [03:44<00:06,  2.90it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  97%|█████████▋| 653/671 [03:44<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  97%|█████████▋| 654/671 [03:44<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoch 12:  98%|█████████▊| 655/671 [03:45<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0312, train_loss_step=0.0219, val_loss_epoch=0.0359, train_loss_epoch=0.0166]
Epoc

Epoch 13:  93%|█████████▎| 624/671 [03:39<00:16,  2.84it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoch 13:  93%|█████████▎| 625/671 [03:39<00:16,  2.84it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoch 13:  93%|█████████▎| 626/671 [03:40<00:15,  2.84it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoch 13:  93%|█████████▎| 627/671 [03:40<00:15,  2.85it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoch 13:  94%|█████████▎| 628/671 [03:40<00:15,  2.85it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoch 13:  94%|█████████▎| 629/671 [03:40<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0146, train_loss_step=0.0195, val_loss_epoch=0.0202, train_loss_epoch=0.0165]
Epoc

Epoch 14:  89%|████████▉ | 598/671 [03:35<00:26,  2.77it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  89%|████████▉ | 599/671 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  89%|████████▉ | 600/671 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 601/671 [03:36<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 602/671 [03:36<00:24,  2.78it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 603/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoc

Epoch 14:  96%|█████████▋| 647/671 [03:43<00:08,  2.89it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 648/671 [03:43<00:07,  2.89it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 649/671 [03:44<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 650/671 [03:44<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 651/671 [03:44<00:06,  2.90it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 652/671 [03:44<00:06,  2.90it/s, loss=0.016, val_loss_step=0.0226, train_loss_step=0.0153, val_loss_epoch=0.0258, train_loss_epoch=0.0164]
Epoc

Epoch 15:  93%|█████████▎| 621/671 [03:39<00:17,  2.83it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15:  93%|█████████▎| 622/671 [03:39<00:17,  2.83it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15:  93%|█████████▎| 623/671 [03:39<00:16,  2.84it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15:  93%|█████████▎| 624/671 [03:39<00:16,  2.84it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15:  93%|█████████▎| 625/671 [03:40<00:16,  2.84it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15:  93%|█████████▎| 626/671 [03:40<00:15,  2.84it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoc

Epoch 15: 100%|█████████▉| 670/671 [03:47<00:00,  2.95it/s, loss=0.015, val_loss_step=0.0491, train_loss_step=0.0139, val_loss_epoch=0.0569, train_loss_epoch=0.0162]
Epoch 15: 100%|██████████| 671/671 [03:48<00:00,  2.94it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0139, val_loss_epoch=0.0177, train_loss_epoch=0.0162]
Epoch 16:  89%|████████▉ | 597/671 [03:34<00:26,  2.78it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161] 
Validating: 0it [00:00, ?it/s][A
Epoch 16:  89%|████████▉ | 598/671 [03:35<00:26,  2.78it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  89%|████████▉ | 599/671 [03:35<00:25,  2.78it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  89%|████████▉ | 600/671 [03:35<00:25,  2.79it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.

Epoch 16:  96%|█████████▌| 644/671 [03:42<00:09,  2.89it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  96%|█████████▌| 645/671 [03:42<00:08,  2.90it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  96%|█████████▋| 646/671 [03:42<00:08,  2.90it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  96%|█████████▋| 647/671 [03:43<00:08,  2.90it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  97%|█████████▋| 648/671 [03:43<00:07,  2.90it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoch 16:  97%|█████████▋| 649/671 [03:43<00:07,  2.91it/s, loss=0.015, val_loss_step=0.0131, train_loss_step=0.0205, val_loss_epoch=0.0177, train_loss_epoch=0.0161]
Epoc

Epoch 17:  92%|█████████▏| 618/671 [03:38<00:18,  2.83it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17:  92%|█████████▏| 619/671 [03:38<00:18,  2.83it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17:  92%|█████████▏| 620/671 [03:38<00:18,  2.83it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17:  93%|█████████▎| 621/671 [03:39<00:17,  2.84it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17:  93%|█████████▎| 622/671 [03:39<00:17,  2.84it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17:  93%|█████████▎| 623/671 [03:39<00:16,  2.84it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17: 

Epoch 17:  99%|█████████▉| 667/671 [03:46<00:01,  2.94it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17: 100%|█████████▉| 668/671 [03:46<00:01,  2.95it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17: 100%|█████████▉| 669/671 [03:46<00:00,  2.95it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17: 100%|█████████▉| 670/671 [03:47<00:00,  2.95it/s, loss=0.016, val_loss_step=0.0129, train_loss_step=0.0187, val_loss_epoch=0.0173, train_loss_epoch=0.016]
Epoch 17: 100%|██████████| 671/671 [03:47<00:00,  2.95it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0187, val_loss_epoch=0.0179, train_loss_epoch=0.016] 
Epoch 18:  89%|████████▉ | 597/671 [03:35<00:26,  2.78it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159] 
Validatin

Epoch 18:  96%|█████████▌| 641/671 [03:42<00:10,  2.88it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18:  96%|█████████▌| 642/671 [03:42<00:10,  2.88it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18:  96%|█████████▌| 643/671 [03:42<00:09,  2.89it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18:  96%|█████████▌| 644/671 [03:43<00:09,  2.89it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18:  96%|█████████▌| 645/671 [03:43<00:08,  2.89it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18:  96%|█████████▋| 646/671 [03:43<00:08,  2.89it/s, loss=0.016, val_loss_step=0.013, train_loss_step=0.0216, val_loss_epoch=0.0179, train_loss_epoch=0.0159]
Epoch 18: 

Epoch 19:  92%|█████████▏| 615/671 [03:38<00:19,  2.82it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 616/671 [03:38<00:19,  2.82it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 617/671 [03:38<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 618/671 [03:38<00:18,  2.83it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 619/671 [03:38<00:18,  2.83it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 620/671 [03:38<00:18,  2.83it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoc

Epoch 19:  99%|█████████▉| 664/671 [03:46<00:02,  2.94it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 665/671 [03:46<00:02,  2.94it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 666/671 [03:46<00:01,  2.94it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 667/671 [03:46<00:01,  2.94it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19: 100%|█████████▉| 668/671 [03:46<00:01,  2.95it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoch 19: 100%|█████████▉| 669/671 [03:46<00:00,  2.95it/s, loss=0.015, val_loss_step=0.0125, train_loss_step=0.0104, val_loss_epoch=0.0169, train_loss_epoch=0.0159]
Epoc

Epoch 20:  95%|█████████▌| 638/671 [03:41<00:11,  2.88it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 20:  95%|█████████▌| 639/671 [03:41<00:11,  2.88it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 20:  95%|█████████▌| 640/671 [03:42<00:10,  2.88it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 641/671 [03:42<00:10,  2.88it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 642/671 [03:42<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 643/671 [03:42<00:09,  2.89it/s, loss=0.017, val_loss_step=0.0127, train_loss_step=0.0144, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoc

Epoch 21:  91%|█████████ | 612/671 [03:37<00:20,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  91%|█████████▏| 613/671 [03:37<00:20,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 614/671 [03:38<00:20,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 615/671 [03:38<00:19,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 616/671 [03:38<00:19,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  92%|█████████▏| 617/671 [03:38<00:19,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoc

Epoch 21:  99%|█████████▊| 661/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  99%|█████████▊| 662/671 [03:46<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  99%|█████████▉| 663/671 [03:46<00:02,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  99%|█████████▉| 664/671 [03:46<00:02,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  99%|█████████▉| 665/671 [03:46<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoch 21:  99%|█████████▉| 666/671 [03:46<00:01,  2.94it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0141, val_loss_epoch=0.0167, train_loss_epoch=0.0158]
Epoc

Epoch 22:  95%|█████████▍| 635/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▍| 636/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▍| 637/671 [03:41<00:11,  2.87it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▌| 638/671 [03:41<00:11,  2.88it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▌| 639/671 [03:41<00:11,  2.88it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoch 22:  95%|█████████▌| 640/671 [03:42<00:10,  2.88it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0151, val_loss_epoch=0.0168, train_loss_epoch=0.0158]
Epoc

Epoch 23:  91%|█████████ | 609/671 [03:36<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 610/671 [03:36<00:21,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 611/671 [03:37<00:21,  2.81it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 612/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████▏| 613/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  92%|█████████▏| 614/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23: 

Epoch 23:  98%|█████████▊| 658/671 [03:44<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  98%|█████████▊| 659/671 [03:45<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  98%|█████████▊| 660/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 661/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 662/671 [03:45<00:03,  2.94it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▉| 663/671 [03:45<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0126, train_loss_step=0.0137, val_loss_epoch=0.017, train_loss_epoch=0.0159]
Epoch 23: 

Epoch 24:  94%|█████████▍| 632/671 [03:40<00:13,  2.86it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 633/671 [03:41<00:13,  2.86it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 634/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 635/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 636/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 637/671 [03:41<00:11,  2.87it/s, loss=0.016, val_loss_step=0.0142, train_loss_step=0.0179, val_loss_epoch=0.018, train_loss_epoch=0.0159]
Epoch 24: 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 11 M  


Initial Learning Rate: 0.001000
Validate iterations: 74
Train iterations: 597                                                 
Epoch 0:  89%|████████▉ | 597/671 [03:35<00:26,  2.76it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  89%|████████▉ | 598/671 [03:36<00:26,  2.76it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  89%|████████▉ | 599/671 [03:36<00:26,  2.77it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  89%|████████▉ | 600/671 [03:36<00:25,  2.77it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  90%|████████▉ | 601/671 [03:36<00:25,  2.77it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  90%|████████▉ | 602/671 [03:37<00:24,  2.77it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  90%|████████▉ | 603/671 [03:37<00:24,  2.78it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  90%|█████████ | 6

Epoch 0:  99%|█████████▉| 666/671 [03:47<00:01,  2.92it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0:  99%|█████████▉| 667/671 [03:47<00:01,  2.93it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0: 100%|█████████▉| 668/671 [03:48<00:01,  2.93it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0: 100%|█████████▉| 669/671 [03:48<00:00,  2.93it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0: 100%|█████████▉| 670/671 [03:48<00:00,  2.93it/s, loss=0.020, val_loss_step=0.699, train_loss_step=0.0218]
Epoch 0: 100%|██████████| 671/671 [03:48<00:00,  2.93it/s, loss=0.020, val_loss_step=0.0209, train_loss_step=0.0218, val_loss_epoch=0.0205]
Epoch 1:  89%|████████▉ | 597/671 [03:36<00:26,  2.76it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  89%|████████▉ | 598/671 [03:36<00:26,  2.76it/s, loss=0.018, val_loss_s

Epoch 1:  96%|█████████▌| 642/671 [03:43<00:10,  2.87it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 643/671 [03:44<00:09,  2.87it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 644/671 [03:44<00:09,  2.87it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▌| 645/671 [03:44<00:09,  2.87it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▋| 646/671 [03:44<00:08,  2.88it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  96%|█████████▋| 647/671 [03:44<00:08,  2.88it/s, loss=0.018, val_loss_step=0.0209, train_loss_step=0.0147, val_loss_epoch=0.0205, train_loss_epoch=0.0257]
Epoch 1:  

Epoch 2:  92%|█████████▏| 616/671 [03:39<00:19,  2.80it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 617/671 [03:40<00:19,  2.80it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 618/671 [03:40<00:18,  2.81it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 619/671 [03:40<00:18,  2.81it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  92%|█████████▏| 620/671 [03:40<00:18,  2.81it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  93%|█████████▎| 621/671 [03:40<00:17,  2.81it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  

Epoch 2:  99%|█████████▉| 665/671 [03:48<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 666/671 [03:48<00:01,  2.92it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2:  99%|█████████▉| 667/671 [03:48<00:01,  2.92it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 668/671 [03:48<00:01,  2.92it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 669/671 [03:48<00:00,  2.92it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2: 100%|█████████▉| 670/671 [03:48<00:00,  2.93it/s, loss=0.018, val_loss_step=0.0205, train_loss_step=0.0149, val_loss_epoch=0.0193, train_loss_epoch=0.0195]
Epoch 2: 1

Epoch 3:  95%|█████████▌| 639/671 [03:43<00:11,  2.86it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  95%|█████████▌| 640/671 [03:43<00:10,  2.86it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  96%|█████████▌| 641/671 [03:43<00:10,  2.86it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  96%|█████████▌| 642/671 [03:44<00:10,  2.87it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  96%|█████████▌| 643/671 [03:44<00:09,  2.87it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  96%|█████████▌| 644/671 [03:44<00:09,  2.87it/s, loss=0.019, val_loss_step=0.0254, train_loss_step=0.0278, val_loss_epoch=0.0246, train_loss_epoch=0.019]
Epoch 3:  96%|██

Epoch 4:  91%|█████████▏| 613/671 [03:38<00:20,  2.80it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  92%|█████████▏| 614/671 [03:39<00:20,  2.80it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  92%|█████████▏| 615/671 [03:39<00:19,  2.80it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  92%|█████████▏| 616/671 [03:39<00:19,  2.81it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  92%|█████████▏| 617/671 [03:39<00:19,  2.81it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  92%|█████████▏| 618/671 [03:39<00:18,  2.81it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  

Epoch 4:  99%|█████████▊| 662/671 [03:47<00:03,  2.92it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  99%|█████████▉| 663/671 [03:47<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  99%|█████████▉| 664/671 [03:47<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  99%|█████████▉| 665/671 [03:47<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  99%|█████████▉| 666/671 [03:47<00:01,  2.92it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4:  99%|█████████▉| 667/671 [03:47<00:01,  2.93it/s, loss=0.018, val_loss_step=0.0198, train_loss_step=0.0175, val_loss_epoch=0.0193, train_loss_epoch=0.0186]
Epoch 4: 1

Epoch 5:  95%|█████████▍| 636/671 [03:42<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▍| 637/671 [03:42<00:11,  2.86it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▌| 638/671 [03:42<00:11,  2.87it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▌| 639/671 [03:42<00:11,  2.87it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▌| 640/671 [03:42<00:10,  2.87it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  96%|█████████▌| 641/671 [03:43<00:10,  2.87it/s, loss=0.017, val_loss_step=0.0191, train_loss_step=0.0105, val_loss_epoch=0.019, train_loss_epoch=0.0183]
Epoch 5:  96%|██

Epoch 6:  91%|█████████ | 610/671 [03:37<00:21,  2.80it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  91%|█████████ | 611/671 [03:37<00:21,  2.81it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  91%|█████████ | 612/671 [03:37<00:21,  2.81it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  91%|█████████▏| 613/671 [03:38<00:20,  2.81it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  92%|█████████▏| 614/671 [03:38<00:20,  2.81it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  92%|█████████▏| 615/671 [03:38<00:19,  2.82it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  

Epoch 6:  98%|█████████▊| 659/671 [03:45<00:04,  2.92it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  98%|█████████▊| 660/671 [03:45<00:03,  2.92it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  99%|█████████▊| 661/671 [03:46<00:03,  2.92it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  99%|█████████▊| 662/671 [03:46<00:03,  2.93it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  99%|█████████▉| 663/671 [03:46<00:02,  2.93it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  99%|█████████▉| 664/671 [03:46<00:02,  2.93it/s, loss=0.017, val_loss_step=0.0192, train_loss_step=0.0102, val_loss_epoch=0.0203, train_loss_epoch=0.0181]
Epoch 6:  

Epoch 7:  94%|█████████▍| 633/671 [03:41<00:13,  2.86it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  94%|█████████▍| 634/671 [03:41<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  95%|█████████▍| 635/671 [03:42<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  95%|█████████▍| 636/671 [03:42<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  95%|█████████▍| 637/671 [03:42<00:11,  2.86it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  95%|█████████▌| 638/671 [03:42<00:11,  2.87it/s, loss=0.017, val_loss_step=0.0178, train_loss_step=0.0165, val_loss_epoch=0.0186, train_loss_epoch=0.0179]
Epoch 7:  

Epoch 8:  90%|█████████ | 607/671 [03:37<00:22,  2.79it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  91%|█████████ | 608/671 [03:37<00:22,  2.79it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  91%|█████████ | 609/671 [03:37<00:22,  2.80it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  91%|█████████ | 610/671 [03:37<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  91%|█████████ | 611/671 [03:38<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  91%|█████████ | 612/671 [03:38<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoc

Epoch 8:  98%|█████████▊| 656/671 [03:45<00:05,  2.91it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  98%|█████████▊| 657/671 [03:45<00:04,  2.91it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  98%|█████████▊| 658/671 [03:45<00:04,  2.91it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  98%|█████████▊| 659/671 [03:46<00:04,  2.92it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  98%|█████████▊| 660/671 [03:46<00:03,  2.92it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoch 8:  99%|█████████▊| 661/671 [03:46<00:03,  2.92it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.00855, val_loss_epoch=0.0212, train_loss_epoch=0.0176]
Epoc

Epoch 9:  94%|█████████▍| 630/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  94%|█████████▍| 631/671 [03:41<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  94%|█████████▍| 632/671 [03:41<00:13,  2.85it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  94%|█████████▍| 633/671 [03:41<00:13,  2.86it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  94%|█████████▍| 634/671 [03:41<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  95%|█████████▍| 635/671 [03:41<00:12,  2.86it/s, loss=0.017, val_loss_step=0.0531, train_loss_step=0.0138, val_loss_epoch=0.0515, train_loss_epoch=0.0174]
Epoch 9:  

Epoch 10:  90%|█████████ | 604/671 [03:36<00:24,  2.79it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  90%|█████████ | 605/671 [03:36<00:23,  2.79it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  90%|█████████ | 606/671 [03:36<00:23,  2.80it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  90%|█████████ | 607/671 [03:36<00:22,  2.80it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  91%|█████████ | 608/671 [03:37<00:22,  2.80it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  91%|█████████ | 609/671 [03:37<00:22,  2.80it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  91%|█

Epoch 10:  97%|█████████▋| 653/671 [03:44<00:06,  2.91it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  97%|█████████▋| 654/671 [03:44<00:05,  2.91it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  98%|█████████▊| 655/671 [03:44<00:05,  2.91it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  98%|█████████▊| 656/671 [03:44<00:05,  2.92it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  98%|█████████▊| 657/671 [03:45<00:04,  2.92it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  98%|█████████▊| 658/671 [03:45<00:04,  2.92it/s, loss=0.017, val_loss_step=0.0308, train_loss_step=0.0238, val_loss_epoch=0.03, train_loss_epoch=0.0172]
Epoch 10:  98%|█

Epoch 11:  93%|█████████▎| 627/671 [03:40<00:15,  2.85it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11:  94%|█████████▎| 628/671 [03:40<00:15,  2.85it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11:  94%|█████████▎| 629/671 [03:40<00:14,  2.85it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11:  94%|█████████▍| 630/671 [03:40<00:14,  2.86it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11:  94%|█████████▍| 631/671 [03:40<00:13,  2.86it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11:  94%|█████████▍| 632/671 [03:40<00:13,  2.86it/s, loss=0.016, val_loss_step=0.0203, train_loss_step=0.0123, val_loss_epoch=0.0206, train_loss_epoch=0.017]
Epoch 11: 

Epoch 12:  90%|████████▉ | 601/671 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  90%|████████▉ | 602/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  90%|████████▉ | 603/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  90%|█████████ | 604/671 [03:36<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  90%|█████████ | 605/671 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  90%|█████████ | 606/671 [03:36<00:23,  2.80it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167

Epoch 12:  97%|█████████▋| 650/671 [03:43<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  97%|█████████▋| 651/671 [03:44<00:06,  2.90it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  97%|█████████▋| 652/671 [03:44<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  97%|█████████▋| 653/671 [03:44<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  97%|█████████▋| 654/671 [03:44<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167]
Epoch 12:  98%|█████████▊| 655/671 [03:44<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0613, train_loss_step=0.00952, val_loss_epoch=0.0609, train_loss_epoch=0.0167

Epoch 13:  93%|█████████▎| 624/671 [03:39<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoch 13:  93%|█████████▎| 625/671 [03:39<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoch 13:  93%|█████████▎| 626/671 [03:39<00:15,  2.85it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoch 13:  93%|█████████▎| 627/671 [03:39<00:15,  2.86it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoch 13:  94%|█████████▎| 628/671 [03:39<00:15,  2.86it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoch 13:  94%|█████████▎| 629/671 [03:39<00:14,  2.86it/s, loss=0.016, val_loss_step=0.0606, train_loss_step=0.0185, val_loss_epoch=0.0593, train_loss_epoch=0.0166]
Epoc

Epoch 14:  89%|████████▉ | 598/671 [03:35<00:26,  2.78it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  89%|████████▉ | 599/671 [03:35<00:25,  2.78it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  89%|████████▉ | 600/671 [03:35<00:25,  2.78it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 601/671 [03:35<00:25,  2.79it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 602/671 [03:35<00:24,  2.79it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  90%|████████▉ | 603/671 [03:36<00:24,  2.79it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoc

Epoch 14:  96%|█████████▋| 647/671 [03:43<00:08,  2.90it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 648/671 [03:43<00:07,  2.90it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 649/671 [03:43<00:07,  2.90it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 650/671 [03:43<00:07,  2.91it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 651/671 [03:43<00:06,  2.91it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoch 14:  97%|█████████▋| 652/671 [03:44<00:06,  2.91it/s, loss=0.017, val_loss_step=0.0174, train_loss_step=0.0201, val_loss_epoch=0.0174, train_loss_epoch=0.0164]
Epoc

Epoch 15:  93%|█████████▎| 621/671 [03:38<00:17,  2.84it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15:  93%|█████████▎| 622/671 [03:38<00:17,  2.84it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15:  93%|█████████▎| 623/671 [03:39<00:16,  2.84it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15:  93%|█████████▎| 624/671 [03:39<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15:  93%|█████████▎| 625/671 [03:39<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15:  93%|█████████▎| 626/671 [03:39<00:15,  2.85it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoc

Epoch 15: 100%|█████████▉| 670/671 [03:46<00:00,  2.95it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.0154, val_loss_epoch=0.0172, train_loss_epoch=0.0163]
Epoch 15: 100%|██████████| 671/671 [03:47<00:00,  2.95it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0154, val_loss_epoch=0.017, train_loss_epoch=0.0163] 
Epoch 16:  89%|████████▉ | 597/671 [03:34<00:26,  2.78it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162] 
Validating: 0it [00:00, ?it/s][A
Epoch 16:  89%|████████▉ | 598/671 [03:35<00:26,  2.78it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  89%|████████▉ | 599/671 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  89%|████████▉ | 600/671 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017

Epoch 16:  96%|█████████▌| 644/671 [03:42<00:09,  2.89it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  96%|█████████▌| 645/671 [03:42<00:08,  2.89it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  96%|█████████▋| 646/671 [03:43<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  96%|█████████▋| 647/671 [03:43<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  97%|█████████▋| 648/671 [03:43<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16:  97%|█████████▋| 649/671 [03:43<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0186, val_loss_epoch=0.017, train_loss_epoch=0.0162]
Epoch 16: 

Epoch 17:  92%|█████████▏| 618/671 [03:38<00:18,  2.83it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17:  92%|█████████▏| 619/671 [03:38<00:18,  2.84it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17:  92%|█████████▏| 620/671 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17:  93%|█████████▎| 621/671 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17:  93%|█████████▎| 622/671 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17:  93%|█████████▎| 623/671 [03:38<00:16,  2.85it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoc

Epoch 17:  99%|█████████▉| 667/671 [03:46<00:01,  2.95it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17: 100%|█████████▉| 668/671 [03:46<00:01,  2.95it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17: 100%|█████████▉| 669/671 [03:46<00:00,  2.95it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17: 100%|█████████▉| 670/671 [03:46<00:00,  2.96it/s, loss=0.017, val_loss_step=0.0166, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0161]
Epoch 17: 100%|██████████| 671/671 [03:47<00:00,  2.95it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0182, val_loss_epoch=0.0166, train_loss_epoch=0.0161]
Epoch 18:  89%|████████▉ | 597/671 [03:34<00:26,  2.79it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016] 
Vali

Epoch 18:  96%|█████████▌| 641/671 [03:41<00:10,  2.89it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18:  96%|█████████▌| 642/671 [03:41<00:10,  2.89it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18:  96%|█████████▌| 643/671 [03:41<00:09,  2.90it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18:  96%|█████████▌| 644/671 [03:42<00:09,  2.90it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18:  96%|█████████▌| 645/671 [03:42<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18:  96%|█████████▋| 646/671 [03:42<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0172, val_loss_epoch=0.0166, train_loss_epoch=0.016]
Epoch 18: 

Epoch 19:  92%|█████████▏| 615/671 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 616/671 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 617/671 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 618/671 [03:38<00:18,  2.83it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 619/671 [03:38<00:18,  2.84it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  92%|█████████▏| 620/671 [03:38<00:17,  2.84it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoc

Epoch 19:  99%|█████████▉| 664/671 [03:45<00:02,  2.94it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 665/671 [03:45<00:02,  2.95it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 666/671 [03:45<00:01,  2.95it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19:  99%|█████████▉| 667/671 [03:46<00:01,  2.95it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19: 100%|█████████▉| 668/671 [03:46<00:01,  2.95it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoch 19: 100%|█████████▉| 669/671 [03:46<00:00,  2.95it/s, loss=0.015, val_loss_step=0.0164, train_loss_step=0.0154, val_loss_epoch=0.0167, train_loss_epoch=0.0159]
Epoc

Epoch 20:  95%|█████████▌| 638/671 [03:41<00:11,  2.88it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 20:  95%|█████████▌| 639/671 [03:41<00:11,  2.89it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 20:  95%|█████████▌| 640/671 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 641/671 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 642/671 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 20:  96%|█████████▌| 643/671 [03:41<00:09,  2.90it/s, loss=0.017, val_loss_step=0.0162, train_loss_step=0.0185, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoc

Epoch 21:  91%|█████████ | 612/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  91%|█████████▏| 613/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  92%|█████████▏| 614/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  92%|█████████▏| 615/671 [03:37<00:19,  2.83it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  92%|█████████▏| 616/671 [03:37<00:19,  2.83it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  92%|█████████▏| 617/671 [03:37<00:19,  2.83it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoc

Epoch 21:  99%|█████████▊| 661/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  99%|█████████▊| 662/671 [03:45<00:03,  2.94it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  99%|█████████▉| 663/671 [03:45<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  99%|█████████▉| 664/671 [03:45<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  99%|█████████▉| 665/671 [03:45<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 21:  99%|█████████▉| 666/671 [03:46<00:01,  2.95it/s, loss=0.016, val_loss_step=0.0162, train_loss_step=0.0177, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoc

Epoch 22:  95%|█████████▍| 635/671 [03:40<00:12,  2.87it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 22:  95%|█████████▍| 636/671 [03:41<00:12,  2.88it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 22:  95%|█████████▍| 637/671 [03:41<00:11,  2.88it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 22:  95%|█████████▌| 638/671 [03:41<00:11,  2.88it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 22:  95%|█████████▌| 639/671 [03:41<00:11,  2.88it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoch 22:  95%|█████████▌| 640/671 [03:41<00:10,  2.89it/s, loss=0.015, val_loss_step=0.0162, train_loss_step=0.0129, val_loss_epoch=0.0165, train_loss_epoch=0.0159]
Epoc

Epoch 23:  91%|█████████ | 609/671 [03:36<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 610/671 [03:37<00:21,  2.81it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 611/671 [03:37<00:21,  2.81it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████ | 612/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  91%|█████████▏| 613/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  92%|█████████▏| 614/671 [03:37<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoc

Epoch 23:  98%|█████████▊| 658/671 [03:44<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  98%|█████████▊| 659/671 [03:45<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  98%|█████████▊| 660/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 661/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▊| 662/671 [03:45<00:03,  2.93it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 23:  99%|█████████▉| 663/671 [03:45<00:02,  2.94it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0256, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoc

Epoch 24:  94%|█████████▍| 632/671 [03:40<00:13,  2.87it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 633/671 [03:40<00:13,  2.87it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 24:  94%|█████████▍| 634/671 [03:40<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 635/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 636/671 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoch 24:  95%|█████████▍| 637/671 [03:41<00:11,  2.88it/s, loss=0.016, val_loss_step=0.0164, train_loss_step=0.0192, val_loss_epoch=0.0166, train_loss_epoch=0.0159]
Epoc

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 11 M  


Initial Learning Rate: 0.001000
Validate iterations: 75
Train iterations: 595                                                 
Epoch 0:  89%|████████▉ | 595/670 [03:35<00:27,  2.76it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  89%|████████▉ | 596/670 [03:35<00:26,  2.76it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  89%|████████▉ | 597/670 [03:36<00:26,  2.76it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  89%|████████▉ | 598/670 [03:36<00:26,  2.77it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  89%|████████▉ | 599/670 [03:36<00:25,  2.77it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  90%|████████▉ | 600/670 [03:36<00:25,  2.77it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  90%|████████▉ | 601/670 [03:36<00:24,  2.77it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  90%|████████▉ | 602/670 [03:36<

Epoch 0:  99%|█████████▉| 665/670 [03:47<00:01,  2.93it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0:  99%|█████████▉| 666/670 [03:47<00:01,  2.93it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0: 100%|█████████▉| 667/670 [03:47<00:01,  2.93it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0: 100%|█████████▉| 668/670 [03:47<00:00,  2.93it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0: 100%|█████████▉| 669/670 [03:47<00:00,  2.93it/s, loss=0.021, val_loss_step=0.7, train_loss_step=0.0279]
Epoch 0: 100%|██████████| 670/670 [03:48<00:00,  2.93it/s, loss=0.021, val_loss_step=0.023, train_loss_step=0.0279, val_loss_epoch=0.0243]
Epoch 1:  89%|████████▉ | 595/670 [03:35<00:27,  2.76it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Validating: 0it [00:00, ?it/s][A
Epoch 1:  89%|████████▉ | 596/670 [03:36<00:26,  2.76it/s, loss=0.018, val_loss_step=0.023, t

Epoch 1:  96%|█████████▌| 640/670 [03:43<00:10,  2.86it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|█████████▌| 641/670 [03:43<00:10,  2.87it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|█████████▌| 642/670 [03:43<00:09,  2.87it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|█████████▌| 643/670 [03:43<00:09,  2.87it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|█████████▌| 644/670 [03:44<00:09,  2.87it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|█████████▋| 645/670 [03:44<00:08,  2.88it/s, loss=0.018, val_loss_step=0.023, train_loss_step=0.0167, val_loss_epoch=0.0243, train_loss_epoch=0.0247]
Epoch 1:  96%|██

Epoch 2:  91%|█████████▏| 613/670 [03:38<00:20,  2.80it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|█████████▏| 614/670 [03:39<00:19,  2.80it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|█████████▏| 615/670 [03:39<00:19,  2.80it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|█████████▏| 616/670 [03:39<00:19,  2.81it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|█████████▏| 617/670 [03:39<00:18,  2.81it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|█████████▏| 618/670 [03:39<00:18,  2.81it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  92%|████████

Epoch 2:  99%|█████████▉| 663/670 [03:47<00:02,  2.92it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  99%|█████████▉| 664/670 [03:47<00:02,  2.92it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  99%|█████████▉| 665/670 [03:47<00:01,  2.92it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2:  99%|█████████▉| 666/670 [03:47<00:01,  2.92it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2: 100%|█████████▉| 667/670 [03:47<00:01,  2.93it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2: 100%|█████████▉| 668/670 [03:48<00:00,  2.93it/s, loss=0.019, val_loss_step=0.101, train_loss_step=0.0192, val_loss_epoch=0.103, train_loss_epoch=0.0196]
Epoch 2: 100%|████████

Epoch 3:  95%|█████████▍| 636/670 [03:42<00:11,  2.86it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  95%|█████████▌| 637/670 [03:42<00:11,  2.86it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  95%|█████████▌| 638/670 [03:42<00:11,  2.87it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  95%|█████████▌| 639/670 [03:42<00:10,  2.87it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  96%|█████████▌| 640/670 [03:42<00:10,  2.87it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  96%|█████████▌| 641/670 [03:43<00:10,  2.87it/s, loss=0.019, val_loss_step=0.018, train_loss_step=0.0155, val_loss_epoch=0.0197, train_loss_epoch=0.0191]
Epoch 3:  96%|██

Epoch 4:  91%|█████████ | 609/670 [03:38<00:21,  2.79it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  91%|█████████ | 610/670 [03:38<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  91%|█████████ | 611/670 [03:38<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  91%|█████████▏| 612/670 [03:38<00:20,  2.80it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  91%|█████████▏| 613/670 [03:38<00:20,  2.80it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  92%|█████████▏| 614/670 [03:38<00:19,  2.80it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  

Epoch 4:  98%|█████████▊| 658/670 [03:46<00:04,  2.91it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  98%|█████████▊| 659/670 [03:46<00:03,  2.91it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  99%|█████████▊| 660/670 [03:46<00:03,  2.91it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  99%|█████████▊| 661/670 [03:46<00:03,  2.92it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  99%|█████████▉| 662/670 [03:46<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  99%|█████████▉| 663/670 [03:47<00:02,  2.92it/s, loss=0.018, val_loss_step=0.0185, train_loss_step=0.0146, val_loss_epoch=0.0224, train_loss_epoch=0.0187]
Epoch 4:  

Epoch 5:  94%|█████████▍| 631/670 [03:41<00:13,  2.85it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  94%|█████████▍| 632/670 [03:41<00:13,  2.85it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  94%|█████████▍| 633/670 [03:41<00:12,  2.86it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▍| 634/670 [03:41<00:12,  2.86it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▍| 635/670 [03:41<00:12,  2.86it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  95%|█████████▍| 636/670 [03:42<00:11,  2.86it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0175, val_loss_epoch=0.0202, train_loss_epoch=0.0183]
Epoch 5:  

Epoch 6:  90%|█████████ | 604/670 [03:36<00:23,  2.79it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  90%|█████████ | 605/670 [03:36<00:23,  2.79it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  90%|█████████ | 606/670 [03:36<00:22,  2.79it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  91%|█████████ | 607/670 [03:37<00:22,  2.80it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  91%|█████████ | 608/670 [03:37<00:22,  2.80it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  91%|█████████ | 609/670 [03:37<00:21,  2.80it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  

Epoch 6:  97%|█████████▋| 653/670 [03:44<00:05,  2.91it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  98%|█████████▊| 654/670 [03:44<00:05,  2.91it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  98%|█████████▊| 655/670 [03:45<00:05,  2.91it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  98%|█████████▊| 656/670 [03:45<00:04,  2.91it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  98%|█████████▊| 657/670 [03:45<00:04,  2.92it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  98%|█████████▊| 658/670 [03:45<00:04,  2.92it/s, loss=0.018, val_loss_step=0.0178, train_loss_step=0.0212, val_loss_epoch=0.0195, train_loss_epoch=0.0179]
Epoch 6:  

Epoch 7:  93%|█████████▎| 626/670 [03:39<00:15,  2.85it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▎| 627/670 [03:40<00:15,  2.85it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▎| 628/670 [03:40<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▍| 629/670 [03:40<00:14,  2.85it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▍| 630/670 [03:40<00:14,  2.86it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  94%|█████████▍| 631/670 [03:40<00:13,  2.86it/s, loss=0.017, val_loss_step=0.0197, train_loss_step=0.0169, val_loss_epoch=0.0203, train_loss_epoch=0.0176]
Epoch 7:  

Epoch 8:  89%|████████▉ | 599/670 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  90%|████████▉ | 600/670 [03:35<00:25,  2.78it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  90%|████████▉ | 601/670 [03:35<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  90%|████████▉ | 602/670 [03:35<00:24,  2.79it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  90%|█████████ | 603/670 [03:35<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  90%|█████████ | 604/670 [03:36<00:23,  2.79it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  

Epoch 8:  97%|█████████▋| 648/670 [03:43<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  97%|█████████▋| 649/670 [03:43<00:07,  2.90it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  97%|█████████▋| 650/670 [03:43<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  97%|█████████▋| 651/670 [03:43<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  97%|█████████▋| 652/670 [03:44<00:06,  2.91it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  97%|█████████▋| 653/670 [03:44<00:05,  2.91it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0182, val_loss_epoch=0.0215, train_loss_epoch=0.0172]
Epoch 8:  

Epoch 9:  93%|█████████▎| 621/670 [03:38<00:17,  2.84it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  93%|█████████▎| 622/670 [03:38<00:16,  2.84it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  93%|█████████▎| 623/670 [03:38<00:16,  2.85it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  93%|█████████▎| 624/670 [03:39<00:16,  2.85it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  93%|█████████▎| 625/670 [03:39<00:15,  2.85it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  93%|█████████▎| 626/670 [03:39<00:15,  2.85it/s, loss=0.018, val_loss_step=0.0212, train_loss_step=0.0166, val_loss_epoch=0.0208, train_loss_epoch=0.017]
Epoch 9:  94%|██

Epoch 9: 100%|██████████| 670/670 [03:46<00:00,  2.95it/s, loss=0.018, val_loss_step=0.0239, train_loss_step=0.0166, val_loss_epoch=0.0217, train_loss_epoch=0.017]
Epoch 10:  89%|████████▉ | 595/670 [03:33<00:26,  2.78it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168] 
Validating: 0it [00:00, ?it/s][A
Epoch 10:  89%|████████▉ | 596/670 [03:34<00:26,  2.78it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  89%|████████▉ | 597/670 [03:34<00:26,  2.79it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  89%|████████▉ | 598/670 [03:34<00:25,  2.79it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  89%|████████▉ | 599/670 [03:34<00:25,  2.79it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.02

Epoch 10:  96%|█████████▌| 643/670 [03:41<00:09,  2.90it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  96%|█████████▌| 644/670 [03:42<00:08,  2.90it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  96%|█████████▋| 645/670 [03:42<00:08,  2.90it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  96%|█████████▋| 646/670 [03:42<00:08,  2.90it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  97%|█████████▋| 647/670 [03:42<00:07,  2.91it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoch 10:  97%|█████████▋| 648/670 [03:42<00:07,  2.91it/s, loss=0.017, val_loss_step=0.0239, train_loss_step=0.0233, val_loss_epoch=0.0217, train_loss_epoch=0.0168]
Epoc

Epoch 11:  92%|█████████▏| 616/670 [03:37<00:19,  2.83it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  92%|█████████▏| 617/670 [03:37<00:18,  2.83it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  92%|█████████▏| 618/670 [03:37<00:18,  2.84it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  92%|█████████▏| 619/670 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  93%|█████████▎| 620/670 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  93%|█████████▎| 621/670 [03:38<00:17,  2.84it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  93%|█

Epoch 11:  99%|█████████▉| 665/670 [03:45<00:01,  2.95it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11:  99%|█████████▉| 666/670 [03:45<00:01,  2.95it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11: 100%|█████████▉| 667/670 [03:46<00:01,  2.95it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11: 100%|█████████▉| 668/670 [03:46<00:00,  2.95it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11: 100%|█████████▉| 669/670 [03:46<00:00,  2.96it/s, loss=0.017, val_loss_step=0.027, train_loss_step=0.017, val_loss_epoch=0.0249, train_loss_epoch=0.0167]
Epoch 11: 100%|██████████| 670/670 [03:46<00:00,  2.96it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.017, val_loss_epoch=0.0728, train_loss_epoch=0.0167]
Epoch 12:  89%|

Epoch 12:  95%|█████████▌| 638/670 [03:41<00:11,  2.88it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoch 12:  95%|█████████▌| 639/670 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoch 12:  96%|█████████▌| 640/670 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoch 12:  96%|█████████▌| 641/670 [03:41<00:10,  2.89it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoch 12:  96%|█████████▌| 642/670 [03:41<00:09,  2.89it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoch 12:  96%|█████████▌| 643/670 [03:42<00:09,  2.90it/s, loss=0.017, val_loss_step=0.0658, train_loss_step=0.0129, val_loss_epoch=0.0728, train_loss_epoch=0.0165]
Epoc

Epoch 13:  91%|█████████ | 611/670 [03:36<00:20,  2.82it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  91%|█████████▏| 612/670 [03:36<00:20,  2.82it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  91%|█████████▏| 613/670 [03:37<00:20,  2.82it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  92%|█████████▏| 614/670 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  92%|█████████▏| 615/670 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  92%|█████████▏| 616/670 [03:37<00:19,  2.83it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoc

Epoch 13:  99%|█████████▊| 660/670 [03:44<00:03,  2.94it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  99%|█████████▊| 661/670 [03:44<00:03,  2.94it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  99%|█████████▉| 662/670 [03:45<00:02,  2.94it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  99%|█████████▉| 663/670 [03:45<00:02,  2.94it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  99%|█████████▉| 664/670 [03:45<00:02,  2.95it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoch 13:  99%|█████████▉| 665/670 [03:45<00:01,  2.95it/s, loss=0.015, val_loss_step=0.0543, train_loss_step=0.0174, val_loss_epoch=0.0542, train_loss_epoch=0.0164]
Epoc

Epoch 14:  94%|█████████▍| 633/670 [03:40<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoch 14:  95%|█████████▍| 634/670 [03:40<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoch 14:  95%|█████████▍| 635/670 [03:41<00:12,  2.87it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoch 14:  95%|█████████▍| 636/670 [03:41<00:11,  2.87it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoch 14:  95%|█████████▌| 637/670 [03:41<00:11,  2.88it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoch 14:  95%|█████████▌| 638/670 [03:41<00:11,  2.88it/s, loss=0.016, val_loss_step=0.0279, train_loss_step=0.0175, val_loss_epoch=0.0296, train_loss_epoch=0.0163]
Epoc

Epoch 15:  90%|█████████ | 606/670 [03:35<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  91%|█████████ | 607/670 [03:35<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  91%|█████████ | 608/670 [03:35<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  91%|█████████ | 609/670 [03:36<00:21,  2.82it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  91%|█████████ | 610/670 [03:36<00:21,  2.82it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  91%|█████████ | 611/670 [03:36<00:20,  2.82it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15: 

Epoch 15:  98%|█████████▊| 655/670 [03:43<00:05,  2.93it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  98%|█████████▊| 656/670 [03:43<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  98%|█████████▊| 657/670 [03:44<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  98%|█████████▊| 658/670 [03:44<00:04,  2.93it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  98%|█████████▊| 659/670 [03:44<00:03,  2.94it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15:  99%|█████████▊| 660/670 [03:44<00:03,  2.94it/s, loss=0.016, val_loss_step=0.0501, train_loss_step=0.017, val_loss_epoch=0.0434, train_loss_epoch=0.0162]
Epoch 15: 

Epoch 16:  94%|█████████▎| 628/670 [03:38<00:14,  2.87it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  94%|█████████▍| 629/670 [03:39<00:14,  2.87it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  94%|█████████▍| 630/670 [03:39<00:13,  2.87it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  94%|█████████▍| 631/670 [03:39<00:13,  2.88it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  94%|█████████▍| 632/670 [03:39<00:13,  2.88it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  94%|█████████▍| 633/670 [03:39<00:12,  2.88it/s, loss=0.016, val_loss_step=0.023, train_loss_step=0.0219, val_loss_epoch=0.0255, train_loss_epoch=0.016]
Epoch 16:  95%|█

Epoch 17:  90%|████████▉ | 601/670 [03:34<00:24,  2.80it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  90%|████████▉ | 602/670 [03:34<00:24,  2.80it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  90%|█████████ | 603/670 [03:35<00:23,  2.80it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  90%|█████████ | 604/670 [03:35<00:23,  2.81it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  90%|█████████ | 605/670 [03:35<00:23,  2.81it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  90%|█████████ | 606/670 [03:35<00:22,  2.81it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17: 

Epoch 17:  97%|█████████▋| 650/670 [03:42<00:06,  2.92it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  97%|█████████▋| 651/670 [03:42<00:06,  2.92it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  97%|█████████▋| 652/670 [03:43<00:06,  2.92it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  97%|█████████▋| 653/670 [03:43<00:05,  2.92it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  98%|█████████▊| 654/670 [03:43<00:05,  2.93it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17:  98%|█████████▊| 655/670 [03:43<00:05,  2.93it/s, loss=0.016, val_loss_step=0.0224, train_loss_step=0.0172, val_loss_epoch=0.023, train_loss_epoch=0.0159]
Epoch 17: 

Epoch 18:  93%|█████████▎| 623/670 [03:38<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18:  93%|█████████▎| 624/670 [03:38<00:16,  2.85it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18:  93%|█████████▎| 625/670 [03:38<00:15,  2.86it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18:  93%|█████████▎| 626/670 [03:38<00:15,  2.86it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18:  94%|█████████▎| 627/670 [03:39<00:15,  2.86it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18:  94%|█████████▎| 628/670 [03:39<00:14,  2.86it/s, loss=0.016, val_loss_step=0.0171, train_loss_step=0.0155, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 18: 

Epoch 19:  89%|████████▉ | 596/670 [03:33<00:26,  2.79it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  89%|████████▉ | 597/670 [03:34<00:26,  2.79it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  89%|████████▉ | 598/670 [03:34<00:25,  2.79it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  89%|████████▉ | 599/670 [03:34<00:25,  2.79it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  90%|████████▉ | 600/670 [03:34<00:25,  2.80it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  90%|████████▉ | 601/670 [03:34<00:24,  2.80it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  90%|█

Epoch 19:  96%|█████████▋| 645/670 [03:42<00:08,  2.90it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  96%|█████████▋| 646/670 [03:42<00:08,  2.91it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  97%|█████████▋| 647/670 [03:42<00:07,  2.91it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  97%|█████████▋| 648/670 [03:42<00:07,  2.91it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  97%|█████████▋| 649/670 [03:42<00:07,  2.91it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  97%|█████████▋| 650/670 [03:42<00:06,  2.92it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.017, val_loss_epoch=0.017, train_loss_epoch=0.0158]
Epoch 19:  97%|█

Epoch 20:  92%|█████████▏| 618/670 [03:37<00:18,  2.84it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20:  92%|█████████▏| 619/670 [03:37<00:17,  2.84it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20:  93%|█████████▎| 620/670 [03:37<00:17,  2.84it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20:  93%|█████████▎| 621/670 [03:38<00:17,  2.85it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20:  93%|█████████▎| 622/670 [03:38<00:16,  2.85it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20:  93%|█████████▎| 623/670 [03:38<00:16,  2.85it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20: 

Epoch 20: 100%|█████████▉| 667/670 [03:45<00:01,  2.96it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20: 100%|█████████▉| 668/670 [03:45<00:00,  2.96it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20: 100%|█████████▉| 669/670 [03:45<00:00,  2.96it/s, loss=0.015, val_loss_step=0.017, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 20: 100%|██████████| 670/670 [03:46<00:00,  2.96it/s, loss=0.015, val_loss_step=0.0169, train_loss_step=0.0129, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  89%|████████▉ | 595/670 [03:33<00:26,  2.78it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]  
Validating: 0it [00:00, ?it/s][A
Epoch 21:  89%|████████▉ | 596/670 [03:34<00:26,  2.78it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166

Epoch 21:  96%|█████████▌| 640/670 [03:41<00:10,  2.89it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  96%|█████████▌| 641/670 [03:41<00:10,  2.89it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  96%|█████████▌| 642/670 [03:41<00:09,  2.89it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  96%|█████████▌| 643/670 [03:41<00:09,  2.90it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  96%|█████████▌| 644/670 [03:42<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21:  96%|█████████▋| 645/670 [03:42<00:08,  2.90it/s, loss=0.016, val_loss_step=0.0169, train_loss_step=0.014, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 21: 

Epoch 22:  91%|█████████▏| 613/670 [03:36<00:20,  2.83it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  92%|█████████▏| 614/670 [03:36<00:19,  2.83it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  92%|█████████▏| 615/670 [03:36<00:19,  2.83it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  92%|█████████▏| 616/670 [03:37<00:19,  2.84it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  92%|█████████▏| 617/670 [03:37<00:18,  2.84it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  92%|█████████▏| 618/670 [03:37<00:18,  2.84it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoc

Epoch 22:  99%|█████████▉| 662/670 [03:44<00:02,  2.95it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  99%|█████████▉| 663/670 [03:44<00:02,  2.95it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  99%|█████████▉| 664/670 [03:45<00:02,  2.95it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  99%|█████████▉| 665/670 [03:45<00:01,  2.95it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22:  99%|█████████▉| 666/670 [03:45<00:01,  2.96it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoch 22: 100%|█████████▉| 667/670 [03:45<00:01,  2.96it/s, loss=0.016, val_loss_step=0.0168, train_loss_step=0.0123, val_loss_epoch=0.0166, train_loss_epoch=0.0157]
Epoc

Epoch 23:  95%|█████████▍| 635/670 [03:40<00:12,  2.89it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 23:  95%|█████████▍| 636/670 [03:40<00:11,  2.89it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 23:  95%|█████████▌| 637/670 [03:40<00:11,  2.89it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 23:  95%|█████████▌| 638/670 [03:40<00:11,  2.89it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 23:  95%|█████████▌| 639/670 [03:40<00:10,  2.90it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoch 23:  96%|█████████▌| 640/670 [03:40<00:10,  2.90it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0182, val_loss_epoch=0.0169, train_loss_epoch=0.0158]
Epoc

Epoch 24:  91%|█████████ | 608/670 [03:35<00:22,  2.82it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  91%|█████████ | 609/670 [03:35<00:21,  2.82it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  91%|█████████ | 610/670 [03:36<00:21,  2.82it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  91%|█████████ | 611/670 [03:36<00:20,  2.83it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  91%|█████████▏| 612/670 [03:36<00:20,  2.83it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  91%|█████████▏| 613/670 [03:36<00:20,  2.83it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  92%|█

Epoch 24:  98%|█████████▊| 657/670 [03:43<00:04,  2.94it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  98%|█████████▊| 658/670 [03:43<00:04,  2.94it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  98%|█████████▊| 659/670 [03:44<00:03,  2.94it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  99%|█████████▊| 660/670 [03:44<00:03,  2.94it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  99%|█████████▊| 661/670 [03:44<00:03,  2.94it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  99%|█████████▉| 662/670 [03:44<00:02,  2.95it/s, loss=0.016, val_loss_step=0.019, train_loss_step=0.014, val_loss_epoch=0.0184, train_loss_epoch=0.0158]
Epoch 24:  99%|█

In [36]:
%%time
#b0
score= Exec(param_space)
print("score: " + str(score))

~~~~~~~~~~~~~~~~~~~~ SEED 42 ~~~~~~~~~~~~~~~~~~~~


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 4 M   


Initial Learning Rate: 0.001000
Validate iterations: 38
Train iterations: 149                                                 
Epoch 0:  80%|███████▉  | 149/187 [00:59<00:15,  2.50it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Epoch 0:  81%|████████▏ | 152/187 [01:00<00:13,  2.51it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Validating:  11%|█         | 4/38 [00:01<00:11,  3.07it/s][A
Epoch 0:  82%|████████▏ | 154/187 [01:00<00:13,  2.53it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Epoch 0:  83%|████████▎ | 156/187 [01:01<00:12,  2.54it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Validating:  21%|██        | 8/38 [00:01<00:06,  4.58it/s][A
Epoch 0:  84%|████████▍ | 158/187 [01:01<00:11,  2.56it/s, loss=0.020, val_loss_step=0.688, train_loss_step=0.0206]
Validating:  26%|██

Epoch 2:  83%|████████▎ | 156/187 [01:01<00:12,  2.53it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.0198, val_loss_epoch=0.0485, train_loss_epoch=0.0191]
Validating:  21%|██        | 8/38 [00:01<00:07,  4.26it/s][A
Epoch 2:  84%|████████▍ | 158/187 [01:01<00:11,  2.55it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.0198, val_loss_epoch=0.0485, train_loss_epoch=0.0191]
Epoch 2:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.0198, val_loss_epoch=0.0485, train_loss_epoch=0.0191]
Epoch 2:  87%|████████▋ | 162/187 [01:02<00:09,  2.58it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.0198, val_loss_epoch=0.0485, train_loss_epoch=0.0191]
Epoch 2:  88%|████████▊ | 164/187 [01:03<00:08,  2.60it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.0198, val_loss_epoch=0.0485, train_loss_epoch=0.0191]
Epoch 2:  89%|████████▉ | 166/187 [01:03<00:08,  2.62it/s, loss=0.018, val_loss_step=0.0455, train_loss_step=0.01

Epoch 4:  89%|████████▉ | 166/187 [01:03<00:08,  2.61it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  90%|████████▉ | 168/187 [01:03<00:07,  2.63it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  91%|█████████ | 170/187 [01:04<00:06,  2.64it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  92%|█████████▏| 172/187 [01:04<00:05,  2.66it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  93%|█████████▎| 174/187 [01:05<00:04,  2.68it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  94%|█████████▍| 176/187 [01:05<00:04,  2.69it/s, loss=0.017, val_loss_step=0.0436, train_loss_step=0.017, val_loss_epoch=0.0465, train_loss_epoch=0.0172]
Epoch 4:  95%|██

Epoch 6:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  91%|█████████ | 170/187 [01:04<00:06,  2.65it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  92%|█████████▏| 172/187 [01:04<00:05,  2.67it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  93%|█████████▎| 174/187 [01:04<00:04,  2.69it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  94%|█████████▍| 176/187 [01:05<00:04,  2.70it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  95%|█████████▌| 178/187 [01:05<00:03,  2.72it/s, loss=0.016, val_loss_step=0.0985, train_loss_step=0.0204, val_loss_epoch=0.105, train_loss_epoch=0.0167]
Epoch 6:  96%|██

Epoch 8:  94%|█████████▍| 176/187 [01:05<00:04,  2.70it/s, loss=0.016, val_loss_step=0.021, train_loss_step=0.0135, val_loss_epoch=0.0233, train_loss_epoch=0.0162]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  5.67it/s][A
Epoch 8:  95%|█████████▌| 178/187 [01:05<00:03,  2.71it/s, loss=0.016, val_loss_step=0.021, train_loss_step=0.0135, val_loss_epoch=0.0233, train_loss_epoch=0.0162]
Validating:  79%|███████▉  | 30/38 [00:05<00:01,  5.67it/s][A
Epoch 8:  96%|█████████▋| 180/187 [01:05<00:02,  2.73it/s, loss=0.016, val_loss_step=0.021, train_loss_step=0.0135, val_loss_epoch=0.0233, train_loss_epoch=0.0162]
Validating:  84%|████████▍ | 32/38 [00:06<00:01,  5.70it/s][A
Epoch 8:  97%|█████████▋| 182/187 [01:06<00:01,  2.74it/s, loss=0.016, val_loss_step=0.021, train_loss_step=0.0135, val_loss_epoch=0.0233, train_loss_epoch=0.0162]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.68it/s][A
Epoch 8:  98%|█████████▊| 184/187 [01:06<00:01,  2.76it/s, loss=0.016, val_loss_step=0.021, 

Epoch 10:  97%|█████████▋| 182/187 [01:06<00:01,  2.72it/s, loss=0.015, val_loss_step=0.0301, train_loss_step=0.0139, val_loss_epoch=0.0318, train_loss_epoch=0.0158]
Epoch 10:  98%|█████████▊| 184/187 [01:07<00:01,  2.74it/s, loss=0.015, val_loss_step=0.0301, train_loss_step=0.0139, val_loss_epoch=0.0318, train_loss_epoch=0.0158]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  6.14it/s][A
Epoch 10: 100%|██████████| 187/187 [01:07<00:00,  2.76it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0139, val_loss_epoch=0.0191, train_loss_epoch=0.0158]
Epoch 11:  80%|███████▉  | 149/187 [01:00<00:15,  2.48it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0182, val_loss_epoch=0.0191, train_loss_epoch=0.0156]
Validating: 0it [00:00, ?it/s][A
Epoch 11:  80%|████████  | 150/187 [01:00<00:14,  2.48it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0182, val_loss_epoch=0.0191, train_loss_epoch=0.0156]
Epoch 11:  81%|████████▏ | 152/187 [01:00<00:14,  2.49it/s, loss=0.015, v

Epoch 13:  80%|████████  | 150/187 [01:01<00:15,  2.46it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0138, val_loss_epoch=0.019, train_loss_epoch=0.0152]
Validating:   5%|▌         | 2/38 [00:00<00:13,  2.64it/s][A
Epoch 13:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0138, val_loss_epoch=0.019, train_loss_epoch=0.0152]
Epoch 13:  82%|████████▏ | 154/187 [01:01<00:13,  2.49it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0138, val_loss_epoch=0.019, train_loss_epoch=0.0152]
Epoch 13:  83%|████████▎ | 156/187 [01:02<00:12,  2.51it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0138, val_loss_epoch=0.019, train_loss_epoch=0.0152]
Epoch 13:  84%|████████▍ | 158/187 [01:02<00:11,  2.53it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0138, val_loss_epoch=0.019, train_loss_epoch=0.0152]
Epoch 13:  86%|████████▌ | 160/187 [01:02<00:10,  2.55it/s, loss=0.014, val_loss_step=0.0153, train_loss_step=0.0

Epoch 15:  86%|████████▌ | 160/187 [01:02<00:10,  2.56it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15:  87%|████████▋ | 162/187 [01:02<00:09,  2.58it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15:  88%|████████▊ | 164/187 [01:03<00:08,  2.60it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15:  89%|████████▉ | 166/187 [01:03<00:08,  2.61it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15:  90%|████████▉ | 168/187 [01:03<00:07,  2.63it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15:  91%|█████████ | 170/187 [01:04<00:06,  2.64it/s, loss=0.015, val_loss_step=0.0155, train_loss_step=0.017, val_loss_epoch=0.0181, train_loss_epoch=0.0148]
Epoch 15: 

Epoch 17:  91%|█████████ | 170/187 [01:04<00:06,  2.64it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoch 17:  92%|█████████▏| 172/187 [01:04<00:05,  2.66it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoch 17:  93%|█████████▎| 174/187 [01:05<00:04,  2.68it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoch 17:  94%|█████████▍| 176/187 [01:05<00:04,  2.69it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoch 17:  95%|█████████▌| 178/187 [01:05<00:03,  2.71it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoch 17:  96%|█████████▋| 180/187 [01:06<00:02,  2.72it/s, loss=0.015, val_loss_step=0.0307, train_loss_step=0.0129, val_loss_epoch=0.0324, train_loss_epoch=0.0144]
Epoc

Epoch 19:  95%|█████████▌| 178/187 [01:05<00:03,  2.70it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.014]
Epoch 19:  96%|█████████▋| 180/187 [01:06<00:02,  2.72it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.014]
Validating:  84%|████████▍ | 32/38 [00:06<00:00,  6.14it/s][A
Epoch 19:  97%|█████████▋| 182/187 [01:06<00:01,  2.73it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.014]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.83it/s][A
Epoch 19:  98%|█████████▊| 184/187 [01:06<00:01,  2.75it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.014]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  5.91it/s][A
Epoch 19:  99%|█████████▉| 186/187 [01:07<00:00,  2.76it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoc

Epoch 21:  98%|█████████▊| 184/187 [01:06<00:01,  2.76it/s, loss=0.014, val_loss_step=0.0128, train_loss_step=0.0107, val_loss_epoch=0.0166, train_loss_epoch=0.0139]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  6.07it/s][A
Epoch 21: 100%|██████████| 187/187 [01:07<00:00,  2.78it/s, loss=0.014, val_loss_step=0.0129, train_loss_step=0.0107, val_loss_epoch=0.0166, train_loss_epoch=0.0139]
Epoch 22:  80%|███████▉  | 149/187 [01:00<00:15,  2.48it/s, loss=0.014, val_loss_step=0.0129, train_loss_step=0.0141, val_loss_epoch=0.0166, train_loss_epoch=0.0139]
Validating: 0it [00:00, ?it/s][A
Epoch 22:  80%|████████  | 150/187 [01:00<00:14,  2.47it/s, loss=0.014, val_loss_step=0.0129, train_loss_step=0.0141, val_loss_epoch=0.0166, train_loss_epoch=0.0139]
Epoch 22:  81%|████████▏ | 152/187 [01:00<00:14,  2.49it/s, loss=0.014, val_loss_step=0.0129, train_loss_step=0.0141, val_loss_epoch=0.0166, train_loss_epoch=0.0139]
Validating:  11%|█         | 4/38 [00:00<00:10,  3.20it/s][A
Epoch 22:  

Epoch 23:  99%|█████████▉| 186/187 [01:07<00:00,  2.76it/s, loss=0.014, val_loss_step=0.0129, train_loss_step=0.0108, val_loss_epoch=0.0167, train_loss_epoch=0.0139]
Epoch 23: 100%|██████████| 187/187 [01:07<00:00,  2.77it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0108, val_loss_epoch=0.0167, train_loss_epoch=0.0139] 
Epoch 24:  80%|███████▉  | 149/187 [01:00<00:15,  2.48it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0157, val_loss_epoch=0.0167, train_loss_epoch=0.014] 
Validating: 0it [00:00, ?it/s][A
Epoch 24:  80%|████████  | 150/187 [01:00<00:14,  2.47it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0157, val_loss_epoch=0.0167, train_loss_epoch=0.014]
Epoch 24:  81%|████████▏ | 152/187 [01:00<00:14,  2.49it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0157, val_loss_epoch=0.0167, train_loss_epoch=0.014]
Epoch 24:  82%|████████▏ | 154/187 [01:01<00:13,  2.51it/s, loss=0.014, val_loss_step=0.013, train_loss_step=0.0157, val_loss_epoch=0.0167, t

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 4 M   


Initial Learning Rate: 0.001000
Validate iterations: 38
Train iterations: 149                                                 
Epoch 0:  80%|███████▉  | 149/187 [00:59<00:15,  2.49it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Epoch 0:  81%|████████▏ | 152/187 [01:00<00:13,  2.50it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Epoch 0:  82%|████████▏ | 154/187 [01:01<00:13,  2.52it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Epoch 0:  83%|████████▎ | 156/187 [01:01<00:12,  2.54it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Epoch 0:  84%|████████▍ | 158/187 [01:01<00:11,  2.56it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0214]
Validating:  26%|██▋       | 10/38 [00:01<00:05,  4.98it/s][A
Epoch 0:  86%|████████▌ | 160/187 [01:02<00:10,  2.58it/s, loss=0.020, val_loss_step=0

Epoch 2:  86%|████████▌ | 160/187 [01:02<00:10,  2.58it/s, loss=0.018, val_loss_step=0.0276, train_loss_step=0.0165, val_loss_epoch=0.0307, train_loss_epoch=0.0194]
Epoch 2:  87%|████████▋ | 162/187 [01:02<00:09,  2.60it/s, loss=0.018, val_loss_step=0.0276, train_loss_step=0.0165, val_loss_epoch=0.0307, train_loss_epoch=0.0194]
Validating:  37%|███▋      | 14/38 [00:02<00:03,  6.05it/s][A
Epoch 2:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.018, val_loss_step=0.0276, train_loss_step=0.0165, val_loss_epoch=0.0307, train_loss_epoch=0.0194]
Epoch 2:  89%|████████▉ | 166/187 [01:03<00:07,  2.63it/s, loss=0.018, val_loss_step=0.0276, train_loss_step=0.0165, val_loss_epoch=0.0307, train_loss_epoch=0.0194]
Validating:  47%|████▋     | 18/38 [00:03<00:03,  6.25it/s][A
Epoch 2:  90%|████████▉ | 168/187 [01:03<00:07,  2.65it/s, loss=0.018, val_loss_step=0.0276, train_loss_step=0.0165, val_loss_epoch=0.0307, train_loss_epoch=0.0194]
Epoch 2:  91%|█████████ | 170/187 [01:03<00:06,  

Epoch 4:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.017, val_loss_step=0.079, train_loss_step=0.0166, val_loss_epoch=0.0789, train_loss_epoch=0.0176]
Epoch 4:  91%|█████████ | 170/187 [01:03<00:06,  2.66it/s, loss=0.017, val_loss_step=0.079, train_loss_step=0.0166, val_loss_epoch=0.0789, train_loss_epoch=0.0176]
Epoch 4:  92%|█████████▏| 172/187 [01:04<00:05,  2.68it/s, loss=0.017, val_loss_step=0.079, train_loss_step=0.0166, val_loss_epoch=0.0789, train_loss_epoch=0.0176]
Validating:  63%|██████▎   | 24/38 [00:04<00:02,  6.00it/s][A
Epoch 4:  93%|█████████▎| 174/187 [01:04<00:04,  2.69it/s, loss=0.017, val_loss_step=0.079, train_loss_step=0.0166, val_loss_epoch=0.0789, train_loss_epoch=0.0176]
Validating:  68%|██████▊   | 26/38 [00:04<00:01,  6.02it/s][A
Epoch 4:  94%|█████████▍| 176/187 [01:04<00:04,  2.71it/s, loss=0.017, val_loss_step=0.079, train_loss_step=0.0166, val_loss_epoch=0.0789, train_loss_epoch=0.0176]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  6.01

Epoch 6:  94%|█████████▍| 176/187 [01:04<00:04,  2.71it/s, loss=0.016, val_loss_step=0.0418, train_loss_step=0.0193, val_loss_epoch=0.0435, train_loss_epoch=0.0167]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  6.49it/s][A
Epoch 6:  95%|█████████▌| 178/187 [01:05<00:03,  2.73it/s, loss=0.016, val_loss_step=0.0418, train_loss_step=0.0193, val_loss_epoch=0.0435, train_loss_epoch=0.0167]
Epoch 6:  96%|█████████▋| 180/187 [01:05<00:02,  2.74it/s, loss=0.016, val_loss_step=0.0418, train_loss_step=0.0193, val_loss_epoch=0.0435, train_loss_epoch=0.0167]
Epoch 6:  97%|█████████▋| 182/187 [01:06<00:01,  2.76it/s, loss=0.016, val_loss_step=0.0418, train_loss_step=0.0193, val_loss_epoch=0.0435, train_loss_epoch=0.0167]
Epoch 6:  98%|█████████▊| 184/187 [01:06<00:01,  2.77it/s, loss=0.016, val_loss_step=0.0418, train_loss_step=0.0193, val_loss_epoch=0.0435, train_loss_epoch=0.0167]
Epoch 6: 100%|██████████| 187/187 [01:06<00:00,  2.80it/s, loss=0.016, val_loss_step=0.0531, train_loss_step=0.0

Epoch 8:  96%|█████████▋| 180/187 [01:05<00:02,  2.75it/s, loss=0.016, val_loss_step=0.0238, train_loss_step=0.016, val_loss_epoch=0.0254, train_loss_epoch=0.0163]
Epoch 8:  97%|█████████▋| 182/187 [01:05<00:01,  2.76it/s, loss=0.016, val_loss_step=0.0238, train_loss_step=0.016, val_loss_epoch=0.0254, train_loss_epoch=0.0163]
Epoch 8:  98%|█████████▊| 184/187 [01:06<00:01,  2.78it/s, loss=0.016, val_loss_step=0.0238, train_loss_step=0.016, val_loss_epoch=0.0254, train_loss_epoch=0.0163]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  6.53it/s][A
Epoch 8: 100%|██████████| 187/187 [01:06<00:00,  2.80it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.016, val_loss_epoch=0.0194, train_loss_epoch=0.0163]
Epoch 9:  80%|███████▉  | 149/187 [00:59<00:15,  2.49it/s, loss=0.016, val_loss_step=0.0177, train_loss_step=0.0148, val_loss_epoch=0.0194, train_loss_epoch=0.0161]
Validating: 0it [00:00, ?it/s][A
Epoch 9:  80%|████████  | 150/187 [01:00<00:14,  2.48it/s, loss=0.016, val_loss_st

Epoch 11:  80%|███████▉  | 149/187 [01:00<00:15,  2.48it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0191, train_loss_epoch=0.0157]
Validating: 0it [00:00, ?it/s][A
Epoch 11:  80%|████████  | 150/187 [01:00<00:14,  2.48it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0191, train_loss_epoch=0.0157]
Epoch 11:  81%|████████▏ | 152/187 [01:00<00:14,  2.50it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0191, train_loss_epoch=0.0157]
Epoch 11:  82%|████████▏ | 154/187 [01:01<00:13,  2.51it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0191, train_loss_epoch=0.0157]
Epoch 11:  83%|████████▎ | 156/187 [01:01<00:12,  2.53it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0191, train_loss_epoch=0.0157]
Epoch 11:  84%|████████▍ | 158/187 [01:01<00:11,  2.55it/s, loss=0.015, val_loss_step=0.0173, train_loss_step=0.0158, val_loss_epoch=0.0

Epoch 13:  82%|████████▏ | 154/187 [01:00<00:13,  2.53it/s, loss=0.015, val_loss_step=0.0141, train_loss_step=0.0175, val_loss_epoch=0.0172, train_loss_epoch=0.0153]
Validating:  16%|█▌        | 6/38 [00:01<00:08,  3.94it/s][A
Epoch 13:  83%|████████▎ | 156/187 [01:01<00:12,  2.55it/s, loss=0.015, val_loss_step=0.0141, train_loss_step=0.0175, val_loss_epoch=0.0172, train_loss_epoch=0.0153]
Epoch 13:  84%|████████▍ | 158/187 [01:01<00:11,  2.57it/s, loss=0.015, val_loss_step=0.0141, train_loss_step=0.0175, val_loss_epoch=0.0172, train_loss_epoch=0.0153]
Validating:  26%|██▋       | 10/38 [00:01<00:05,  5.16it/s][A
Epoch 13:  86%|████████▌ | 160/187 [01:01<00:10,  2.58it/s, loss=0.015, val_loss_step=0.0141, train_loss_step=0.0175, val_loss_epoch=0.0172, train_loss_epoch=0.0153]
Validating:  32%|███▏      | 12/38 [00:02<00:04,  5.69it/s][A
Epoch 13:  87%|████████▋ | 162/187 [01:02<00:09,  2.60it/s, loss=0.015, val_loss_step=0.0141, train_loss_step=0.0175, val_loss_epoch=0.0172, train_l

Epoch 15:  81%|████████▏ | 152/187 [01:00<00:14,  2.50it/s, loss=0.015, val_loss_step=0.0147, train_loss_step=0.0149, val_loss_epoch=0.0198, train_loss_epoch=0.0149]
Validating:  11%|█         | 4/38 [00:00<00:10,  3.37it/s][A
Epoch 15:  82%|████████▏ | 154/187 [01:01<00:13,  2.52it/s, loss=0.015, val_loss_step=0.0147, train_loss_step=0.0149, val_loss_epoch=0.0198, train_loss_epoch=0.0149]
Epoch 15:  83%|████████▎ | 156/187 [01:01<00:12,  2.54it/s, loss=0.015, val_loss_step=0.0147, train_loss_step=0.0149, val_loss_epoch=0.0198, train_loss_epoch=0.0149]
Epoch 15:  84%|████████▍ | 158/187 [01:01<00:11,  2.55it/s, loss=0.015, val_loss_step=0.0147, train_loss_step=0.0149, val_loss_epoch=0.0198, train_loss_epoch=0.0149]
Epoch 15:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.015, val_loss_step=0.0147, train_loss_step=0.0149, val_loss_epoch=0.0198, train_loss_epoch=0.0149]
Epoch 15:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.015, val_loss_step=0.0147, train_loss_ste

Epoch 17:  81%|████████▏ | 152/187 [01:00<00:13,  2.51it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoch 17:  82%|████████▏ | 154/187 [01:00<00:13,  2.53it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoch 17:  83%|████████▎ | 156/187 [01:01<00:12,  2.55it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoch 17:  84%|████████▍ | 158/187 [01:01<00:11,  2.57it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoch 17:  86%|████████▌ | 160/187 [01:01<00:10,  2.59it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoch 17:  87%|████████▋ | 162/187 [01:02<00:09,  2.60it/s, loss=0.015, val_loss_step=0.0118, train_loss_step=0.0159, val_loss_epoch=0.0167, train_loss_epoch=0.0145]
Epoc

Epoch 19:  83%|████████▎ | 156/187 [01:01<00:12,  2.54it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoch 19:  84%|████████▍ | 158/187 [01:01<00:11,  2.56it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoch 19:  86%|████████▌ | 160/187 [01:02<00:10,  2.58it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoch 19:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoch 19:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoch 19:  89%|████████▉ | 166/187 [01:03<00:07,  2.63it/s, loss=0.014, val_loss_step=0.0115, train_loss_step=0.0144, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoc

Epoch 21:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.014, val_loss_step=0.0116, train_loss_step=0.0121, val_loss_epoch=0.0161, train_loss_epoch=0.014]
Validating:  32%|███▏      | 12/38 [00:02<00:04,  5.68it/s][A
Epoch 21:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.014, val_loss_step=0.0116, train_loss_step=0.0121, val_loss_epoch=0.0161, train_loss_epoch=0.014]
Epoch 21:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.014, val_loss_step=0.0116, train_loss_step=0.0121, val_loss_epoch=0.0161, train_loss_epoch=0.014]
Validating:  42%|████▏     | 16/38 [00:03<00:03,  5.95it/s][A
Epoch 21:  89%|████████▉ | 166/187 [01:03<00:07,  2.63it/s, loss=0.014, val_loss_step=0.0116, train_loss_step=0.0121, val_loss_epoch=0.0161, train_loss_epoch=0.014]
Epoch 21:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.014, val_loss_step=0.0116, train_loss_step=0.0121, val_loss_epoch=0.0161, train_loss_epoch=0.014]
Epoch 21:  91%|█████████ | 170/187 [01:03<00:06, 

Epoch 23:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23:  89%|████████▉ | 166/187 [01:03<00:07,  2.63it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23:  91%|█████████ | 170/187 [01:03<00:06,  2.66it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23:  92%|█████████▏| 172/187 [01:04<00:05,  2.68it/s, loss=0.014, val_loss_step=0.0117, train_loss_step=0.0135, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 23: 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 4 M   


Initial Learning Rate: 0.001000
Validate iterations: 37
Train iterations: 150                                                 
Epoch 0:  80%|████████  | 150/187 [01:00<00:14,  2.48it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  81%|████████  | 151/187 [01:00<00:14,  2.48it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Epoch 0:  82%|████████▏ | 153/187 [01:01<00:13,  2.50it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Validating:  11%|█         | 4/37 [00:00<00:10,  3.20it/s][A
Epoch 0:  83%|████████▎ | 155/187 [01:01<00:12,  2.52it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Epoch 0:  84%|████████▍ | 157/187 [01:01<00:11,  2.53it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Epoch 0:  85%|████████▌ | 159/187 [01:02<00:10,  2.55it/s, loss=0.020, val_loss_step=0.69, train_loss_step=0.0241]
Epoch 0:  86%|████████▌ | 161/187 [01:02<00:10,  2.57it/s, loss=0.020, val_loss_step=0.

Epoch 2:  92%|█████████▏| 172/187 [01:04<00:05,  2.68it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  93%|█████████▎| 174/187 [01:04<00:04,  2.69it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  94%|█████████▍| 176/187 [01:04<00:04,  2.71it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  95%|█████████▌| 178/187 [01:05<00:03,  2.73it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  96%|█████████▋| 180/187 [01:05<00:02,  2.74it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  97%|█████████▋| 182/187 [01:05<00:01,  2.76it/s, loss=0.018, val_loss_step=0.0236, train_loss_step=0.0147, val_loss_epoch=0.0243, train_loss_epoch=0.019]
Epoch 2:  98%|██

Validating:  95%|█████████▍| 35/37 [00:06<00:00,  5.50it/s][A
Epoch 4:  99%|█████████▉| 186/187 [01:07<00:00,  2.77it/s, loss=0.017, val_loss_step=0.0472, train_loss_step=0.0157, val_loss_epoch=0.047, train_loss_epoch=0.0175]
Epoch 4: 100%|██████████| 187/187 [01:07<00:00,  2.77it/s, loss=0.017, val_loss_step=0.0535, train_loss_step=0.0157, val_loss_epoch=0.0536, train_loss_epoch=0.0175]
Epoch 5:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.017, val_loss_step=0.0535, train_loss_step=0.0196, val_loss_epoch=0.0536, train_loss_epoch=0.017] 
Validating: 0it [00:00, ?it/s][A
Epoch 5:  81%|████████▏ | 152/187 [01:00<00:13,  2.50it/s, loss=0.017, val_loss_step=0.0535, train_loss_step=0.0196, val_loss_epoch=0.0536, train_loss_epoch=0.017]
Epoch 5:  82%|████████▏ | 154/187 [01:01<00:13,  2.52it/s, loss=0.017, val_loss_step=0.0535, train_loss_step=0.0196, val_loss_epoch=0.0536, train_loss_epoch=0.017]
Validating:  14%|█▎        | 5/37 [00:01<00:10,  3.02it/s][A
Epoch 5:  83%|█████

Epoch 7:  82%|████████▏ | 154/187 [01:01<00:13,  2.52it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0171, val_loss_epoch=0.0254, train_loss_epoch=0.0165]
Epoch 7:  83%|████████▎ | 156/187 [01:01<00:12,  2.53it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0171, val_loss_epoch=0.0254, train_loss_epoch=0.0165]
Epoch 7:  84%|████████▍ | 158/187 [01:01<00:11,  2.55it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0171, val_loss_epoch=0.0254, train_loss_epoch=0.0165]
Epoch 7:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0171, val_loss_epoch=0.0254, train_loss_epoch=0.0165]
Epoch 7:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0171, val_loss_epoch=0.0254, train_loss_epoch=0.0165]
Validating:  35%|███▌      | 13/37 [00:02<00:04,  4.91it/s][A
Epoch 7:  88%|████████▊ | 164/187 [01:02<00:08,  2.60it/s, loss=0.016, val_loss_step=0.0243, train_loss_step=0.0

Epoch 9:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_epoch=0.058, train_loss_epoch=0.016]
Validating:  30%|██▉       | 11/37 [00:02<00:05,  4.50it/s][A
Epoch 9:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_epoch=0.058, train_loss_epoch=0.016]
Epoch 9:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_epoch=0.058, train_loss_epoch=0.016]
Epoch 9:  89%|████████▉ | 166/187 [01:03<00:08,  2.62it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_epoch=0.058, train_loss_epoch=0.016]
Epoch 9:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_epoch=0.058, train_loss_epoch=0.016]
Epoch 9:  91%|█████████ | 170/187 [01:04<00:06,  2.66it/s, loss=0.016, val_loss_step=0.0545, train_loss_step=0.016, val_loss_ep

Epoch 11:  93%|█████████▎| 174/187 [01:04<00:04,  2.68it/s, loss=0.016, val_loss_step=0.0252, train_loss_step=0.0127, val_loss_epoch=0.0259, train_loss_epoch=0.0157]
Validating:  68%|██████▊   | 25/37 [00:04<00:02,  5.52it/s][A
Epoch 11:  94%|█████████▍| 176/187 [01:05<00:04,  2.69it/s, loss=0.016, val_loss_step=0.0252, train_loss_step=0.0127, val_loss_epoch=0.0259, train_loss_epoch=0.0157]
Epoch 11:  95%|█████████▌| 178/187 [01:05<00:03,  2.71it/s, loss=0.016, val_loss_step=0.0252, train_loss_step=0.0127, val_loss_epoch=0.0259, train_loss_epoch=0.0157]
Validating:  78%|███████▊  | 29/37 [00:05<00:01,  5.41it/s][A
Epoch 11:  96%|█████████▋| 180/187 [01:06<00:02,  2.72it/s, loss=0.016, val_loss_step=0.0252, train_loss_step=0.0127, val_loss_epoch=0.0259, train_loss_epoch=0.0157]
Validating:  84%|████████▍ | 31/37 [00:05<00:01,  5.41it/s][A
Epoch 11:  97%|█████████▋| 182/187 [01:06<00:01,  2.74it/s, loss=0.016, val_loss_step=0.0252, train_loss_step=0.0127, val_loss_epoch=0.0259, train_

Epoch 13:  96%|█████████▋| 180/187 [01:05<00:02,  2.73it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0143, val_loss_epoch=0.0175, train_loss_epoch=0.0152]
Epoch 13:  97%|█████████▋| 182/187 [01:06<00:01,  2.75it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0143, val_loss_epoch=0.0175, train_loss_epoch=0.0152]
Epoch 13:  98%|█████████▊| 184/187 [01:06<00:01,  2.76it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0143, val_loss_epoch=0.0175, train_loss_epoch=0.0152]
Epoch 13:  99%|█████████▉| 186/187 [01:06<00:00,  2.78it/s, loss=0.015, val_loss_step=0.0163, train_loss_step=0.0143, val_loss_epoch=0.0175, train_loss_epoch=0.0152]
Epoch 13: 100%|██████████| 187/187 [01:07<00:00,  2.78it/s, loss=0.015, val_loss_step=0.031, train_loss_step=0.0143, val_loss_epoch=0.0321, train_loss_epoch=0.0152] 
Epoch 14:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.015, val_loss_step=0.031, train_loss_step=0.0116, val_loss_epoch=0.0321, train_loss_epoch=0.015] 
Valid

Epoch 16:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.014, val_loss_step=0.0558, train_loss_step=0.0165, val_loss_epoch=0.0601, train_loss_epoch=0.0146]
Validating: 0it [00:00, ?it/s][A
Epoch 16:  81%|████████▏ | 152/187 [01:00<00:14,  2.50it/s, loss=0.014, val_loss_step=0.0558, train_loss_step=0.0165, val_loss_epoch=0.0601, train_loss_epoch=0.0146]
Epoch 16:  82%|████████▏ | 154/187 [01:01<00:13,  2.51it/s, loss=0.014, val_loss_step=0.0558, train_loss_step=0.0165, val_loss_epoch=0.0601, train_loss_epoch=0.0146]
Validating:  14%|█▎        | 5/37 [00:01<00:10,  3.02it/s][A
Epoch 16:  83%|████████▎ | 156/187 [01:01<00:12,  2.53it/s, loss=0.014, val_loss_step=0.0558, train_loss_step=0.0165, val_loss_epoch=0.0601, train_loss_epoch=0.0146]
Epoch 16:  84%|████████▍ | 158/187 [01:02<00:11,  2.55it/s, loss=0.014, val_loss_step=0.0558, train_loss_step=0.0165, val_loss_epoch=0.0601, train_loss_epoch=0.0146]
Epoch 16:  86%|████████▌ | 160/187 [01:02<00:10,  2.56it/s, loss=0.014, va

Epoch 18:  83%|████████▎ | 156/187 [01:01<00:12,  2.53it/s, loss=0.014, val_loss_step=0.0168, train_loss_step=0.0145, val_loss_epoch=0.0171, train_loss_epoch=0.0142]
Validating:  19%|█▉        | 7/37 [00:01<00:07,  3.93it/s][A
Epoch 18:  84%|████████▍ | 158/187 [01:01<00:11,  2.55it/s, loss=0.014, val_loss_step=0.0168, train_loss_step=0.0145, val_loss_epoch=0.0171, train_loss_epoch=0.0142]
Validating:  24%|██▍       | 9/37 [00:01<00:06,  4.51it/s][A
Epoch 18:  86%|████████▌ | 160/187 [01:02<00:10,  2.57it/s, loss=0.014, val_loss_step=0.0168, train_loss_step=0.0145, val_loss_epoch=0.0171, train_loss_epoch=0.0142]
Epoch 18:  87%|████████▋ | 162/187 [01:02<00:09,  2.58it/s, loss=0.014, val_loss_step=0.0168, train_loss_step=0.0145, val_loss_epoch=0.0171, train_loss_epoch=0.0142]
Epoch 18:  88%|████████▊ | 164/187 [01:03<00:08,  2.60it/s, loss=0.014, val_loss_step=0.0168, train_loss_step=0.0145, val_loss_epoch=0.0171, train_loss_epoch=0.0142]
Validating:  41%|████      | 15/37 [00:02<00:0

Epoch 20:  87%|████████▋ | 162/187 [01:02<00:09,  2.59it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoch 20:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoch 20:  89%|████████▉ | 166/187 [01:03<00:07,  2.63it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoch 20:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoch 20:  91%|█████████ | 170/187 [01:03<00:06,  2.66it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoch 20:  92%|█████████▏| 172/187 [01:04<00:05,  2.68it/s, loss=0.013, val_loss_step=0.0157, train_loss_step=0.0103, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Epoc

Epoch 22:  92%|█████████▏| 172/187 [01:04<00:05,  2.67it/s, loss=0.014, val_loss_step=0.0158, train_loss_step=0.0185, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Validating:  62%|██████▏   | 23/37 [00:04<00:02,  5.37it/s][A
Epoch 22:  93%|█████████▎| 174/187 [01:04<00:04,  2.69it/s, loss=0.014, val_loss_step=0.0158, train_loss_step=0.0185, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Validating:  68%|██████▊   | 25/37 [00:04<00:02,  5.38it/s][A
Epoch 22:  94%|█████████▍| 176/187 [01:05<00:04,  2.71it/s, loss=0.014, val_loss_step=0.0158, train_loss_step=0.0185, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Validating:  73%|███████▎  | 27/37 [00:05<00:01,  5.37it/s][A
Epoch 22:  95%|█████████▌| 178/187 [01:05<00:03,  2.72it/s, loss=0.014, val_loss_step=0.0158, train_loss_step=0.0185, val_loss_epoch=0.0164, train_loss_epoch=0.0139]
Validating:  78%|███████▊  | 29/37 [00:05<00:01,  5.41it/s][A
Epoch 22:  96%|█████████▋| 180/187 [01:05<00:02,  2.74it/s, loss=0.014, val_loss_ste

Epoch 24:  98%|█████████▊| 184/187 [01:06<00:01,  2.77it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0167, val_loss_epoch=0.018, train_loss_epoch=0.014]
Epoch 24:  99%|█████████▉| 186/187 [01:06<00:00,  2.78it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0167, val_loss_epoch=0.018, train_loss_epoch=0.014]
Epoch 24: 100%|██████████| 187/187 [01:07<00:00,  2.79it/s, loss=0.014, val_loss_step=0.0181, train_loss_step=0.0167, val_loss_epoch=0.0192, train_loss_epoch=0.014]
Epoch 24: 100%|██████████| 187/187 [01:07<00:00,  2.79it/s, loss=0.014, val_loss_step=0.0181, train_loss_step=0.0167, val_loss_epoch=0.0192, train_loss_epoch=0.014]
Test iterations: 68
Testing:  99%|█████████▊| 67/68 [00:12<00:00,  5.67it/s]Logits: tensor([[ -9.4688,  -9.4062,  -8.6719,  ...,  -8.1719,  -7.3828,  -6.9297],
        [-10.9062, -12.0000,  -7.9648,  ...,  -7.3789,  -4.8438,  -6.8711],
        [-17.2344, -15.0156, -11.6719,  ..., -14.2422, -11.7812, -12.5703],
        ...,
        [ -7.4570

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 4 M   


Initial Learning Rate: 0.001000
Validate iterations: 37
Train iterations: 150                                                 
Epoch 0:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  81%|████████  | 151/187 [01:00<00:14,  2.49it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Epoch 0:  82%|████████▏ | 153/187 [01:01<00:13,  2.51it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Epoch 0:  83%|████████▎ | 155/187 [01:01<00:12,  2.53it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Validating:  16%|█▌        | 6/37 [00:01<00:08,  3.77it/s][A
Epoch 0:  84%|████████▍ | 157/187 [01:01<00:11,  2.54it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Epoch 0:  85%|████████▌ | 159/187 [01:02<00:10,  2.56it/s, loss=0.020, val_loss_step=0.691, train_loss_step=0.0204]
Epoch 0:  86%|████████▌ | 161/187 [01:02<00:10,  2.58it/s, loss=0.020, val_loss_s

Epoch 2:  93%|█████████▎| 174/187 [01:04<00:04,  2.70it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0165, val_loss_epoch=0.0212, train_loss_epoch=0.0193]
Epoch 2:  94%|█████████▍| 176/187 [01:04<00:04,  2.72it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0165, val_loss_epoch=0.0212, train_loss_epoch=0.0193]
Epoch 2:  95%|█████████▌| 178/187 [01:05<00:03,  2.73it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0165, val_loss_epoch=0.0212, train_loss_epoch=0.0193]
Epoch 2:  96%|█████████▋| 180/187 [01:05<00:02,  2.75it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0165, val_loss_epoch=0.0212, train_loss_epoch=0.0193]
Validating:  84%|████████▍ | 31/37 [00:05<00:01,  5.69it/s][A
Epoch 2:  97%|█████████▋| 182/187 [01:05<00:01,  2.76it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0165, val_loss_epoch=0.0212, train_loss_epoch=0.0193]
Epoch 2:  98%|█████████▊| 184/187 [01:06<00:01,  2.78it/s, loss=0.018, val_loss_step=0.0196, train_loss_step=0.0

Epoch 4:  99%|█████████▉| 186/187 [01:06<00:00,  2.80it/s, loss=0.017, val_loss_step=0.355, train_loss_step=0.0156, val_loss_epoch=0.373, train_loss_epoch=0.0179]
Epoch 4: 100%|██████████| 187/187 [01:06<00:00,  2.80it/s, loss=0.017, val_loss_step=0.106, train_loss_step=0.0156, val_loss_epoch=0.11, train_loss_epoch=0.0179] 
Epoch 5:  80%|████████  | 150/187 [01:00<00:14,  2.50it/s, loss=0.017, val_loss_step=0.106, train_loss_step=0.0196, val_loss_epoch=0.11, train_loss_epoch=0.0174]
Validating: 0it [00:00, ?it/s][A
Epoch 5:  81%|████████▏ | 152/187 [01:00<00:13,  2.51it/s, loss=0.017, val_loss_step=0.106, train_loss_step=0.0196, val_loss_epoch=0.11, train_loss_epoch=0.0174]
Epoch 5:  82%|████████▏ | 154/187 [01:00<00:13,  2.53it/s, loss=0.017, val_loss_step=0.106, train_loss_step=0.0196, val_loss_epoch=0.11, train_loss_epoch=0.0174]
Validating:  14%|█▎        | 5/37 [00:01<00:10,  3.03it/s][A
Epoch 5:  83%|████████▎ | 156/187 [01:01<00:12,  2.55it/s, loss=0.017, val_loss_step=0.106, 

Epoch 7:  83%|████████▎ | 156/187 [01:01<00:12,  2.54it/s, loss=0.017, val_loss_step=0.0477, train_loss_step=0.0163, val_loss_epoch=0.0498, train_loss_epoch=0.0168]
Epoch 7:  84%|████████▍ | 158/187 [01:01<00:11,  2.56it/s, loss=0.017, val_loss_step=0.0477, train_loss_step=0.0163, val_loss_epoch=0.0498, train_loss_epoch=0.0168]
Validating:  24%|██▍       | 9/37 [00:01<00:06,  4.27it/s][A
Epoch 7:  86%|████████▌ | 160/187 [01:02<00:10,  2.58it/s, loss=0.017, val_loss_step=0.0477, train_loss_step=0.0163, val_loss_epoch=0.0498, train_loss_epoch=0.0168]
Epoch 7:  87%|████████▋ | 162/187 [01:02<00:09,  2.60it/s, loss=0.017, val_loss_step=0.0477, train_loss_step=0.0163, val_loss_epoch=0.0498, train_loss_epoch=0.0168]
Validating:  35%|███▌      | 13/37 [00:02<00:04,  4.94it/s][A
Epoch 7:  88%|████████▊ | 164/187 [01:02<00:08,  2.61it/s, loss=0.017, val_loss_step=0.0477, train_loss_step=0.0163, val_loss_epoch=0.0498, train_loss_epoch=0.0168]
Validating:  41%|████      | 15/37 [00:02<00:04,  

Epoch 9:  84%|████████▍ | 158/187 [01:01<00:11,  2.57it/s, loss=0.016, val_loss_step=0.0854, train_loss_step=0.0162, val_loss_epoch=0.091, train_loss_epoch=0.0163]
Epoch 9:  86%|████████▌ | 160/187 [01:01<00:10,  2.59it/s, loss=0.016, val_loss_step=0.0854, train_loss_step=0.0162, val_loss_epoch=0.091, train_loss_epoch=0.0163]
Validating:  30%|██▉       | 11/37 [00:02<00:05,  4.81it/s][A
Epoch 9:  87%|████████▋ | 162/187 [01:02<00:09,  2.60it/s, loss=0.016, val_loss_step=0.0854, train_loss_step=0.0162, val_loss_epoch=0.091, train_loss_epoch=0.0163]
Epoch 9:  88%|████████▊ | 164/187 [01:02<00:08,  2.62it/s, loss=0.016, val_loss_step=0.0854, train_loss_step=0.0162, val_loss_epoch=0.091, train_loss_epoch=0.0163]
Validating:  41%|████      | 15/37 [00:02<00:04,  5.18it/s][A
Epoch 9:  89%|████████▉ | 166/187 [01:02<00:07,  2.64it/s, loss=0.016, val_loss_step=0.0854, train_loss_step=0.0162, val_loss_epoch=0.091, train_loss_epoch=0.0163]
Epoch 9:  90%|████████▉ | 168/187 [01:03<00:07,  2.65i

Epoch 11:  89%|████████▉ | 166/187 [01:03<00:08,  2.62it/s, loss=0.016, val_loss_step=0.0217, train_loss_step=0.025, val_loss_epoch=0.0243, train_loss_epoch=0.0157]
Epoch 11:  90%|████████▉ | 168/187 [01:03<00:07,  2.64it/s, loss=0.016, val_loss_step=0.0217, train_loss_step=0.025, val_loss_epoch=0.0243, train_loss_epoch=0.0157]
Validating:  51%|█████▏    | 19/37 [00:03<00:03,  5.38it/s][A
Epoch 11:  91%|█████████ | 170/187 [01:04<00:06,  2.65it/s, loss=0.016, val_loss_step=0.0217, train_loss_step=0.025, val_loss_epoch=0.0243, train_loss_epoch=0.0157]
Validating:  57%|█████▋    | 21/37 [00:03<00:02,  5.50it/s][A
Epoch 11:  92%|█████████▏| 172/187 [01:04<00:05,  2.67it/s, loss=0.016, val_loss_step=0.0217, train_loss_step=0.025, val_loss_epoch=0.0243, train_loss_epoch=0.0157]
Validating:  62%|██████▏   | 23/37 [00:04<00:02,  5.52it/s][A
Epoch 11:  93%|█████████▎| 174/187 [01:04<00:04,  2.69it/s, loss=0.016, val_loss_step=0.0217, train_loss_step=0.025, val_loss_epoch=0.0243, train_loss_

Validating:  57%|█████▋    | 21/37 [00:03<00:02,  5.40it/s][A
Epoch 13:  92%|█████████▏| 172/187 [01:04<00:05,  2.69it/s, loss=0.015, val_loss_step=0.0179, train_loss_step=0.0174, val_loss_epoch=0.0203, train_loss_epoch=0.0153]
Validating:  62%|██████▏   | 23/37 [00:04<00:02,  5.42it/s][A
Epoch 13:  93%|█████████▎| 174/187 [01:04<00:04,  2.70it/s, loss=0.015, val_loss_step=0.0179, train_loss_step=0.0174, val_loss_epoch=0.0203, train_loss_epoch=0.0153]
Validating:  68%|██████▊   | 25/37 [00:04<00:02,  5.44it/s][A
Epoch 13:  94%|█████████▍| 176/187 [01:04<00:04,  2.72it/s, loss=0.015, val_loss_step=0.0179, train_loss_step=0.0174, val_loss_epoch=0.0203, train_loss_epoch=0.0153]
Validating:  73%|███████▎  | 27/37 [00:05<00:01,  5.37it/s][A
Epoch 13:  95%|█████████▌| 178/187 [01:05<00:03,  2.73it/s, loss=0.015, val_loss_step=0.0179, train_loss_step=0.0174, val_loss_epoch=0.0203, train_loss_epoch=0.0153]
Validating:  78%|███████▊  | 29/37 [00:05<00:01,  5.43it/s][A
Epoch 13:  96%|██████

Validating:  57%|█████▋    | 21/37 [00:04<00:03,  5.28it/s][A
Epoch 15:  92%|█████████▏| 172/187 [01:05<00:05,  2.61it/s, loss=0.016, val_loss_step=0.015, train_loss_step=0.0338, val_loss_epoch=0.0177, train_loss_epoch=0.0149]
Validating:  62%|██████▏   | 23/37 [00:04<00:02,  5.24it/s][A
Epoch 15:  93%|█████████▎| 174/187 [01:06<00:04,  2.63it/s, loss=0.016, val_loss_step=0.015, train_loss_step=0.0338, val_loss_epoch=0.0177, train_loss_epoch=0.0149]
Validating:  68%|██████▊   | 25/37 [00:04<00:02,  5.37it/s][A
Epoch 15:  94%|█████████▍| 176/187 [01:06<00:04,  2.65it/s, loss=0.016, val_loss_step=0.015, train_loss_step=0.0338, val_loss_epoch=0.0177, train_loss_epoch=0.0149]
Validating:  73%|███████▎  | 27/37 [00:05<00:01,  5.29it/s][A
Epoch 15:  95%|█████████▌| 178/187 [01:06<00:03,  2.66it/s, loss=0.016, val_loss_step=0.015, train_loss_step=0.0338, val_loss_epoch=0.0177, train_loss_epoch=0.0149]
Validating:  78%|███████▊  | 29/37 [00:05<00:01,  5.41it/s][A
Epoch 15:  96%|█████████▋

Epoch 17:  93%|█████████▎| 174/187 [01:04<00:04,  2.68it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0153, val_loss_epoch=0.0165, train_loss_epoch=0.0145]
Epoch 17:  94%|█████████▍| 176/187 [01:05<00:04,  2.70it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0153, val_loss_epoch=0.0165, train_loss_epoch=0.0145]
Epoch 17:  95%|█████████▌| 178/187 [01:05<00:03,  2.72it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0153, val_loss_epoch=0.0165, train_loss_epoch=0.0145]
Validating:  78%|███████▊  | 29/37 [00:05<00:01,  5.51it/s][A
Epoch 17:  96%|█████████▋| 180/187 [01:05<00:02,  2.73it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0153, val_loss_epoch=0.0165, train_loss_epoch=0.0145]
Validating:  84%|████████▍ | 31/37 [00:05<00:01,  5.32it/s][A
Epoch 17:  97%|█████████▋| 182/187 [01:06<00:01,  2.75it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0153, val_loss_epoch=0.0165, train_loss_epoch=0.0145]
Validating:  89%|████████▉ | 33/37 [00:06<00:00, 

Epoch 19:  95%|█████████▌| 178/187 [01:05<00:03,  2.71it/s, loss=0.014, val_loss_step=0.0141, train_loss_step=0.0172, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 19:  96%|█████████▋| 180/187 [01:05<00:02,  2.73it/s, loss=0.014, val_loss_step=0.0141, train_loss_step=0.0172, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 19:  97%|█████████▋| 182/187 [01:06<00:01,  2.74it/s, loss=0.014, val_loss_step=0.0141, train_loss_step=0.0172, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 19:  98%|█████████▊| 184/187 [01:06<00:01,  2.76it/s, loss=0.014, val_loss_step=0.0141, train_loss_step=0.0172, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 19:  99%|█████████▉| 186/187 [01:07<00:00,  2.77it/s, loss=0.014, val_loss_step=0.0141, train_loss_step=0.0172, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 19: 100%|██████████| 187/187 [01:07<00:00,  2.77it/s, loss=0.014, val_loss_step=0.0139, train_loss_step=0.0172, val_loss_epoch=0.0162, train_loss_epoch=0.0141]
Epoc

Epoch 21:  97%|█████████▋| 182/187 [01:06<00:01,  2.73it/s, loss=0.014, val_loss_step=0.0139, train_loss_step=0.0115, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 21:  98%|█████████▊| 184/187 [01:06<00:01,  2.75it/s, loss=0.014, val_loss_step=0.0139, train_loss_step=0.0115, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 21:  99%|█████████▉| 186/187 [01:07<00:00,  2.76it/s, loss=0.014, val_loss_step=0.0139, train_loss_step=0.0115, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Epoch 21: 100%|██████████| 187/187 [01:07<00:00,  2.76it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0115, val_loss_epoch=0.0163, train_loss_epoch=0.014] 
Epoch 22:  80%|████████  | 150/187 [01:00<00:14,  2.49it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0201, val_loss_epoch=0.0163, train_loss_epoch=0.014]
Validating: 0it [00:00, ?it/s][A
Epoch 22:  81%|████████▏ | 152/187 [01:00<00:13,  2.50it/s, loss=0.014, val_loss_step=0.014, train_loss_step=0.0201, val_loss_epoch=0.0163, tr

Epoch 23:  99%|█████████▉| 186/187 [01:07<00:00,  2.77it/s, loss=0.015, val_loss_step=0.0142, train_loss_step=0.0229, val_loss_epoch=0.0164, train_loss_epoch=0.0141]
Epoch 23: 100%|██████████| 187/187 [01:07<00:00,  2.77it/s, loss=0.015, val_loss_step=0.0174, train_loss_step=0.0229, val_loss_epoch=0.0204, train_loss_epoch=0.0141]
Epoch 24:  80%|████████  | 150/187 [01:00<00:14,  2.50it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0131, val_loss_epoch=0.0204, train_loss_epoch=0.0141]
Validating: 0it [00:00, ?it/s][A
Epoch 24:  81%|████████▏ | 152/187 [01:00<00:13,  2.50it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0131, val_loss_epoch=0.0204, train_loss_epoch=0.0141]
Validating:   8%|▊         | 3/37 [00:00<00:14,  2.33it/s][A
Epoch 24:  82%|████████▏ | 154/187 [01:01<00:13,  2.52it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0131, val_loss_epoch=0.0204, train_loss_epoch=0.0141]
Validating:  14%|█▎        | 5/37 [00:01<00:09,  3.25it/s][A
Epoch 24:  8

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 4 M   


Initial Learning Rate: 0.001000
Validate iterations: 38
Train iterations: 149                                                 
Epoch 0:  80%|███████▉  | 149/187 [01:00<00:15,  2.46it/s, loss=0.019, val_loss_step=0.69, train_loss_step=0.0198]
Validating: 0it [00:00, ?it/s][A
Epoch 0:  80%|████████  | 150/187 [01:00<00:15,  2.46it/s, loss=0.019, val_loss_step=0.69, train_loss_step=0.0198]
Epoch 0:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.019, val_loss_step=0.69, train_loss_step=0.0198]
Validating:  11%|█         | 4/38 [00:00<00:10,  3.35it/s][A
Epoch 0:  82%|████████▏ | 154/187 [01:01<00:13,  2.50it/s, loss=0.019, val_loss_step=0.69, train_loss_step=0.0198]
Validating:  16%|█▌        | 6/38 [00:01<00:07,  4.29it/s][A
Epoch 0:  83%|████████▎ | 156/187 [01:02<00:12,  2.51it/s, loss=0.019, val_loss_step=0.69, train_loss_step=0.0198]
Validating:  21%|██        | 8/38 [00:01<00:06,  4.97it/s][A
Epoch 0:  84%|████████▍ | 158/187 [01:02<00:11,  2.53it/s, loss=0.019, val_los

Epoch 2:  82%|████████▏ | 154/187 [01:01<00:13,  2.50it/s, loss=0.018, val_loss_step=0.0211, train_loss_step=0.0171, val_loss_epoch=0.0218, train_loss_epoch=0.0192]
Epoch 2:  83%|████████▎ | 156/187 [01:02<00:12,  2.51it/s, loss=0.018, val_loss_step=0.0211, train_loss_step=0.0171, val_loss_epoch=0.0218, train_loss_epoch=0.0192]
Validating:  21%|██        | 8/38 [00:01<00:06,  4.68it/s][A
Epoch 2:  84%|████████▍ | 158/187 [01:02<00:11,  2.53it/s, loss=0.018, val_loss_step=0.0211, train_loss_step=0.0171, val_loss_epoch=0.0218, train_loss_epoch=0.0192]
Epoch 2:  86%|████████▌ | 160/187 [01:02<00:10,  2.55it/s, loss=0.018, val_loss_step=0.0211, train_loss_step=0.0171, val_loss_epoch=0.0218, train_loss_epoch=0.0192]
Epoch 2:  87%|████████▋ | 162/187 [01:03<00:09,  2.56it/s, loss=0.018, val_loss_step=0.0211, train_loss_step=0.0171, val_loss_epoch=0.0218, train_loss_epoch=0.0192]
Validating:  37%|███▋      | 14/38 [00:02<00:04,  5.57it/s][A
Epoch 2:  88%|████████▊ | 164/187 [01:03<00:08,  2

Epoch 4:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.017, val_loss_step=0.0305, train_loss_step=0.0151, val_loss_epoch=0.0304, train_loss_epoch=0.0176]
Epoch 4:  82%|████████▏ | 154/187 [01:01<00:13,  2.50it/s, loss=0.017, val_loss_step=0.0305, train_loss_step=0.0151, val_loss_epoch=0.0304, train_loss_epoch=0.0176]
Validating:  16%|█▌        | 6/38 [00:01<00:08,  3.71it/s][A
Epoch 4:  83%|████████▎ | 156/187 [01:02<00:12,  2.51it/s, loss=0.017, val_loss_step=0.0305, train_loss_step=0.0151, val_loss_epoch=0.0304, train_loss_epoch=0.0176]
Epoch 4:  84%|████████▍ | 158/187 [01:02<00:11,  2.53it/s, loss=0.017, val_loss_step=0.0305, train_loss_step=0.0151, val_loss_epoch=0.0304, train_loss_epoch=0.0176]
Epoch 4:  86%|████████▌ | 160/187 [01:02<00:10,  2.55it/s, loss=0.017, val_loss_step=0.0305, train_loss_step=0.0151, val_loss_epoch=0.0304, train_loss_epoch=0.0176]
Validating:  32%|███▏      | 12/38 [00:02<00:04,  5.32it/s][A
Epoch 4:  87%|████████▋ | 162/187 [01:03<00:09,  2

Epoch 6:  80%|████████  | 150/187 [01:00<00:15,  2.46it/s, loss=0.016, val_loss_step=0.0376, train_loss_step=0.0162, val_loss_epoch=0.0355, train_loss_epoch=0.0167]
Validating:   5%|▌         | 2/38 [00:00<00:13,  2.76it/s][A
Epoch 6:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.016, val_loss_step=0.0376, train_loss_step=0.0162, val_loss_epoch=0.0355, train_loss_epoch=0.0167]
Epoch 6:  82%|████████▏ | 154/187 [01:01<00:13,  2.50it/s, loss=0.016, val_loss_step=0.0376, train_loss_step=0.0162, val_loss_epoch=0.0355, train_loss_epoch=0.0167]
Epoch 6:  83%|████████▎ | 156/187 [01:02<00:12,  2.52it/s, loss=0.016, val_loss_step=0.0376, train_loss_step=0.0162, val_loss_epoch=0.0355, train_loss_epoch=0.0167]
Validating:  21%|██        | 8/38 [00:01<00:06,  4.80it/s][A
Epoch 6:  84%|████████▍ | 158/187 [01:02<00:11,  2.53it/s, loss=0.016, val_loss_step=0.0376, train_loss_step=0.0162, val_loss_epoch=0.0355, train_loss_epoch=0.0167]
Validating:  26%|██▋       | 10/38 [00:02<00:05,  5

Epoch 7:  98%|█████████▊| 184/187 [01:07<00:01,  2.74it/s, loss=0.016, val_loss_step=0.0416, train_loss_step=0.0132, val_loss_epoch=0.0393, train_loss_epoch=0.0164]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  6.03it/s][A
Epoch 7:  99%|█████████▉| 186/187 [01:07<00:00,  2.76it/s, loss=0.016, val_loss_step=0.0416, train_loss_step=0.0132, val_loss_epoch=0.0393, train_loss_epoch=0.0164]
Epoch 7: 100%|██████████| 187/187 [01:07<00:00,  2.76it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0132, val_loss_epoch=0.0199, train_loss_epoch=0.0164]
Epoch 8:  80%|███████▉  | 149/187 [01:00<00:15,  2.46it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0132, val_loss_epoch=0.0199, train_loss_epoch=0.0162]
Validating: 0it [00:00, ?it/s][A
Epoch 8:  80%|████████  | 150/187 [01:00<00:15,  2.46it/s, loss=0.016, val_loss_step=0.0195, train_loss_step=0.0132, val_loss_epoch=0.0199, train_loss_epoch=0.0162]
Epoch 8:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.016, val_los

Validating:  95%|█████████▍| 36/38 [00:06<00:00,  6.08it/s][A
Epoch 9: 100%|██████████| 187/187 [01:07<00:00,  2.75it/s, loss=0.016, val_loss_step=0.0864, train_loss_step=0.0154, val_loss_epoch=0.0892, train_loss_epoch=0.016]
Epoch 10:  80%|███████▉  | 149/187 [01:00<00:15,  2.46it/s, loss=0.016, val_loss_step=0.0864, train_loss_step=0.0164, val_loss_epoch=0.0892, train_loss_epoch=0.0158]
Validating: 0it [00:00, ?it/s][A
Epoch 10:  80%|████████  | 150/187 [01:00<00:15,  2.46it/s, loss=0.016, val_loss_step=0.0864, train_loss_step=0.0164, val_loss_epoch=0.0892, train_loss_epoch=0.0158]
Validating:   5%|▌         | 2/38 [00:00<00:13,  2.71it/s][A
Epoch 10:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.016, val_loss_step=0.0864, train_loss_step=0.0164, val_loss_epoch=0.0892, train_loss_epoch=0.0158]
Epoch 10:  82%|████████▏ | 154/187 [01:01<00:13,  2.50it/s, loss=0.016, val_loss_step=0.0864, train_loss_step=0.0164, val_loss_epoch=0.0892, train_loss_epoch=0.0158]
Validating:  

Epoch 11:  96%|█████████▋| 180/187 [01:06<00:02,  2.72it/s, loss=0.016, val_loss_step=0.061, train_loss_step=0.0147, val_loss_epoch=0.0643, train_loss_epoch=0.0156]
Validating:  84%|████████▍ | 32/38 [00:06<00:01,  5.73it/s][A
Epoch 11:  97%|█████████▋| 182/187 [01:06<00:01,  2.73it/s, loss=0.016, val_loss_step=0.061, train_loss_step=0.0147, val_loss_epoch=0.0643, train_loss_epoch=0.0156]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.69it/s][A
Epoch 11:  98%|█████████▊| 184/187 [01:06<00:01,  2.75it/s, loss=0.016, val_loss_step=0.061, train_loss_step=0.0147, val_loss_epoch=0.0643, train_loss_epoch=0.0156]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  5.65it/s][A
Epoch 11: 100%|██████████| 187/187 [01:07<00:00,  2.77it/s, loss=0.016, val_loss_step=0.0431, train_loss_step=0.0147, val_loss_epoch=0.0441, train_loss_epoch=0.0156]
Epoch 12:  80%|███████▉  | 149/187 [01:00<00:15,  2.46it/s, loss=0.015, val_loss_step=0.0431, train_loss_step=0.0112, val_loss_epoch=0.0441, train_los

Epoch 13:  98%|█████████▊| 184/187 [01:07<00:01,  2.74it/s, loss=0.015, val_loss_step=0.032, train_loss_step=0.0158, val_loss_epoch=0.0329, train_loss_epoch=0.0151]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  5.74it/s][A
Epoch 13:  99%|█████████▉| 186/187 [01:07<00:00,  2.75it/s, loss=0.015, val_loss_step=0.032, train_loss_step=0.0158, val_loss_epoch=0.0329, train_loss_epoch=0.0151]
Epoch 13: 100%|██████████| 187/187 [01:07<00:00,  2.76it/s, loss=0.015, val_loss_step=0.0192, train_loss_step=0.0158, val_loss_epoch=0.0174, train_loss_epoch=0.0151]
Epoch 14:  80%|███████▉  | 149/187 [01:00<00:15,  2.47it/s, loss=0.015, val_loss_step=0.0192, train_loss_step=0.0145, val_loss_epoch=0.0174, train_loss_epoch=0.015] 
Validating: 0it [00:00, ?it/s][A
Epoch 14:  80%|████████  | 150/187 [01:00<00:15,  2.46it/s, loss=0.015, val_loss_step=0.0192, train_loss_step=0.0145, val_loss_epoch=0.0174, train_loss_epoch=0.015]
Epoch 14:  81%|████████▏ | 152/187 [01:01<00:14,  2.48it/s, loss=0.015, val_

Epoch 15:  97%|█████████▋| 182/187 [01:06<00:01,  2.72it/s, loss=0.014, val_loss_step=0.0187, train_loss_step=0.015, val_loss_epoch=0.021, train_loss_epoch=0.0147]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.78it/s][A
Epoch 15:  98%|█████████▊| 184/187 [01:07<00:01,  2.73it/s, loss=0.014, val_loss_step=0.0187, train_loss_step=0.015, val_loss_epoch=0.021, train_loss_epoch=0.0147]
Epoch 15: 100%|██████████| 187/187 [01:07<00:00,  2.75it/s, loss=0.014, val_loss_step=0.0239, train_loss_step=0.015, val_loss_epoch=0.0254, train_loss_epoch=0.0147]
Epoch 16:  80%|███████▉  | 149/187 [01:00<00:15,  2.46it/s, loss=0.015, val_loss_step=0.0239, train_loss_step=0.0167, val_loss_epoch=0.0254, train_loss_epoch=0.0144]
Validating: 0it [00:00, ?it/s][A
Epoch 16:  80%|████████  | 150/187 [01:01<00:15,  2.45it/s, loss=0.015, val_loss_step=0.0239, train_loss_step=0.0167, val_loss_epoch=0.0254, train_loss_epoch=0.0144]
Epoch 16:  81%|████████▏ | 152/187 [01:01<00:14,  2.47it/s, loss=0.015, val_lo

Epoch 17:  96%|█████████▋| 180/187 [01:06<00:02,  2.71it/s, loss=0.014, val_loss_step=0.0178, train_loss_step=0.0178, val_loss_epoch=0.0173, train_loss_epoch=0.0142]
Validating:  84%|████████▍ | 32/38 [00:06<00:01,  5.95it/s][A
Epoch 17:  97%|█████████▋| 182/187 [01:06<00:01,  2.72it/s, loss=0.014, val_loss_step=0.0178, train_loss_step=0.0178, val_loss_epoch=0.0173, train_loss_epoch=0.0142]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.88it/s][A
Epoch 17:  98%|█████████▊| 184/187 [01:07<00:01,  2.74it/s, loss=0.014, val_loss_step=0.0178, train_loss_step=0.0178, val_loss_epoch=0.0173, train_loss_epoch=0.0142]
Validating:  95%|█████████▍| 36/38 [00:06<00:00,  5.86it/s][A
Epoch 17: 100%|██████████| 187/187 [01:07<00:00,  2.76it/s, loss=0.014, val_loss_step=0.0171, train_loss_step=0.0178, val_loss_epoch=0.0168, train_loss_epoch=0.0142]
Epoch 18:  80%|███████▉  | 149/187 [01:01<00:15,  2.43it/s, loss=0.014, val_loss_step=0.0171, train_loss_step=0.0147, val_loss_epoch=0.0168, train_

Epoch 19:  94%|█████████▍| 176/187 [01:05<00:04,  2.67it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0115, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  5.63it/s][A
Epoch 19:  95%|█████████▌| 178/187 [01:06<00:03,  2.69it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0115, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  79%|███████▉  | 30/38 [00:05<00:01,  5.67it/s][A
Epoch 19:  96%|█████████▋| 180/187 [01:06<00:02,  2.70it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0115, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  84%|████████▍ | 32/38 [00:06<00:01,  5.65it/s][A
Epoch 19:  97%|█████████▋| 182/187 [01:07<00:01,  2.71it/s, loss=0.014, val_loss_step=0.0174, train_loss_step=0.0115, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  5.68it/s][A
Epoch 19:  98%|█████████▊| 184/187 [01:07<00:01,  2.73it/s, loss=0.014, val_loss_ste

Validating:  68%|██████▊   | 26/38 [00:04<00:02,  5.91it/s][A
Epoch 21:  94%|█████████▍| 176/187 [01:05<00:04,  2.68it/s, loss=0.013, val_loss_step=0.0175, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.0137]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  5.83it/s][A
Epoch 21:  95%|█████████▌| 178/187 [01:06<00:03,  2.69it/s, loss=0.013, val_loss_step=0.0175, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.0137]
Validating:  79%|███████▉  | 30/38 [00:05<00:01,  5.94it/s][A
Epoch 21:  96%|█████████▋| 180/187 [01:06<00:02,  2.71it/s, loss=0.013, val_loss_step=0.0175, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.0137]
Epoch 21:  97%|█████████▋| 182/187 [01:06<00:01,  2.72it/s, loss=0.013, val_loss_step=0.0175, train_loss_step=0.0127, val_loss_epoch=0.0166, train_loss_epoch=0.0137]
Validating:  89%|████████▉ | 34/38 [00:06<00:00,  6.05it/s][A
Epoch 21:  98%|█████████▊| 184/187 [01:07<00:01,  2.74it/s, loss=0.013, val_loss_ste

Epoch 23:  92%|█████████▏| 172/187 [01:05<00:05,  2.64it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0159, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  63%|██████▎   | 24/38 [00:04<00:02,  5.74it/s][A
Epoch 23:  93%|█████████▎| 174/187 [01:05<00:04,  2.66it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0159, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  68%|██████▊   | 26/38 [00:05<00:02,  5.72it/s][A
Epoch 23:  94%|█████████▍| 176/187 [01:05<00:04,  2.68it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0159, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  74%|███████▎  | 28/38 [00:05<00:01,  5.73it/s][A
Epoch 23:  95%|█████████▌| 178/187 [01:06<00:03,  2.69it/s, loss=0.014, val_loss_step=0.0175, train_loss_step=0.0159, val_loss_epoch=0.0166, train_loss_epoch=0.0138]
Validating:  79%|███████▉  | 30/38 [00:05<00:01,  5.76it/s][A
Epoch 23:  96%|█████████▋| 180/187 [01:06<00:02,  2.71it/s, loss=0.014, val_loss_ste

# Predict

In [37]:
def run_predict(confFitting, param, test, target, fold, seed):
    
    seed_everything(seed)
    
    x_test = test[confFitting["feature_cols"]]
    
    #データセットをイメージ化するトランスフォーマー。
    #ここでLogScaleも実施。
    all_scaler, all_it, test = PreprocessingLoadTransform(param, x_test, fold, seed)
    x_test = x_test.values
    
    #model class 定義
    model = MoAEfficientNet.load_from_checkpoint(
        checkpoint_path=f"{SAVEMODEL}/model{model_type}_SEED{seed}_FOLD{fold}.ckpt",
        training_set=(None, None),  # tuple
        valid_set=(None, None),  # tuple
        test_set=x_test, #予測用のデータセット
        transformer=all_it,
        drop_rate=drop_rate,
        drop_connect_rate=drop_connect_rate,
        fc_size=fc_size,
        weight_init='goog')
    
    model.freeze()
    model.eval()
    
    trainer = Trainer(
        logger=False,
        gpus=gpus,
        distributed_backend="dp",  # multiple-gpus, 1 machine
        precision=16,
        benchmark=False,
        deterministic=True)
    
    output = trainer.test(model, verbose=False)[0]
    predictions = output["pred_probs"]
    
    return predictions


In [38]:
def run_k_fold_predict(confFitting, test, target, param, Tester, NFOLDS, seed):
    predictions = np.zeros((len(test), confFitting["num_targets"]))
    
    for fold in range(NFOLDS):
        if Tester:
            print('=' * 20, 'Fold', fold, '=' * 20)
        pred_ = run_predict(confFitting, param, test, target, fold, seed)
        
        predictions += pred_ / NFOLDS
        
    return predictions

In [39]:
def SubmitPredict(confFitting, predictions, test, prefix):
    test[confFitting["target_cols"]] = predictions
    sub = sample_submission.drop(columns=confFitting["target_cols"]).merge(test[['sig_id']+confFitting["target_cols"]], on='sig_id', how='left').fillna(0)
    sub.to_csv(f'{SUBMIT}{prefix}submission.csv', index=False)

    print("sub.shape" + str(sub.shape))
    
    return

In [40]:
def Predict(param):
    #Tester(True/False)
    Tester = False
    
    #Preprocessing Data
    train, test = preprocessing(param, trainFeature, testFeature, trainTargetScored)
    
    #CV folds
    folds = CV_folds_drug_id(train, trainTargetScored)
    
    #Config about Fitting
    confFitting = Config_about_Fitting(train, test, trainTargetScored, folds)
    
    # Averaging on multiple SEEDS
    SEED = SEED = [42]
    predictions = np.zeros((len(test), confFitting["num_targets"]))
    
    ### RUN ###
    for seed in SEED:
        if Tester:
            print('~' * 20, 'SEED', seed, '~' * 20)
        predictions_ = run_k_fold_predict(confFitting, test, trainTargetScored, param, Tester, NFOLDS, seed)
        predictions += predictions_ / len(SEED)
    
    # 課題提出
    prefix = "Pytorch"
    SubmitPredict(confFitting, predictions, test, prefix)
    
    return

In [41]:
%%time
Predict(param_space)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Test iterations: 63
Testing: 100%|██████████| 63/63 [00:10<00:00,  6.53it/s]Logits: tensor([[-15.7344, -14.8359, -17.3281,  ..., -17.9219, -16.1562, -17.4219],
        [-13.6797, -14.5938, -14.9922,  ..., -16.1250, -14.4922, -15.0312],
        [-19.3125, -18.2188, -20.1562,  ..., -20.3750, -19.9531, -20.2188],
        ...,
        [-10.2500, -10.4062, -11.2578,  ..., -10.8906, -11.4766, -10.8203],
        [-18.6250, -18.5312, -18.2500,  ..., -21.0781, -16.8750, -19.2812],
        [-16.1562, -15.8750, -17.6250,  ..., -19.2969, -16.0000, -18.0781]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[1.192e-07 3.576e-07 5.960e-08 ... 0.000e+00 1.192e-07 0.000e+00]
 [1.132e-06 4.768e-07 2.980e-07 ... 1.192e-07 5.364e-07 2.980e-07]
 [0.000e+00 0.000e+00 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 ...
 [3.535e-05 3.022e-05 1.293e-05 ... 1.866e-05 1.037e-05 1.997e-05]
 [0.000e+00 0.000e+00 0.000e+00 ... 0.000e+00 5.960e-08 0.000e+00]
 [1.192e-07 1.192e-07 0.000e+00 ... 0.000e+00 1

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Test iterations: 63
Testing: 100%|██████████| 63/63 [00:10<00:00,  6.53it/s]Logits: tensor([[-15.0547, -15.7578, -12.0859,  ..., -12.2188, -12.5859, -13.0078],
        [-16.0938, -16.1562, -13.0469,  ..., -13.8203, -15.1719, -14.5391],
        [-14.6484, -15.2109, -11.4062,  ..., -10.8281, -12.4844, -11.7656],
        ...,
        [-13.9922, -14.5781, -11.5078,  ..., -11.1797, -12.4609, -12.1250],
        [-13.8984, -13.3203, -13.1875,  ..., -12.8906, -15.9922, -13.2656],
        [-15.4766, -15.9297, -12.3594,  ..., -12.0078, -14.1016, -13.1016]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[2.980e-07 1.192e-07 5.662e-06 ... 4.947e-06 3.397e-06 2.265e-06]
 [1.192e-07 1.192e-07 2.146e-06 ... 1.013e-06 2.384e-07 4.768e-07]
 [4.172e-07 2.384e-07 1.115e-05 ... 1.985e-05 3.815e-06 7.749e-06]
 ...
 [8.345e-07 4.768e-07 1.007e-05 ... 1.395e-05 3.874e-06 5.424e-06]
 [8.941e-07 1.669e-06 1.848e-06 ... 2.503e-06 1.192e-07 1.729e-06]
 [1.788e-07 1.192e-07 4.292e-06 ... 6.080e-06 7

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Test iterations: 63
Testing: 100%|██████████| 63/63 [00:10<00:00,  6.54it/s]Logits: tensor([[-20.0781, -16.0938, -16.1875,  ..., -18.3906, -16.3906, -18.0938],
        [-15.5547, -12.6484, -11.4375,  ..., -13.7656, -10.3750, -13.2656],
        [-16.9531, -14.4688, -13.7422,  ..., -14.7734, -12.9453, -14.3359],
        ...,
        [-14.9297, -11.9688, -11.4219,  ..., -13.3125, -10.9688, -13.0234],
        [-11.8516,  -9.5391,  -9.6797,  ..., -10.5391,  -9.6250, -10.4219],
        [-19.9844, -15.2500, -17.4062,  ..., -18.9375, -19.5469, -19.1250]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[0.000e+00 1.192e-07 1.192e-07 ... 0.000e+00 5.960e-08 0.000e+00]
 [1.788e-07 3.219e-06 1.079e-05 ... 1.073e-06 3.117e-05 1.729e-06]
 [5.960e-08 5.364e-07 1.073e-06 ... 3.576e-07 2.384e-06 5.960e-07]
 ...
 [3.576e-07 6.318e-06 1.097e-05 ... 1.669e-06 1.723e-05 2.205e-06]
 [7.153e-06 7.200e-05 6.253e-05 ... 2.646e-05 6.604e-05 2.980e-05]
 [0.000e+00 2.384e-07 0.000e+00 ... 0.000e+00 0

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Test iterations: 63
Testing: 100%|██████████| 63/63 [00:10<00:00,  6.52it/s]Logits: tensor([[ -9.9531, -12.0391, -13.5547,  ..., -11.9766, -11.8594, -13.6875],
        [-10.7891, -13.7422, -13.2031,  ..., -10.6484, -12.3984, -11.2891],
        [-10.8203, -12.4297, -13.8281,  ..., -13.0156, -12.8672, -14.2734],
        ...,
        [ -9.9297, -12.1641, -12.8594,  ..., -11.1875, -11.3906, -12.6250],
        [-10.4141, -12.7031, -15.2344,  ..., -13.5391, -13.1641, -15.8125],
        [ -8.3359, -10.4219, -12.0859,  ..., -10.4141, -10.7656, -11.8125]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[4.756e-05 5.901e-06 1.311e-06 ... 6.318e-06 7.093e-06 1.132e-06]
 [2.062e-05 1.073e-06 1.848e-06 ... 2.372e-05 4.113e-06 1.252e-05]
 [1.997e-05 3.994e-06 1.013e-06 ... 2.205e-06 2.563e-06 6.557e-07]
 ...
 [4.870e-05 5.186e-06 2.623e-06 ... 1.383e-05 1.132e-05 3.278e-06]
 [2.998e-05 3.040e-06 2.384e-07 ... 1.311e-06 1.907e-06 1.192e-07]
 [2.397e-04 2.980e-05 5.662e-06 ... 2.998e-05 2

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


Test iterations: 63
Testing: 100%|██████████| 63/63 [00:10<00:00,  6.51it/s]Logits: tensor([[-17.2969, -17.7812, -20.1719,  ..., -21.8438, -20.0469, -20.7969],
        [ -9.7500, -10.5938, -12.7891,  ..., -13.1094, -12.6484, -12.6484],
        [-14.9062, -15.6172, -18.0156,  ..., -19.3594, -17.8906, -18.5156],
        ...,
        [-13.8359, -14.7422, -17.0469,  ..., -17.4062, -17.0781, -16.6562],
        [-10.4531, -11.5859, -13.3828,  ..., -13.5703, -13.5781, -13.4922],
        [ -9.9766, -11.4062, -13.8906,  ..., -13.4531, -13.4219, -12.5000]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[5.960e-08 0.000e+00 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 [5.829e-05 2.509e-05 2.801e-06 ... 2.027e-06 3.219e-06 3.219e-06]
 [3.576e-07 1.788e-07 0.000e+00 ... 0.000e+00 0.000e+00 0.000e+00]
 ...
 [9.537e-07 4.172e-07 5.960e-08 ... 0.000e+00 5.960e-08 5.960e-08]
 [2.885e-05 9.298e-06 1.550e-06 ... 1.252e-06 1.252e-06 1.371e-06]
 [4.649e-05 1.115e-05 9.537e-07 ... 1.431e-06 1

# Hyperparameter Tuning

In [29]:
#hyperopt
from hyperopt import fmin, tpe, hp, rand, Trials

In [30]:
def HOptExec(param):
    #Tester(True/False)
    Tester = False
    
    #Preprocessing Data
    train, test, target = preprocessing(param, trainFeature, testFeature, trainTargetScored)
    
    #CV folds
    folds = CV_folds(train, target)
    
    #Config about Fitting
    confFitting = Config_about_Fitting(train, test, target, folds)
    
    # Averaging on multiple SEEDS
    SEED = [0, 1, 2, 3 ,4, 5]
    oof = np.zeros((len(train), confFitting["num_targets"]))
    predictions = np.zeros((len(test), confFitting["num_targets"]))
    
    ### RUN ###
    for seed in SEED:
        if Tester:
            print('~' * 20, 'SEED', seed, '~' * 20)
        oof_, predictions_ = run_k_fold(Tester, NFOLDS, seed, param,
                                       folds, train, test, target, confFitting)
        oof += oof_ / len(SEED)
        predictions += predictions_ / len(SEED)
    
    #CV 評価
    score = CV_Evaluation(confFitting, oof, train, target)
    
    # 課題提出
    #Submit(confFitting, predictions, test)
    
    return score

In [None]:
%%time

param_space = {'hidden_size1': 512, 
               'hidden_size2': 512, 
               'dropOutRate1': 0.20393004966355735, 
               'dropOutRate2': 0.39170486751620137,
               'rankGauss_n_quantiles': 488.0393350201078,
               'leakyReluSlope': hp.uniform('leakyReluSlope', 1e-3, 1e-1),
              }

trials = Trials()

hopt = fmin(fn = HOptExec, 
            space = param_space, 
            algo = tpe.suggest, 
            max_evals = 15, 
            #timeout = 8.9 * 60 * 60, 
            trials = trials, 
           )

print(hopt)

CV log_loss:                                          
0.014981391207012364                                  
CV log_loss:                                                                         
0.01504250432043703                                                                  
CV log_loss:                                                                         
0.015004835293169368                                                                 
CV log_loss:                                                                         
0.015002514832957038                                                                 
CV log_loss:                                                                         
0.015008986227264749                                                                 
CV log_loss:                                                                         
0.014993115273980633                                                                   
CV log_loss:                