In [1]:
import numpy as np
import torch
from torch import nn
from skorch import NeuralNetRegressor
from sklearn.model_selection import GridSearchCV


In [2]:
# Define the neural network architecture with a variable number of layers
class VariableLayerNet(nn.Module):
    def __init__(self, num_units=10, num_layers=2):
        super(VariableLayerNet, self).__init__()
        layers = [nn.Linear(1, num_units), nn.Sigmoid()]
        
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(num_units, num_units))
            layers.append(nn.Sigmoid())
        
        layers.append(nn.Linear(num_units, 1))  # Output layer
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


In [10]:
num_points = 1000
x_values = np.linspace(0, 4 * np.pi, num_points)
y_values = np.sin(x_values)
y_values[y_values < 0] = 0
y_values[y_values > 0] = 1

In [11]:
x_tensor = torch.tensor(x_values, dtype=torch.float32).view(-1, 1)
y_tensor = torch.tensor(y_values, dtype=torch.float32).view(-1, 1)

In [12]:
num_samples = 900
indices = np.random.choice(range(num_points), num_samples, replace=False)
x_train = x_tensor[indices]
y_train = y_tensor[indices]

In [13]:
params = {
    'module__num_units': [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
    'module__num_layers': [2, 3, 4, 6,8,12,15,18 , 20],  # Example: searching the best numlayers
    'max_epochs': [10],  # You can define more epochs
    'lr': [0.01, 0.05, 0.1],  # Learning rates to try
}

In [14]:
# Initialize the neural net regressor
net = NeuralNetRegressor(
    module=VariableLayerNet,
    criterion=nn.MSELoss,
    optimizer=torch.optim.Adam,
    iterator_train__shuffle=True,
)


In [15]:
gs = GridSearchCV(net, params, refit=False, cv=3, scoring='neg_mean_squared_error')



In [16]:
# Run grid search
gs.fit(x_train, y_train)

# Best parameters found
print("Best parameters found:", gs.best_params_)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.7973[0m        [32m1.7100[0m  0.0187
      2        [36m1.6346[0m        [32m1.5525[0m  0.0156
      3        [36m1.4814[0m        [32m1.4089[0m  0.0156
      4        [36m1.3446[0m        [32m1.2774[0m  0.0313
      5        [36m1.2183[0m        [32m1.1568[0m  0.0155
      6        [36m1.1020[0m        [32m1.0452[0m  0.0312
      7        [36m0.9919[0m        [32m0.9419[0m  0.0158
      8        [36m0.8947[0m        [32m0.8449[0m  0.0312
      9        [36m0.8013[0m        [32m0.7557[0m  0.0312
     10        [36m0.7161[0m        [32m0.6742[0m  0.0155
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4503[0m        [32m0.4197[0m  0.0156
      2        [36m0.3919[0m        [32m0.3680[0m  0.0158
      3        [36m0.3460[0m        [32m0.3275[0m  0.0157
      4        [3

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2645[0m        [32m0.2551[0m  0.0160
      2        [36m0.2581[0m        [32m0.2541[0m  0.0103
      3        [36m0.2541[0m        [32m0.2494[0m  0.0162
      4        [36m0.2482[0m        [32m0.2485[0m  0.0161
      5        [36m0.2463[0m        [32m0.2467[0m  0.0157
      6        [36m0.2428[0m        [32m0.2437[0m  0.0156
      7        [36m0.2392[0m        [32m0.2411[0m  0.0000
      8        [36m0.2371[0m        [32m0.2392[0m  0.0000
      9        [36m0.2345[0m        [32m0.2378[0m  0.0162
     10        [36m0.2299[0m        [32m0.2360[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5067[0m        [32m0.3884[0m  0.0167
      2        [36m0.3551[0m        [32m0.2903[0m  0.0124
      3        [36m0.2756[0m        [32m0.2546[0m  0.0321
      4        [3

      3        [36m0.2378[0m        [32m0.2395[0m  0.0128
      4        [36m0.2349[0m        [32m0.2369[0m  0.0026
      5        [36m0.2303[0m        [32m0.2356[0m  0.0005
      6        [36m0.2250[0m        [32m0.2325[0m  0.0278
      7        [36m0.2205[0m        [32m0.2292[0m  0.0000
      8        [36m0.2163[0m        [32m0.2269[0m  0.0157
      9        [36m0.2110[0m        0.2279  0.0255
     10        [36m0.2076[0m        [32m0.2241[0m  0.0075
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5296[0m        [32m0.3604[0m  0.0000
      2        [36m0.3225[0m        [32m0.2481[0m  0.0000
      3        [36m0.2450[0m        [32m0.2394[0m  0.0012
      4        [36m0.2447[0m        0.2593  0.0156
      5        0.2569        0.2563  0.0122
      6        0.2489        [32m0.2371[0m  0.0162
      7        [36m0.2319[0m        [32m0.2226[0m  0.0160
      8        [36m0.2233

      6        [36m0.2155[0m        0.2291  0.0148
      7        [36m0.2106[0m        [32m0.2247[0m  0.0130
      8        [36m0.2048[0m        [32m0.2202[0m  0.0069
      9        [36m0.2019[0m        0.2205  0.0215
     10        [36m0.1985[0m        0.2209  0.0149
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2474[0m        [32m0.2356[0m  0.0147
      2        [36m0.2363[0m        [32m0.2292[0m  0.0033
      3        [36m0.2274[0m        [32m0.2151[0m  0.0204
      4        [36m0.2203[0m        [32m0.2059[0m  0.0138
      5        [36m0.2123[0m        [32m0.1991[0m  0.0213
      6        [36m0.2077[0m        [32m0.1916[0m  0.0072
      7        [36m0.2034[0m        [32m0.1869[0m  0.0143
      8        [36m0.2017[0m        [32m0.1843[0m  0.0144
      9        [36m0.2014[0m        [32m0.1829[0m  0.0140
     10        0.2015        [32m0.1819[0m  0.0138
  epoch    train_l

     10        [36m0.2095[0m        [32m0.2237[0m  0.0138
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7038[0m        [32m0.3232[0m  0.0121
      2        [36m0.2873[0m        [32m0.2994[0m  0.0083
      3        0.3246        0.3469  0.0156
      4        0.3165        [32m0.2722[0m  0.0258
      5        [36m0.2525[0m        [32m0.2349[0m  0.0058
      6        [36m0.2396[0m        0.2424  0.0213
      7        0.2503        0.2370  0.0143
      8        [36m0.2373[0m        [32m0.2176[0m  0.0137
      9        [36m0.2198[0m        [32m0.2123[0m  0.0151
     10        [36m0.2174[0m        0.2136  0.0204
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2745[0m        [32m0.2678[0m  0.0148
      2        [36m0.2527[0m        [32m0.2359[0m  0.0129
      3        [36m0.2387[0m        [32m0.2276[0m  0.0138
      4        [36

      3        [36m1.2940[0m        [32m1.1998[0m  0.0121
      4        [36m1.1837[0m        [32m1.0971[0m  0.0157
      5        [36m1.0839[0m        [32m1.0039[0m  0.0008
      6        [36m0.9927[0m        [32m0.9197[0m  0.0232
      7        [36m0.9093[0m        [32m0.8437[0m  0.0157
      8        [36m0.8349[0m        [32m0.7752[0m  0.0259
      9        [36m0.7682[0m        [32m0.7134[0m  0.0278
     10        [36m0.7073[0m        [32m0.6580[0m  0.0271
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7603[0m        [32m0.6687[0m  0.0314
      2        [36m0.6676[0m        [32m0.5837[0m  0.0312
      3        [36m0.5824[0m        [32m0.5081[0m  0.0313
      4        [36m0.5067[0m        [32m0.4424[0m  0.0168
      5        [36m0.4425[0m        [32m0.3870[0m  0.0470
      6        [36m0.3866[0m        [32m0.3425[0m  0.0428
      7        [36m0.3425[0m        [32m0

      7        0.2547        0.2601  0.0139
      8        0.2590        0.2614  0.0208
      9        0.2584        0.2580  0.0144
     10        0.2545        0.2524  0.0139
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2565[0m        [32m0.2493[0m  0.0152
      2        [36m0.2522[0m        0.2501  0.0143
      3        [36m0.2511[0m        [32m0.2485[0m  0.0137
      4        [36m0.2479[0m        0.2498  0.0075
      5        0.2486        0.2505  0.0214
      6        0.2486        0.2488  0.0140
      7        [36m0.2474[0m        [32m0.2462[0m  0.0133
      8        [36m0.2468[0m        [32m0.2453[0m  0.0144
      9        [36m0.2467[0m        [32m0.2448[0m  0.0206
     10        [36m0.2458[0m        [32m0.2446[0m  0.0136
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2581[0m        [32m0.2512[0m  0.0170
      2        0.259

      7        0.2689        0.2576  0.0208
      8        [36m0.2494[0m        [32m0.2486[0m  0.0139
      9        0.2505        0.2506  0.0140
     10        0.2551        0.2515  0.0139
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6247[0m        [32m0.4031[0m  0.0131
      2        [36m0.3350[0m        [32m0.2601[0m  0.0074
      3        [36m0.2498[0m        [32m0.2597[0m  0.0273
      4        0.2734        0.2848  0.0144
      5        0.2872        0.2769  0.0140
      6        0.2721        [32m0.2574[0m  0.0148
      7        0.2541        [32m0.2483[0m  0.0145
      8        [36m0.2456[0m        0.2502  0.0169
      9        0.2498        0.2540  0.0140
     10        0.2505        0.2519  0.0210
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3855[0m        [32m0.2673[0m  0.0069
      2        [36m0.2639[0m        [32m0.26

      7        0.2736        0.2556  0.0143
      8        [36m0.2541[0m        [32m0.2498[0m  0.0205
      9        [36m0.2512[0m        0.2555  0.0138
     10        0.2541        0.2575  0.0140
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7616[0m        [32m0.4010[0m  0.0294
      2        [36m0.3229[0m        [32m0.2494[0m  0.0123
      3        [36m0.2756[0m        0.3237  0.0202
      4        0.3228        0.3150  0.0084
      5        0.2902        0.2611  0.0197
      6        [36m0.2526[0m        [32m0.2439[0m  0.0205
      7        [36m0.2489[0m        0.2510  0.0406
      8        0.2555        0.2505  0.0068
      9        0.2513        [32m0.2423[0m  0.0215
     10        [36m0.2431[0m        [32m0.2379[0m  0.0135
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5389[0m        [32m0.2916[0m  0.0140
      2        [36m

      5        [36m0.2464[0m        0.2551  0.0209
      6        0.2598        0.2560  0.0149
      7        0.2549        [32m0.2446[0m  0.0197
      8        [36m0.2443[0m        [32m0.2421[0m  0.0143
      9        [36m0.2421[0m        0.2433  0.0134
     10        0.2429        [32m0.2398[0m  0.0214
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.4661[0m        [32m0.7088[0m  0.0139
      2        [36m0.5450[0m        [32m0.2824[0m  0.0140
      3        [36m0.2682[0m        [32m0.2821[0m  0.0145
      4        0.3045        0.3586  0.0137
      5        0.3444        0.3417  0.0205
      6        0.3098        0.2826  0.0156
      7        [36m0.2639[0m        [32m0.2504[0m  0.0162
      8        [36m0.2459[0m        [32m0.2477[0m  0.0244
      9        0.2541        0.2525  0.0206
     10        0.2577        0.2513  0.0145
  epoch    train_loss    valid_loss     dur
-------  -----------

     10        0.2530        0.2543  0.0139
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5713[0m        [32m0.4921[0m  0.0148
      2        [36m0.4885[0m        [32m0.4233[0m  0.0070
      3        [36m0.4221[0m        [32m0.3667[0m  0.0145
      4        [36m0.3651[0m        [32m0.3227[0m  0.0140
      5        [36m0.3216[0m        [32m0.2905[0m  0.0138
      6        [36m0.2916[0m        [32m0.2687[0m  0.0222
      7        [36m0.2698[0m        [32m0.2563[0m  0.0270
      8        [36m0.2587[0m        [32m0.2510[0m  0.0203
      9        [36m0.2512[0m        [32m0.2502[0m  0.0157
     10        [36m0.2501[0m        0.2516  0.0189
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.6415[0m        [32m1.4824[0m  0.0159
      2        [36m1.3789[0m        [32m1.2430[0m  0.0183
      3        [36m1.1519[0m        [32m1.

      7        [36m0.2534[0m        0.2681  0.0070
      8        0.2665        0.2796  0.0219
      9        0.2727        0.2790  0.0267
     10        0.2692        0.2696  0.0145
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7195[0m        [32m0.5323[0m  0.0135
      2        [36m0.4463[0m        [32m0.3409[0m  0.0150
      3        [36m0.3043[0m        [32m0.2598[0m  0.0203
      4        [36m0.2522[0m        [32m0.2531[0m  0.0203
      5        0.2600        0.2695  0.0144
      6        0.2750        0.2736  0.0134
      7        0.2733        0.2649  0.0215
      8        0.2626        0.2541  0.0143
      9        0.2533        [32m0.2501[0m  0.0491
     10        [36m0.2500[0m        0.2512  0.0162
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2516[0m        [32m0.2513[0m  0.0176
      2        [36m0.2506[0m        [32m0.25

     10        0.2530        0.2564  0.0130
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2869[0m        [32m0.2551[0m  0.0170
      2        [36m0.2619[0m        0.2701  0.0139
      3        [36m0.2599[0m        [32m0.2501[0m  0.0058
      4        [36m0.2501[0m        0.2534  0.0208
      5        0.2560        0.2520  0.0022
      6        0.2508        0.2503  0.0179
      7        0.2527        0.2541  0.0222
      8        0.2513        [32m0.2493[0m  0.0187
      9        [36m0.2485[0m        [32m0.2490[0m  0.0063
     10        0.2503        [32m0.2485[0m  0.0287
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6374[0m        [32m0.3739[0m  0.0154
      2        [36m0.3190[0m        [32m0.2501[0m  0.0141
      3        [36m0.2578[0m        0.2963  0.0277
      4        0.2975        0.3118  0.0138
      5        0.2898        

      2        [36m0.2903[0m        [32m0.2670[0m  0.0140
      3        0.2929        0.3496  0.0140
      4        0.3266        0.3041  0.0232
      5        [36m0.2766[0m        [32m0.2541[0m  0.0184
      6        [36m0.2540[0m        [32m0.2529[0m  0.0135
      7        0.2583        0.2578  0.0160
      8        0.2612        0.2531  0.0049
      9        [36m0.2528[0m        [32m0.2486[0m  0.0198
     10        [36m0.2485[0m        0.2517  0.0106
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3156[0m        [32m0.2611[0m  0.0206
      2        [36m0.2819[0m        0.2706  0.0228
      3        [36m0.2624[0m        [32m0.2516[0m  0.0188
      4        [36m0.2554[0m        0.2621  0.0138
      5        0.2575        0.2516  0.0221
      6        [36m0.2492[0m        [32m0.2501[0m  0.0139
      7        0.2509        0.2505  0.0247
      8        0.2496        [32m0.2484[0m  0.0164
  

      2        [36m0.2752[0m        0.3078  0.0207
      3        0.3333        0.3305  0.0073
      4        0.3094        [32m0.2621[0m  0.0270
      5        [36m0.2566[0m        [32m0.2537[0m  0.0138
      6        0.2578        0.2686  0.0209
      7        0.2661        0.2643  0.0142
      8        0.2578        [32m0.2522[0m  0.0140
      9        [36m0.2489[0m        [32m0.2500[0m  0.0184
     10        0.2509        0.2524  0.0138
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3608[0m        [32m0.2624[0m  0.0491
      2        [36m0.2883[0m        0.2965  0.0212
      3        [36m0.2727[0m        [32m0.2499[0m  0.0215
      4        [36m0.2546[0m        0.2620  0.0138
      5        0.2639        0.2541  0.0151
      6        [36m0.2514[0m        [32m0.2498[0m  0.0144
      7        0.2521        0.2556  0.0222
      8        0.2530        0.2506  0.0187
      9        [36m0.2493[0

      9        0.2520        0.2515  0.0215
     10        [36m0.2499[0m        0.2501  0.0218
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8053[0m        [32m0.6525[0m  0.0152
      2        [36m0.5998[0m        [32m0.4869[0m  0.0120
      3        [36m0.4516[0m        [32m0.3685[0m  0.0276
      4        [36m0.3454[0m        [32m0.2950[0m  0.0214
      5        [36m0.2845[0m        [32m0.2589[0m  0.0134
      6        [36m0.2553[0m        [32m0.2499[0m  0.0282
      7        [36m0.2506[0m        0.2544  0.0134
      8        0.2548        0.2607  0.0269
      9        0.2599        0.2628  0.0207
     10        0.2600        0.2599  0.0144
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3418[0m        [32m0.2801[0m  0.0195
      2        [36m0.2698[0m        [32m0.2512[0m  0.0217
      3        [36m0.2528[0m        0.2555  

      3        [36m0.2531[0m        0.2705  0.0212
      4        0.2671        0.2750  0.0204
      5        0.2645        0.2607  0.0287
      6        0.2539        0.2510  0.0138
      7        [36m0.2503[0m        [32m0.2502[0m  0.0157
      8        0.2515        0.2507  0.0262
      9        0.2521        [32m0.2501[0m  0.0129
     10        0.2508        [32m0.2500[0m  0.0285
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2525[0m        [32m0.2508[0m  0.0293
      2        0.2537        0.2541  0.0265
      3        [36m0.2513[0m        [32m0.2501[0m  0.0208
      4        0.2518        0.2507  0.0208
      5        [36m0.2511[0m        0.2508  0.0208
      6        [36m0.2509[0m        0.2505  0.0214
      7        [36m0.2501[0m        [32m0.2500[0m  0.0208
      8        [36m0.2500[0m        [32m0.2500[0m  0.0203
      9        0.2502        0.2502  0.0279
     10        [36m0.2499[0

     10        [36m0.2495[0m        0.2502  0.0214
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5604[0m        [32m0.3253[0m  0.0178
      2        [36m0.2775[0m        [32m0.2559[0m  0.0207
      3        [36m0.2748[0m        0.3117  0.0225
      4        0.3045        0.2895  0.0191
      5        [36m0.2721[0m        [32m0.2546[0m  0.0212
      6        [36m0.2538[0m        [32m0.2520[0m  0.0233
      7        0.2571        0.2579  0.0180
      8        0.2594        0.2549  0.0216
      9        0.2541        [32m0.2502[0m  0.0273
     10        [36m0.2504[0m        0.2513  0.0143
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3422[0m        [32m0.2500[0m  0.0160
      2        [36m0.2584[0m        0.2935  0.0216
      3        0.2813        0.2701  0.0139
      4        [36m0.2539[0m        0.2500  0.0165
      5        0.254

      5        [36m0.2511[0m        0.2557  0.0141
      6        0.2622        0.2586  0.0132
      7        0.2602        [32m0.2515[0m  0.0133
      8        [36m0.2510[0m        [32m0.2514[0m  0.0287
      9        0.2516        0.2574  0.0196
     10        0.2534        0.2546  0.0242
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3142[0m        [32m0.2705[0m  0.0147
      2        [36m0.2907[0m        0.2766  0.0237
      3        [36m0.2624[0m        [32m0.2531[0m  0.0246
      4        [36m0.2565[0m        0.2672  0.0135
      5        0.2607        0.2531  0.0282
      6        [36m0.2517[0m        [32m0.2530[0m  0.0277
      7        0.2563        0.2533  0.0206
      8        0.2522        [32m0.2503[0m  0.0234
      9        [36m0.2499[0m        0.2525  0.0195
     10        0.2520        0.2510  0.0127
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  -

      8        0.2511        0.2500  0.0277
      9        [36m0.2498[0m        0.2504  0.0230
     10        0.2501        0.2507  0.0261
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3080[0m        [32m0.2866[0m  0.0053
      2        [36m0.2739[0m        [32m0.2612[0m  0.0282
      3        [36m0.2550[0m        [32m0.2511[0m  0.0274
      4        [36m0.2487[0m        [32m0.2502[0m  0.0211
      5        0.2530        0.2524  0.0205
      6        0.2540        0.2526  0.0266
      7        0.2534        0.2515  0.0158
      8        0.2518        0.2502  0.0271
      9        0.2505        [32m0.2500[0m  0.0234
     10        0.2501        0.2504  0.0186
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7103[0m        [32m0.6149[0m  0.0214
      2        [36m0.6105[0m        [32m0.5285[0m  0.0319
      3        [36m0.5271[0m        

      9        0.2509        [32m0.2499[0m  0.0145
     10        [36m0.2501[0m        0.2504  0.0140
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4016[0m        [32m0.2966[0m  0.0199
      2        [36m0.2841[0m        [32m0.2502[0m  0.0281
      3        [36m0.2520[0m        0.2639  0.0139
      4        0.2649        0.2767  0.0129
      5        0.2677        0.2667  0.0270
      6        0.2571        0.2541  0.0138
      7        [36m0.2517[0m        [32m0.2499[0m  0.0277
      8        [36m0.2503[0m        0.2506  0.0151
      9        0.2525        0.2507  0.0261
     10        0.2521        0.2499  0.0285
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5766[0m        [32m0.4478[0m  0.0292
      2        [36m0.3843[0m        [32m0.3069[0m  0.0232
      3        [36m0.2762[0m        [32m0.2524[0m  0.0288
      4        [36m

      4        0.2535        [32m0.2505[0m  0.0277
      5        [36m0.2500[0m        [32m0.2500[0m  0.0156
      6        0.2501        0.2501  0.0222
      7        0.2506        [32m0.2500[0m  0.0172
      8        0.2509        0.2512  0.0247
      9        0.2503        0.2501  0.0224
     10        0.2500        0.2504  0.0126
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7825[0m        [32m0.4678[0m  0.0295
      2        [36m0.3821[0m        [32m0.2625[0m  0.0564
      3        [36m0.2614[0m        0.2755  0.0238
      4        0.2914        0.3166  0.0237
      5        0.3068        0.2924  0.0210
      6        0.2747        [32m0.2587[0m  0.0242
      7        [36m0.2534[0m        [32m0.2502[0m  0.0253
      8        0.2543        0.2561  0.0305
      9        0.2586        0.2564  0.0245
     10        0.2569        0.2521  0.0416
  epoch    train_loss    valid_loss     dur
-------  ---

      2        [36m0.2590[0m        [32m0.2501[0m  0.0203
      3        [36m0.2510[0m        0.2517  0.0275
      4        0.2538        [32m0.2500[0m  0.0152
      5        [36m0.2495[0m        0.2537  0.0274
      6        0.2526        0.2542  0.0136
      7        0.2504        [32m0.2500[0m  0.0219
      8        0.2511        0.2501  0.0201
      9        0.2508        0.2509  0.0191
     10        0.2500        0.2517  0.0223
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2841[0m        [32m0.2592[0m  0.0195
      2        [36m0.2646[0m        [32m0.2547[0m  0.0271
      3        [36m0.2546[0m        0.2567  0.0160
      4        0.2554        [32m0.2539[0m  0.0270
      5        [36m0.2511[0m        [32m0.2501[0m  0.0200
      6        0.2512        0.2514  0.0288
      7        0.2517        0.2502  0.0143
      8        [36m0.2500[0m        0.2507  0.0158
      9        0.2505        

      8        0.2545        [32m0.2500[0m  0.0259
      9        [36m0.2518[0m        0.2511  0.0219
     10        [36m0.2503[0m        0.2502  0.0206
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2922[0m        [32m0.2848[0m  0.0157
      2        [36m0.2877[0m        [32m0.2614[0m  0.0270
      3        [36m0.2577[0m        [32m0.2599[0m  0.0157
      4        0.2638        [32m0.2554[0m  0.0191
      5        [36m0.2562[0m        [32m0.2538[0m  0.0156
      6        0.2567        0.2560  0.0205
      7        [36m0.2509[0m        [32m0.2500[0m  0.0225
      8        0.2518        0.2525  0.0262
      9        0.2527        0.2501  0.0205
     10        0.2510        0.2525  0.0213
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2565[0m        [32m0.2574[0m  0.0228
      2        [36m0.2538[0m        0.2585  0.0163
      3    

      2        [36m0.2513[0m        0.2547  0.0205
      3        0.2555        0.2590  0.0275
      4        0.2557        0.2528  0.0208
      5        [36m0.2507[0m        [32m0.2499[0m  0.0352
      6        [36m0.2505[0m        0.2506  0.0292
      7        0.2515        0.2504  0.0174
      8        [36m0.2504[0m        [32m0.2499[0m  0.0164
      9        [36m0.2502[0m        0.2509  0.0224
     10        0.2509        0.2514  0.0293
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4660[0m        [32m0.3729[0m  0.0069
      2        [36m0.3590[0m        [32m0.2984[0m  0.0342
      3        [36m0.2921[0m        [32m0.2611[0m  0.0203
      4        [36m0.2582[0m        [32m0.2501[0m  0.0194
      5        [36m0.2512[0m        0.2531  0.0158
      6        0.2529        0.2591  0.0181
      7        0.2561        0.2617  0.0235
      8        0.2569        0.2596  0.0213
      9        0.254

      6        0.2520        [32m0.2504[0m  0.0126
      7        0.2503        0.2528  0.0246
      8        0.2507        0.2517  0.0248
      9        [36m0.2496[0m        [32m0.2501[0m  0.0125
     10        0.2501        [32m0.2499[0m  0.0290
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3145[0m        [32m0.2535[0m  0.0298
      2        [36m0.2502[0m        0.2632  0.0139
      3        0.2676        0.2622  0.0347
      4        0.2593        [32m0.2500[0m  0.0210
      5        0.2529        0.2552  0.0246
      6        0.2544        0.2547  0.0348
      7        0.2519        0.2511  0.0171
      8        0.2504        0.2505  0.0246
      9        0.2531        0.2517  0.0218
     10        0.2515        0.2501  0.0183
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6240[0m        [32m0.4174[0m  0.0123
      2        [36m0.3603[0m

      2        [36m0.2625[0m        0.2726  0.0315
      3        [36m0.2619[0m        [32m0.2508[0m  0.0174
      4        [36m0.2505[0m        0.2536  0.0274
      5        0.2561        0.2525  0.0284
      6        0.2524        [32m0.2504[0m  0.0201
      7        0.2515        0.2529  0.0110
      8        0.2518        0.2504  0.0207
      9        [36m0.2505[0m        [32m0.2500[0m  0.0375
     10        [36m0.2503[0m        [32m0.2499[0m  0.0347
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6136[0m        [32m0.3584[0m  0.0303
      2        [36m0.3093[0m        [32m0.2502[0m  0.0200
      3        [36m0.2537[0m        0.2834  0.0346
      4        0.2842        0.3023  0.0152
      5        0.2864        0.2795  0.0305
      6        0.2634        0.2565  0.0274
      7        [36m0.2513[0m        [32m0.2499[0m  0.0377
      8        0.2515        0.2518  0.0178
      9        0.254

      7        0.2563        0.2533  0.0287
      8        [36m0.2509[0m        [32m0.2505[0m  0.0333
      9        0.2529        [32m0.2502[0m  0.0220
     10        0.2511        0.2520  0.0268
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3138[0m        [32m0.2611[0m  0.0125
      2        [36m0.2756[0m        0.2735  0.0324
      3        [36m0.2610[0m        [32m0.2500[0m  0.0321
      4        [36m0.2542[0m        0.2638  0.0544
      5        0.2604        0.2556  0.0209
      6        [36m0.2526[0m        0.2503  0.0404
      7        [36m0.2518[0m        0.2521  0.0385
      8        0.2523        0.2500  0.0217
      9        [36m0.2506[0m        0.2512  0.0193
     10        [36m0.2506[0m        0.2507  0.0232
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2635[0m        [32m0.2577[0m  0.0156
      2        [36m0.2494[0

     10        0.2506        0.2501  0.0323
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5833[0m        [32m0.5241[0m  0.0182
      2        [36m0.4823[0m        [32m0.4361[0m  0.0242
      3        [36m0.4047[0m        [32m0.3699[0m  0.0402
      4        [36m0.3444[0m        [32m0.3226[0m  0.0242
      5        [36m0.3046[0m        [32m0.2902[0m  0.0394
      6        [36m0.2787[0m        [32m0.2698[0m  0.0257
      7        [36m0.2629[0m        [32m0.2581[0m  0.0388
      8        [36m0.2546[0m        [32m0.2525[0m  0.0310
      9        [36m0.2510[0m        [32m0.2503[0m  0.0158
     10        [36m0.2495[0m        [32m0.2499[0m  0.0252
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2505[0m        [32m0.2505[0m  0.0313
      2        [36m0.2502[0m        0.2524  0.0269
      3        0.2506        0.2509  0.0321
 

      7        [36m0.2917[0m        [32m0.2663[0m  0.0403
      8        [36m0.2626[0m        [32m0.2521[0m  0.0242
      9        [36m0.2514[0m        [32m0.2501[0m  0.0314
     10        [36m0.2504[0m        0.2531  0.0240
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2636[0m        [32m0.2532[0m  0.0313
      2        [36m0.2555[0m        0.2628  0.0156
      3        [36m0.2545[0m        [32m0.2513[0m  0.0252
      4        [36m0.2496[0m        [32m0.2507[0m  0.0156
      5        0.2528        [32m0.2505[0m  0.0157
      6        0.2517        0.2507  0.0312
      7        0.2500        0.2523  0.0312
      8        0.2504        0.2515  0.0271
      9        0.2496        [32m0.2502[0m  0.0322
     10        0.2500        [32m0.2499[0m  0.0316
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6972[0m        [32m0.5210[0m  

      2        [36m0.2497[0m        0.2519  0.0278
      3        0.2518        0.2502  0.0265
      4        [36m0.2494[0m        0.2512  0.0278
      5        0.2527        0.2503  0.0261
      6        0.2497        0.2520  0.0288
      7        0.2514        0.2506  0.0348
      8        0.2517        0.2501  0.0278
      9        0.2502        0.2505  0.0352
     10        0.2502        0.2502  0.0296
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2756[0m        [32m0.2580[0m  0.0319
      2        [36m0.2633[0m        0.2614  0.0326
      3        [36m0.2545[0m        [32m0.2505[0m  0.0256
      4        [36m0.2540[0m        0.2539  0.0217
      5        [36m0.2530[0m        [32m0.2499[0m  0.0268
      6        [36m0.2515[0m        0.2545  0.0296
      7        0.2528        0.2513  0.0293
      8        [36m0.2499[0m        0.2500  0.0164
      9        0.2519        0.2507  0.0298
     10     

      8        0.2522        0.2519  0.0210
      9        0.2513        [32m0.2500[0m  0.0276
     10        0.2504        0.2500  0.0283
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5292[0m        [32m0.2923[0m  0.0286
      2        [36m0.2701[0m        [32m0.2708[0m  0.0260
      3        0.2835        0.3158  0.0334
      4        0.2955        0.2802  0.0425
      5        [36m0.2632[0m        [32m0.2516[0m  0.0293
      6        [36m0.2510[0m        0.2525  0.0424
      7        0.2581        0.2557  0.0248
      8        0.2577        0.2520  0.0289
      9        0.2520        [32m0.2502[0m  0.0336
     10        0.2514        0.2547  0.0411
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4384[0m        [32m0.2564[0m  0.0313
      2        [36m0.2653[0m        0.2934  0.0247
      3        0.2967        0.2760  0.0286
      4     

      4        [36m0.2530[0m        [32m0.2501[0m  0.0361
      5        [36m0.2489[0m        0.2530  0.0255
      6        0.2554        0.2532  0.0351
      7        0.2494        0.2504  0.0280
      8        0.2535        0.2528  0.0278
      9        0.2514        0.2503  0.0354
     10        0.2520        0.2541  0.0267
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6095[0m        [32m0.2578[0m  0.1471
      2        [36m0.2710[0m        0.3462  0.0323
      3        0.3429        0.3169  0.0311
      4        0.2825        [32m0.2526[0m  0.0317
      5        [36m0.2511[0m        0.2613  0.0115
      6        0.2672        0.2673  0.0265
      7        0.2657        0.2545  0.0297
      8        0.2515        [32m0.2505[0m  0.0255
      9        0.2529        0.2570  0.0288
     10        0.2556        0.2544  0.0338
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  --

      3        0.2505        0.2501  0.0358
      4        [36m0.2501[0m        0.2504  0.0171
      5        0.2508        0.2506  0.0424
      6        [36m0.2499[0m        0.2500  0.0266
      7        0.2504        0.2500  0.0246
      8        0.2512        0.2501  0.0241
      9        0.2502        0.2500  0.0206
     10        0.2502        0.2499  0.0211
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2525[0m        [32m0.2500[0m  0.0313
      2        [36m0.2501[0m        0.2506  0.0333
      3        [36m0.2499[0m        0.2505  0.0351
      4        0.2501        0.2508  0.0312
      5        [36m0.2498[0m        0.2507  0.0288
      6        0.2499        0.2504  0.0210
      7        [36m0.2498[0m        0.2503  0.0267
      8        0.2499        0.2503  0.0361
      9        0.2499        0.2510  0.0259
     10        0.2499        0.2507  0.0433
  epoch    train_loss    valid_loss     dur
----

      9        0.2679        0.2617  0.0404
     10        0.2536        0.2519  0.0285
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4055[0m        [32m0.2970[0m  0.0272
      2        [36m0.2673[0m        [32m0.2504[0m  0.0349
      3        [36m0.2672[0m        0.2729  0.0323
      4        0.2761        0.2679  0.0277
      5        [36m0.2645[0m        0.2536  0.0435
      6        [36m0.2523[0m        [32m0.2503[0m  0.0263
      7        [36m0.2505[0m        0.2540  0.0365
      8        0.2534        0.2550  0.0264
      9        0.2533        0.2526  0.0341
     10        0.2509        0.2504  0.0345
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6189[0m        [32m0.4100[0m  0.0293
      2        [36m0.3544[0m        [32m0.2718[0m  0.0279
      3        [36m0.2628[0m        [32m0.2515[0m  0.0351
      4        [36m0.2561[0

      5        0.2558        0.2554  0.0349
      6        0.2564        [32m0.2502[0m  0.0278
      7        [36m0.2502[0m        0.2511  0.0276
      8        0.2507        0.2519  0.0350
      9        0.2514        0.2503  0.0313
     10        0.2502        [32m0.2500[0m  0.0401
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8113[0m        [32m0.4385[0m  0.0282
      2        [36m0.3608[0m        [32m0.2513[0m  0.0262
      3        [36m0.2594[0m        0.3024  0.0344
      4        0.3065        0.3312  0.0363
      5        0.3079        0.2910  0.0340
      6        0.2693        0.2556  0.0336
      7        [36m0.2511[0m        [32m0.2507[0m  0.0295
      8        0.2557        0.2558  0.0348
      9        0.2601        0.2550  0.0266
     10        0.2563        0.2508  0.0491
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2664[0m

      2        [36m0.2728[0m        [32m0.3002[0m  0.0399
      3        0.3351        0.3389  0.0421
      4        0.3166        [32m0.2657[0m  0.0294
      5        [36m0.2538[0m        [32m0.2531[0m  0.0331
      6        0.2588        0.2718  0.0395
      7        0.2701        0.2679  0.0296
      8        0.2599        [32m0.2527[0m  0.0268
      9        [36m0.2513[0m        [32m0.2511[0m  0.0425
     10        0.2535        0.2540  0.0416
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2609[0m        [32m0.2552[0m  0.0156
      2        [36m0.2514[0m        0.2564  0.0473
      3        0.2550        [32m0.2513[0m  0.0342
      4        [36m0.2511[0m        0.2517  0.0274
      5        [36m0.2507[0m        [32m0.2500[0m  0.0282
      6        0.2508        [32m0.2499[0m  0.0465
      7        0.2515        0.2512  0.0294
      8        0.2508        0.2502  0.0310
      9        [36m

      2        [36m1.0968[0m        [32m0.9631[0m  0.0344
      3        [36m0.9231[0m        [32m0.8087[0m  0.0363
      4        [36m0.7762[0m        [32m0.6772[0m  0.0335
      5        [36m0.6517[0m        [32m0.5682[0m  0.0272
      6        [36m0.5489[0m        [32m0.4799[0m  0.0345
      7        [36m0.4652[0m        [32m0.4102[0m  0.0350
      8        [36m0.3976[0m        [32m0.3572[0m  0.0364
      9        [36m0.3492[0m        [32m0.3177[0m  0.0273
     10        [36m0.3116[0m        [32m0.2899[0m  0.0329
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5297[0m        [32m0.4488[0m  0.0300
      2        [36m0.4397[0m        [32m0.3770[0m  0.0341
      3        [36m0.3725[0m        [32m0.3231[0m  0.0402
      4        [36m0.3196[0m        [32m0.2863[0m  0.0281
      5        [36m0.2849[0m        [32m0.2638[0m  0.0303
      6        [36m0.2641[0m        [32m0

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.2891[0m        [32m0.9895[0m  0.0342
      2        [36m0.9135[0m        [32m0.6903[0m  0.0278
      3        [36m0.6360[0m        [32m0.4813[0m  0.0342
      4        [36m0.4491[0m        [32m0.3471[0m  0.0341
      5        [36m0.3277[0m        [32m0.2759[0m  0.0307
      6        [36m0.2712[0m        [32m0.2510[0m  0.0361
      7        [36m0.2477[0m        0.2536  0.0317
      8        0.2538        0.2648  0.0412
      9        0.2622        0.2717  0.0362
     10        0.2655        0.2706  0.0264
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2545[0m        [32m0.2557[0m  0.0324
      2        0.2611        0.2575  0.0304
      3        [36m0.2511[0m        [32m0.2502[0m  0.0416
      4        0.2540        0.2542  0.0369
      5        0.2538        0.2503  0.0197
      6    

      5        [36m0.2502[0m        0.2562  0.0378
      6        0.2548        0.2545  0.0336
      7        0.2528        [32m0.2502[0m  0.0283
      8        [36m0.2499[0m        0.2503  0.0356
      9        0.2507        0.2502  0.0335
     10        0.2505        [32m0.2500[0m  0.0331
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3273[0m        [32m0.2503[0m  0.0305
      2        [36m0.2558[0m        0.2811  0.0322
      3        0.2794        0.2691  0.0256
      4        0.2569        [32m0.2499[0m  0.0466
      5        [36m0.2533[0m        0.2568  0.0326
      6        0.2593        0.2544  0.0408
      7        0.2533        0.2499  0.0385
      8        [36m0.2522[0m        0.2532  0.0287
      9        0.2526        0.2529  0.0478
     10        [36m0.2508[0m        0.2502  0.0497
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0

      2        [36m0.2713[0m        0.3298  0.0346
      3        0.3204        0.3106  0.0366
      4        0.2778        [32m0.2555[0m  0.0430
      5        [36m0.2543[0m        0.2556  0.0427
      6        0.2637        0.2605  0.0419
      7        0.2619        [32m0.2515[0m  0.0387
      8        [36m0.2517[0m        0.2521  0.0359
      9        [36m0.2513[0m        0.2568  0.0331
     10        0.2537        0.2555  0.0349
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2715[0m        [32m0.2682[0m  0.0314
      2        [36m0.2680[0m        [32m0.2509[0m  0.0329
      3        [36m0.2573[0m        0.2628  0.0400
      4        0.2573        0.2511  0.0292
      5        [36m0.2497[0m        0.2524  0.0495
      6        0.2540        [32m0.2509[0m  0.0203
      7        0.2499        [32m0.2508[0m  0.0276
      8        0.2508        0.2523  0.0401
      9        0.2510        [32m0.25

      7        0.2528        0.2537  0.0304
      8        0.2562        0.2542  0.0421
      9        0.2535        [32m0.2501[0m  0.0245
     10        0.2506        0.2528  0.0443
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2464[0m        [32m0.2645[0m  0.0313
      2        0.2604        [32m0.2523[0m  0.0372
      3        0.2508        0.2530  0.0277
      4        0.2526        [32m0.2500[0m  0.0686
      5        0.2507        0.2502  0.0390
      6        0.2514        [32m0.2500[0m  0.0390
      7        0.2506        0.2500  0.0274
      8        0.2503        0.2501  0.0375
      9        0.2501        0.2501  0.0316
     10        0.2510        0.2501  0.0416
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2850[0m        [32m0.2677[0m  0.0508
      2        [36m0.2618[0m        [32m0.2529[0m  0.0490
      3        [36m0.2584[0m

      9        [36m0.1977[0m        [32m0.1750[0m  0.0131
     10        0.1979        [32m0.1749[0m  0.0155
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2954[0m        [32m0.2985[0m  0.0183
      2        [36m0.2814[0m        [32m0.2553[0m  0.0146
      3        [36m0.2521[0m        [32m0.2512[0m  0.0132
      4        0.2558        [32m0.2423[0m  0.0071
      5        [36m0.2465[0m        0.2428  0.0213
      6        [36m0.2416[0m        [32m0.2358[0m  0.0138
      7        [36m0.2344[0m        [32m0.2238[0m  0.0136
      8        [36m0.2299[0m        [32m0.2156[0m  0.0129
      9        [36m0.2228[0m        [32m0.2079[0m  0.0218
     10        [36m0.2155[0m        [32m0.1990[0m  0.0138
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3340[0m        [32m0.2755[0m  0.0165
      2        [36m0.2822[0m        [32m0.

      2        [36m0.2458[0m        [32m0.2286[0m  0.0153
      3        [36m0.2360[0m        [32m0.2098[0m  0.0752
      4        [36m0.2183[0m        [32m0.1913[0m  0.0176
      5        [36m0.2072[0m        [32m0.1845[0m  0.0196
      6        0.2093        [32m0.1831[0m  0.0193
      7        [36m0.2062[0m        [32m0.1822[0m  0.0129
      8        [36m0.2047[0m        [32m0.1808[0m  0.0142
      9        0.2047        0.1812  0.0208
     10        [36m0.2041[0m        0.1843  0.0158
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3601[0m        [32m0.2879[0m  0.0142
      2        [36m0.2560[0m        [32m0.2760[0m  0.0169
      3        [36m0.2417[0m        [32m0.2245[0m  0.0173
      4        [36m0.2236[0m        0.2285  0.0225
      5        [36m0.2092[0m        0.2379  0.0177
      6        [36m0.2056[0m        [32m0.2241[0m  0.0190
      7        [36m0.1975[0m      

      7        [36m0.2079[0m        [32m0.1796[0m  0.0070
      8        [36m0.2064[0m        [32m0.1789[0m  0.0150
      9        [36m0.2060[0m        0.1849  0.0154
     10        [36m0.2049[0m        0.1803  0.0105
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3053[0m        [32m0.2322[0m  0.0127
      2        [36m0.2236[0m        0.2348  0.0148
      3        [36m0.2010[0m        [32m0.2214[0m  0.0137
      4        [36m0.1996[0m        0.2310  0.0139
      5        0.1999        0.2237  0.0209
      6        0.2022        [32m0.2167[0m  0.0158
      7        [36m0.1958[0m        0.2323  0.0124
      8        0.1969        [32m0.2147[0m  0.0135
      9        [36m0.1949[0m        [32m0.2141[0m  0.0138
     10        [36m0.1932[0m        0.2266  0.0147
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2733[0m        [32m0.3

      2        [36m0.3181[0m        [32m0.2361[0m  0.0079
      3        [36m0.2511[0m        0.2586  0.0156
      4        [36m0.2308[0m        0.2383  0.0149
      5        [36m0.2199[0m        0.2363  0.0164
      6        [36m0.2034[0m        [32m0.2209[0m  0.0139
      7        [36m0.2032[0m        [32m0.2206[0m  0.0105
      8        [36m0.1940[0m        0.2275  0.0162
      9        0.1968        0.2232  0.0225
     10        [36m0.1938[0m        [32m0.2172[0m  0.0221
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3447[0m        [32m0.2331[0m  0.0167
      2        [36m0.2464[0m        [32m0.2133[0m  0.0185
      3        [36m0.2175[0m        [32m0.2125[0m  0.0164
      4        [36m0.2093[0m        [32m0.1866[0m  0.0157
      5        [36m0.2062[0m        [32m0.1819[0m  0.0157
      6        [36m0.2055[0m        [32m0.1796[0m  0.0156
      7        [36m0.1990[0m      

      7        [36m0.2542[0m        [32m0.2539[0m  0.0214
      8        0.2600        0.2698  0.0156
      9        0.2766        0.2791  0.0190
     10        0.2813        0.2753  0.0205
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8950[0m        [32m0.5730[0m  0.0036
      2        [36m0.4733[0m        [32m0.2975[0m  0.0157
      3        [36m0.2741[0m        [32m0.2608[0m  0.0208
      4        0.2815        0.3201  0.0322
      5        0.3185        0.3136  0.0326
      6        0.2933        0.2694  0.0318
      7        [36m0.2583[0m        [32m0.2495[0m  0.0429
      8        [36m0.2515[0m        0.2551  0.0156
      9        0.2593        0.2584  0.0125
     10        0.2590        0.2533  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6501[0m        [32m0.3830[0m  0.0315
      2        [36m0.3370[0m        [32m0.25

      6        0.2532        [32m0.2380[0m  0.0166
      7        [36m0.2382[0m        [32m0.2329[0m  0.0161
      8        [36m0.2346[0m        [32m0.2201[0m  0.0227
      9        [36m0.2234[0m        [32m0.2133[0m  0.0156
     10        [36m0.2151[0m        [32m0.1935[0m  0.0090
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3094[0m        [32m0.3104[0m  0.0094
      2        [36m0.2731[0m        [32m0.2481[0m  0.0385
      3        [36m0.2592[0m        0.2648  0.0081
      4        0.2631        [32m0.2467[0m  0.0238
      5        [36m0.2489[0m        0.2563  0.0164
      6        [36m0.2484[0m        [32m0.2418[0m  0.0153
      7        [36m0.2442[0m        [32m0.2391[0m  0.0245
      8        [36m0.2432[0m        [32m0.2291[0m  0.0158
      9        [36m0.2349[0m        [32m0.2261[0m  0.0082
     10        [36m0.2264[0m        [32m0.2036[0m  0.0192
  epoch    train_l

     10        [36m0.2500[0m        [32m0.2471[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4918[0m        [32m0.4085[0m  0.0156
      2        [36m0.3762[0m        [32m0.2731[0m  0.0076
      3        [36m0.2602[0m        0.2738  0.0146
      4        0.2865        [32m0.2694[0m  0.0215
      5        0.2645        [32m0.2526[0m  0.0119
      6        [36m0.2563[0m        0.2682  0.0136
      7        0.2598        0.2544  0.0000
      8        [36m0.2512[0m        [32m0.2502[0m  0.0028
      9        0.2525        [32m0.2499[0m  0.0156
     10        [36m0.2499[0m        [32m0.2498[0m  0.0283
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4036[0m        [32m0.3471[0m  0.0000
      2        [36m0.3135[0m        [32m0.2501[0m  0.0170
      3        [36m0.2566[0m        0.2819  0.0157
      4        0.2748       

      2        [36m0.2668[0m        [32m0.2449[0m  0.0185
      3        [36m0.2545[0m        0.2566  0.0240
      4        [36m0.2457[0m        [32m0.2352[0m  0.0173
      5        [36m0.2379[0m        [32m0.2214[0m  0.0166
      6        [36m0.2242[0m        [32m0.1994[0m  0.0155
      7        [36m0.2127[0m        [32m0.1801[0m  0.0118
      8        [36m0.2025[0m        0.1805  0.0207
      9        [36m0.2021[0m        [32m0.1710[0m  0.0161
     10        [36m0.1995[0m        0.1754  0.0201
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4494[0m        [32m0.3206[0m  0.0135
      2        [36m0.2726[0m        0.3291  0.0074
      3        0.3090        [32m0.2574[0m  0.0273
      4        [36m0.2572[0m        0.2698  0.0142
      5        0.2649        [32m0.2460[0m  0.0071
      6        [36m0.2411[0m        0.2527  0.0134
      7        0.2430        [32m0.2411[0m  0.0212
 

      7        [36m0.2054[0m        [32m0.1754[0m  0.0000
      8        [36m0.2006[0m        [32m0.1750[0m  0.0156
      9        [36m0.1996[0m        [32m0.1685[0m  0.0156
     10        [36m0.1984[0m        0.1719  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3860[0m        [32m0.2955[0m  0.0156
      2        [36m0.3174[0m        [32m0.2522[0m  0.0156
      3        [36m0.2602[0m        0.2673  0.0156
      4        [36m0.2521[0m        [32m0.2455[0m  0.0287
      5        [36m0.2424[0m        [32m0.2356[0m  0.0164
      6        [36m0.2171[0m        [32m0.2252[0m  0.0157
      7        [36m0.2006[0m        [32m0.2240[0m  0.0161
      8        [36m0.1930[0m        [32m0.2161[0m  0.0224
      9        [36m0.1926[0m        [32m0.2107[0m  0.0166
     10        [36m0.1861[0m        0.2279  0.0158
  epoch    train_loss    valid_loss     dur
-------  ------------  ----

      4        [36m0.2500[0m        0.2581  0.0268
      5        0.2571        0.2542  0.0158
      6        0.2508        [32m0.2497[0m  0.0078
      7        0.2516        0.2521  0.0163
      8        0.2523        [32m0.2496[0m  0.0319
      9        [36m0.2496[0m        0.2501  0.0080
     10        [36m0.2493[0m        [32m0.2496[0m  0.0238
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.2782[0m        [32m0.7363[0m  0.0163
      2        [36m0.5743[0m        [32m0.3251[0m  0.0158
      3        [36m0.2805[0m        [32m0.2554[0m  0.0245
      4        [36m0.2756[0m        0.3285  0.0153
      5        0.3358        0.3390  0.0240
      6        0.3170        0.2872  0.0224
      7        [36m0.2721[0m        [32m0.2523[0m  0.0106
      8        [36m0.2503[0m        [32m0.2512[0m  0.0313
      9        0.2573        0.2580  0.0158
     10        0.2601        0.2555  0.0083
  epoch    

      6        0.2524        [32m0.2455[0m  0.0156
      7        [36m0.2454[0m        [32m0.2401[0m  0.0156
      8        [36m0.2397[0m        [32m0.2300[0m  0.0156
      9        [36m0.2269[0m        [32m0.2065[0m  0.0489
     10        [36m0.2084[0m        [32m0.1762[0m  0.0162
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2735[0m        [32m0.2507[0m  0.0157
      2        [36m0.2562[0m        0.2513  0.0313
      3        [36m0.2493[0m        [32m0.2503[0m  0.0000
      4        0.2512        0.2511  0.0188
      5        0.2516        0.2505  0.0165
      6        0.2503        [32m0.2476[0m  0.0078
      7        [36m0.2478[0m        [32m0.2476[0m  0.0156
      8        0.2495        [32m0.2403[0m  0.0156
      9        [36m0.2428[0m        [32m0.2301[0m  0.0156
     10        [36m0.2345[0m        [32m0.2177[0m  0.0314
  epoch    train_loss    valid_loss     dur
-------  -

      3        [36m0.2712[0m        0.2683  0.0157
      4        [36m0.2648[0m        [32m0.2523[0m  0.0156
      5        [36m0.2552[0m        0.2632  0.0156
      6        0.2559        [32m0.2487[0m  0.0156
      7        [36m0.2485[0m        [32m0.2467[0m  0.0156
      8        [36m0.2471[0m        [32m0.2430[0m  0.0206
      9        [36m0.2428[0m        [32m0.2384[0m  0.0156
     10        [36m0.2362[0m        [32m0.2227[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6400[0m        [32m0.3941[0m  0.0082
      2        [36m0.4360[0m        [32m0.3071[0m  0.0156
      3        [36m0.2775[0m        [32m0.2755[0m  0.0313
      4        0.2894        0.3034  0.0162
      5        0.2835        [32m0.2558[0m  0.0159
      6        [36m0.2540[0m        0.2594  0.0160
      7        0.2626        0.2571  0.0163
      8        0.2545        [32m0.2497[0m  0.0164
      9   

      2        [36m0.2572[0m        0.3066  0.0156
      3        0.2878        [32m0.2535[0m  0.0061
      4        [36m0.2512[0m        0.2608  0.0156
      5        0.2619        [32m0.2515[0m  0.0156
      6        [36m0.2486[0m        0.2526  0.0195
      7        0.2500        [32m0.2499[0m  0.0164
      8        [36m0.2400[0m        [32m0.2413[0m  0.0156
      9        [36m0.2335[0m        [32m0.2310[0m  0.0156
     10        [36m0.2177[0m        [32m0.2234[0m  0.0165
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4231[0m        [32m0.2957[0m  0.0164
      2        [36m0.2679[0m        0.3063  0.0156
      3        0.2930        [32m0.2501[0m  0.0254
      4        [36m0.2625[0m        0.2785  0.0194
      5        0.2668        [32m0.2500[0m  0.0158
      6        [36m0.2559[0m        0.2558  0.0156
      7        [36m0.2536[0m        [32m0.2492[0m  0.0185
      8        [36

      8        [36m0.2438[0m        [32m0.2444[0m  0.0156
      9        [36m0.2399[0m        [32m0.2428[0m  0.0156
     10        [36m0.2266[0m        [32m0.2286[0m  0.0000
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6483[0m        [32m0.3893[0m  0.0158
      2        [36m0.3173[0m        [32m0.3316[0m  0.0158
      3        0.3306        [32m0.2651[0m  0.0163
      4        [36m0.2653[0m        0.2869  0.0156
      5        0.2800        [32m0.2615[0m  0.0314
      6        [36m0.2477[0m        [32m0.2521[0m  0.0179
      7        0.2604        0.2609  0.0179
      8        0.2571        [32m0.2492[0m  0.0141
      9        0.2520        0.2574  0.0239
     10        0.2539        [32m0.2490[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4265[0m        [32m0.2714[0m  0.0168
      2        [36m0.2724[0m       

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2645[0m        [32m0.2512[0m  0.0219
      2        [36m0.2494[0m        0.2577  0.0139
      3        0.2531        [32m0.2506[0m  0.0162
      4        0.2514        [32m0.2499[0m  0.0156
      5        0.2517        0.2518  0.0156
      6        0.2499        0.2499  0.0156
      7        0.2509        0.2503  0.0128
      8        0.2499        0.2503  0.0000
      9        0.2500        0.2503  0.0156
     10        0.2517        0.2514  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3880[0m        [32m0.2575[0m  0.0157
      2        [36m0.3020[0m        0.3193  0.0250
      3        [36m0.2912[0m        [32m0.2518[0m  0.0156
      4        [36m0.2558[0m        0.2717  0.0156
      5        0.2717        0.2699  0.0156
      6        0.2620        [32m0.2511[0m  0.0162
      7     

      4        [36m0.2520[0m        0.2545  0.0155
      5        [36m0.2506[0m        0.2509  0.0126
      6        0.2544        0.2503  0.0162
      7        0.2520        0.2544  0.0271
      8        0.2570        0.2512  0.0159
      9        0.2513        0.2524  0.0164
     10        0.2516        [32m0.2497[0m  0.0239
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2825[0m        [32m0.2646[0m  0.0315
      2        [36m0.2666[0m        [32m0.2499[0m  0.0244
      3        [36m0.2515[0m        0.2605  0.0162
      4        0.2541        0.2508  0.0280
      5        0.2550        0.2514  0.0207
      6        0.2516        0.2532  0.0229
      7        0.2525        0.2502  0.0080
      8        [36m0.2514[0m        0.2511  0.0315
      9        [36m0.2508[0m        0.2505  0.0166
     10        0.2519        0.2514  0.0232
  epoch    train_loss    valid_loss     dur
-------  ------------  -------

      6        0.2601        0.2690  0.0106
      7        0.2597        [32m0.2503[0m  0.0156
      8        0.2547        0.2529  0.0073
      9        0.2515        0.2517  0.0162
     10        0.2543        0.2529  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4959[0m        [32m0.4652[0m  0.0156
      2        [36m0.3710[0m        [32m0.2508[0m  0.0312
      3        [36m0.2624[0m        0.2838  0.0158
      4        0.2857        0.2552  0.0315
      5        [36m0.2513[0m        0.2605  0.0169
      6        0.2616        0.2654  0.0302
      7        0.2530        [32m0.2498[0m  0.0071
      8        0.2555        0.2540  0.0313
      9        0.2554        [32m0.2496[0m  0.0156
     10        [36m0.2510[0m        0.2553  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5131[0m        [32m0.4429[0m  0.0312
      2     

      7        0.2523        [32m0.2493[0m  0.0264
      8        [36m0.2509[0m        0.2539  0.0410
      9        [36m0.2499[0m        [32m0.2460[0m  0.0277
     10        [36m0.2449[0m        [32m0.2375[0m  0.0323
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5180[0m        [32m0.2891[0m  0.0343
      2        [36m0.2923[0m        0.3555  0.0358
      3        0.3191        [32m0.2549[0m  0.0364
      4        [36m0.2589[0m        0.2788  0.0361
      5        0.2727        [32m0.2511[0m  0.0255
      6        [36m0.2523[0m        0.2611  0.0312
      7        0.2590        0.2544  0.0312
      8        0.2563        0.2515  0.0352
      9        [36m0.2522[0m        [32m0.2500[0m  0.0226
     10        [36m0.2494[0m        0.2500  0.0171
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6266[0m        [32m0.4988[0m  0.0314
  

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.3580[0m        [32m1.0028[0m  0.0156
      2        [36m0.8848[0m        [32m0.6512[0m  0.0273
      3        [36m0.5769[0m        [32m0.4284[0m  0.0313
      4        [36m0.3879[0m        [32m0.3023[0m  0.0468
      5        [36m0.2814[0m        [32m0.2536[0m  0.0312
      6        [36m0.2511[0m        0.2552  0.0443
      7        0.2586        0.2727  0.0376
      8        0.2728        0.2784  0.0340
      9        0.2731        0.2706  0.0256
     10        0.2639        0.2591  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2568[0m        [32m0.2515[0m  0.0158
      2        [36m0.2524[0m        0.2571  0.0312
      3        0.2528        [32m0.2510[0m  0.0156
      4        [36m0.2496[0m        [32m0.2500[0m  0.0160
      5        0.2500        [32m0.2500[0m  0.0259
  

      4        0.2585        0.2635  0.0162
      5        0.2583        0.2502  0.0156
      6        [36m0.2517[0m        [32m0.2499[0m  0.0156
      7        [36m0.2496[0m        0.2518  0.0156
      8        0.2525        0.2531  0.0156
      9        0.2497        0.2502  0.0156
     10        0.2524        0.2500  0.0316
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.4378[0m        [32m0.4892[0m  0.0312
      2        [36m0.3401[0m        [32m0.2556[0m  0.0156
      3        [36m0.2880[0m        0.3480  0.0156
      4        0.3545        0.3254  0.0159
      5        0.3069        0.2643  0.0156
      6        [36m0.2603[0m        [32m0.2511[0m  0.0156
      7        [36m0.2544[0m        0.2633  0.0156
      8        0.2644        0.2671  0.0156
      9        0.2622        0.2586  0.0156
     10        [36m0.2537[0m        [32m0.2509[0m  0.0157
  epoch    train_loss    valid_loss     dur
---

      9        [36m0.2501[0m        0.2513  0.0156
     10        0.2512        0.2506  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2955[0m        [32m0.2967[0m  0.0156
      2        [36m0.2863[0m        [32m0.2565[0m  0.0156
      3        [36m0.2781[0m        0.2664  0.0156
      4        [36m0.2612[0m        0.2693  0.0308
      5        0.2682        [32m0.2500[0m  0.0157
      6        [36m0.2543[0m        0.2605  0.0162
      7        [36m0.2525[0m        0.2502  0.0248
      8        0.2609        0.2559  0.0309
      9        [36m0.2497[0m        0.2565  0.0323
     10        0.2580        0.2541  0.0238
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3044[0m        [32m0.2504[0m  0.0262
      2        [36m0.2644[0m        0.2509  0.0218
      3        [36m0.2496[0m        0.2680  0.0100
      4        0.2568        

      2        [36m0.2678[0m        [32m0.2635[0m  0.0282
      3        [36m0.2505[0m        0.2727  0.0281
      4        0.2665        [32m0.2514[0m  0.0416
      5        0.2553        0.2515  0.0390
      6        0.2538        0.2569  0.0317
      7        0.2529        [32m0.2499[0m  0.0156
      8        0.2516        0.2507  0.0430
      9        0.2512        0.2500  0.0233
     10        [36m0.2497[0m        0.2517  0.0205
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3948[0m        [32m0.2627[0m  0.0209
      2        [36m0.2827[0m        0.3036  0.0208
      3        [36m0.2739[0m        [32m0.2598[0m  0.0278
      4        [36m0.2714[0m        [32m0.2566[0m  0.0206
      5        [36m0.2541[0m        0.2629  0.0208
      6        0.2615        [32m0.2521[0m  0.0070
      7        0.2583        0.2564  0.0156
      8        [36m0.2537[0m        [32m0.2507[0m  0.0329
      9    

      4        0.2969        0.2564  0.0156
      5        [36m0.2543[0m        0.2726  0.0199
      6        0.2742        0.2538  0.0122
      7        [36m0.2483[0m        0.2590  0.0355
      8        0.2593        0.2596  0.0184
      9        0.2553        [32m0.2500[0m  0.0305
     10        0.2540        0.2527  0.0379
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4582[0m        [32m0.2505[0m  0.0276
      2        [36m0.3211[0m        0.3021  0.0309
      3        [36m0.2661[0m        0.3017  0.0242
      4        0.2931        0.2555  0.0325
      5        [36m0.2580[0m        0.2720  0.0235
      6        0.2673        [32m0.2499[0m  0.0252
      7        [36m0.2517[0m        0.2593  0.0069
      8        0.2544        0.2499  0.0113
      9        0.2531        0.2522  0.0289
     10        0.2525        0.2514  0.0160
  epoch    train_loss    valid_loss     dur
-------  ------------  -------

      9        0.2505        0.2505  0.0160
     10        0.2505        0.2499  0.0253
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3623[0m        [32m0.2758[0m  0.0286
      2        [36m0.2959[0m        0.3064  0.0157
      3        [36m0.2719[0m        [32m0.2507[0m  0.0282
      4        [36m0.2588[0m        0.2645  0.0174
      5        0.2656        0.2511  0.0363
      6        [36m0.2488[0m        0.2605  0.0177
      7        0.2587        0.2614  0.0271
      8        0.2538        [32m0.2500[0m  0.0201
      9        0.2534        0.2517  0.0273
     10        0.2521        0.2505  0.0157
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2622[0m        [32m0.2500[0m  0.0313
      2        [36m0.2518[0m        0.2519  0.0319
      3        [36m0.2495[0m        0.2512  0.0156
      4        0.2514        0.2501  0.0319
      5     

      3        [36m0.2804[0m        [32m0.2549[0m  0.0161
      4        [36m0.2546[0m        0.2635  0.0314
      5        0.2634        [32m0.2505[0m  0.0156
      6        [36m0.2541[0m        0.2609  0.0348
      7        0.2574        0.2511  0.0313
      8        [36m0.2507[0m        0.2526  0.0156
      9        0.2525        [32m0.2500[0m  0.0314
     10        [36m0.2503[0m        0.2525  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3356[0m        [32m0.3207[0m  0.0156
      2        [36m0.2753[0m        [32m0.2693[0m  0.0196
      3        0.2764        [32m0.2550[0m  0.0312
      4        [36m0.2522[0m        0.2678  0.0183
      5        0.2641        [32m0.2528[0m  0.0156
      6        0.2527        0.2546  0.0208
      7        0.2564        [32m0.2499[0m  0.0314
      8        [36m0.2498[0m        0.2510  0.0156
      9        0.2509        0.2512  0.0312
     10    

      5        0.2609        0.2502  0.0388
      6        [36m0.2516[0m        0.2516  0.0303
      7        0.2527        0.2502  0.0314
      8        [36m0.2506[0m        0.2515  0.0345
      9        [36m0.2503[0m        0.2500  0.0318
     10        0.2525        0.2507  0.0320
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4171[0m        [32m0.3772[0m  0.0341
      2        [36m0.3143[0m        [32m0.2500[0m  0.0305
      3        [36m0.2590[0m        0.2693  0.0250
      4        0.2718        0.2521  0.0284
      5        [36m0.2528[0m        0.2560  0.0290
      6        0.2542        0.2580  0.0152
      7        0.2534        0.2503  0.0370
      8        [36m0.2495[0m        [32m0.2500[0m  0.0318
      9        0.2511        [32m0.2499[0m  0.0294
     10        0.2501        0.2508  0.0315
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1     

      9        [36m0.2527[0m        0.2539  0.0157
     10        0.2567        [32m0.2500[0m  0.0323
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3360[0m        [32m0.2825[0m  0.0312
      2        [36m0.3008[0m        [32m0.2501[0m  0.0156
      3        [36m0.2653[0m        0.2871  0.0251
      4        0.2678        0.2538  0.0127
      5        [36m0.2626[0m        0.2547  0.0156
      6        [36m0.2561[0m        0.2588  0.0312
      7        0.2564        0.2514  0.0156
      8        [36m0.2475[0m        0.2522  0.0298
      9        0.2548        0.2512  0.0158
     10        0.2512        0.2516  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5727[0m        [32m0.4233[0m  0.0156
      2        [36m0.3109[0m        [32m0.2639[0m  0.0349
      3        [36m0.2894[0m        0.2831  0.0161
      4        [36m0.2664[0

      2        [36m0.2575[0m        0.2688  0.0312
      3        0.2629        0.2548  0.0314
      4        [36m0.2522[0m        [32m0.2512[0m  0.0156
      5        0.2541        0.2532  0.0258
      6        0.2534        [32m0.2499[0m  0.0156
      7        [36m0.2500[0m        0.2514  0.0156
      8        0.2511        0.2516  0.0322
      9        0.2506        0.2500  0.0312
     10        0.2506        0.2501  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4003[0m        [32m0.2658[0m  0.0156
      2        [36m0.2575[0m        [32m0.2638[0m  0.0312
      3        0.2684        0.2909  0.0156
      4        0.2801        0.2767  0.0313
      5        0.2600        [32m0.2544[0m  0.0312
      6        [36m0.2511[0m        [32m0.2510[0m  0.0313
      7        0.2557        0.2545  0.0321
      8        0.2570        0.2512  0.0319
      9        0.2518        [32m0.2505[0m  0.0162
   

      8        0.2556        0.2581  0.0280
      9        0.2534        0.2502  0.0255
     10        0.2505        0.2502  0.0351
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3497[0m        [32m0.3208[0m  0.0162
      2        [36m0.3202[0m        [32m0.2584[0m  0.0225
      3        [36m0.2599[0m        0.2811  0.0273
      4        0.2774        0.2668  0.0206
      5        [36m0.2575[0m        [32m0.2513[0m  0.0313
      6        [36m0.2547[0m        0.2568  0.0156
      7        0.2559        [32m0.2500[0m  0.0171
      8        [36m0.2500[0m        0.2525  0.0314
      9        0.2525        0.2515  0.0156
     10        0.2507        0.2501  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2549[0m        [32m0.2503[0m  0.0312
      2        [36m0.2518[0m        0.2513  0.0156
      3        [36m0.2509[0m        0.2510  0

      2        [36m0.2751[0m        [32m0.2829[0m  0.0263
      3        [36m0.2743[0m        [32m0.2503[0m  0.0158
      4        [36m0.2541[0m        0.2637  0.0157
      5        0.2597        0.2505  0.0323
      6        [36m0.2521[0m        0.2506  0.0328
      7        [36m0.2501[0m        0.2517  0.0316
      8        0.2518        0.2513  0.0156
      9        [36m0.2493[0m        0.2504  0.0414
     10        0.2514        0.2504  0.0400
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3058[0m        [32m0.3272[0m  0.0373
      2        [36m0.2903[0m        [32m0.2516[0m  0.0372
      3        [36m0.2602[0m        0.2534  0.0376
      4        [36m0.2507[0m        0.2562  0.0402
      5        0.2551        0.2542  0.0340
      6        0.2605        0.2523  0.0377
      7        0.2535        0.2559  0.0384
      8        0.2538        0.2523  0.0369
      9        [36m0.2502[0m        

      6        0.2545        0.2632  0.0320
      7        0.2588        0.2597  0.0310
      8        [36m0.2524[0m        0.2503  0.0318
      9        [36m0.2509[0m        0.2507  0.0366
     10        0.2525        0.2503  0.0256
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3457[0m        [32m0.2563[0m  0.0322
      2        [36m0.2909[0m        0.2623  0.0494
      3        [36m0.2589[0m        0.2777  0.0405
      4        0.2668        [32m0.2515[0m  0.0298
      5        0.2659        0.2711  0.0269
      6        0.2627        0.2542  0.0335
      7        [36m0.2568[0m        0.2515  0.0292
      8        [36m0.2529[0m        0.2566  0.0156
      9        0.2534        [32m0.2504[0m  0.0313
     10        0.2531        0.2529  0.0555
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3420[0m        [32m0.2503[0m  0.0344
      2     

      2        [36m0.3642[0m        [32m0.2561[0m  0.0312
      3        [36m0.3017[0m        0.3093  0.0206
      4        [36m0.2685[0m        0.2650  0.0333
      5        0.2787        0.2648  0.0347
      6        [36m0.2557[0m        [32m0.2557[0m  0.0323
      7        0.2596        0.2596  0.0281
      8        [36m0.2527[0m        [32m0.2514[0m  0.0279
      9        0.2547        0.2528  0.0162
     10        [36m0.2512[0m        [32m0.2502[0m  0.0316
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7687[0m        [32m0.4777[0m  0.0323
      2        [36m0.4194[0m        [32m0.3032[0m  0.0282
      3        [36m0.2685[0m        [32m0.2547[0m  0.0317
      4        0.2716        0.2744  0.0276
      5        0.2763        0.2564  0.0313
      6        [36m0.2560[0m        [32m0.2527[0m  0.0245
      7        [36m0.2531[0m        0.2624  0.0313
      8        0.2576        0.2579  

      7        [36m0.2503[0m        0.2560  0.0421
      8        0.2537        [32m0.2501[0m  0.0368
      9        [36m0.2500[0m        0.2504  0.0374
     10        [36m0.2499[0m        0.2503  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3203[0m        [32m0.2737[0m  0.0319
      2        [36m0.3051[0m        0.2872  0.0234
      3        [36m0.2698[0m        [32m0.2518[0m  0.0285
      4        [36m0.2614[0m        0.2729  0.0315
      5        0.2666        0.2543  0.0369
      6        [36m0.2500[0m        [32m0.2510[0m  0.0312
      7        0.2539        0.2542  0.0319
      8        0.2534        [32m0.2502[0m  0.0323
      9        [36m0.2497[0m        0.2518  0.0322
     10        0.2529        0.2531  0.0329
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6441[0m        [32m0.2681[0m  0.0327
      2        [36m

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3759[0m        [32m0.3432[0m  0.0322
      2        [36m0.3009[0m        [32m0.2501[0m  0.0318
      3        [36m0.2606[0m        0.2700  0.0331
      4        0.2629        [32m0.2499[0m  0.0318
      5        [36m0.2531[0m        0.2634  0.0313
      6        0.2584        0.2512  0.0344
      7        [36m0.2490[0m        0.2512  0.0242
      8        0.2529        0.2507  0.0397
      9        0.2508        0.2510  0.0479
     10        0.2508        0.2513  0.0406
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3597[0m        [32m0.3643[0m  0.0361
      2        [36m0.2851[0m        [32m0.2564[0m  0.0386
      3        [36m0.2780[0m        0.2741  0.0345
      4        [36m0.2689[0m        [32m0.2539[0m  0.0307
      5        [36m0.2555[0m        0.2657  0.0461
      6        [36m

      2        [36m0.2906[0m        [32m0.2689[0m  0.0312
      3        [36m0.2706[0m        [32m0.2622[0m  0.0244
      4        [36m0.2538[0m        [32m0.2561[0m  0.0232
      5        0.2577        [32m0.2500[0m  0.0313
      6        [36m0.2515[0m        0.2568  0.0313
      7        0.2525        [32m0.2500[0m  0.0156
      8        [36m0.2505[0m        0.2500  0.0156
      9        [36m0.2500[0m        0.2507  0.0364
     10        0.2502        0.2511  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3830[0m        [32m0.3949[0m  0.0312
      2        [36m0.3739[0m        [32m0.2565[0m  0.0454
      3        [36m0.2702[0m        0.3121  0.0263
      4        0.2950        [32m0.2505[0m  0.0197
      5        [36m0.2547[0m        0.2735  0.0312
      6        0.2671        0.2548  0.0213
      7        [36m0.2516[0m        0.2540  0.0313
      8        0.2559        0.2527  

      5        [36m0.2608[0m        0.2646  0.0447
      6        [36m0.2590[0m        [32m0.2500[0m  0.0199
      7        [36m0.2526[0m        0.2578  0.0313
      8        0.2557        0.2527  0.0313
      9        [36m0.2507[0m        0.2505  0.0314
     10        0.2518        0.2513  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4356[0m        [32m0.2529[0m  0.0312
      2        [36m0.2818[0m        0.2960  0.0262
      3        [36m0.2700[0m        0.2657  0.0331
      4        [36m0.2692[0m        0.2609  0.0348
      5        [36m0.2547[0m        0.2570  0.0319
      6        0.2583        [32m0.2512[0m  0.0313
      7        [36m0.2490[0m        0.2545  0.0312
      8        0.2565        0.2525  0.0156
      9        [36m0.2486[0m        0.2515  0.0322
     10        0.2546        [32m0.2511[0m  0.0329
  epoch    train_loss    valid_loss     dur
-------  ------------  ------

      7        0.2891        0.2884  0.0323
      8        0.2792        0.2708  0.0313
      9        0.2613        0.2570  0.0378
     10        0.2533        [32m0.2504[0m  0.0318
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4064[0m        [32m0.2788[0m  0.0314
      2        [36m0.2651[0m        [32m0.2540[0m  0.0320
      3        [36m0.2605[0m        0.2822  0.0313
      4        0.2770        0.2827  0.0312
      5        0.2693        0.2629  0.0313
      6        [36m0.2545[0m        [32m0.2508[0m  0.0284
      7        [36m0.2506[0m        [32m0.2508[0m  0.0314
      8        0.2536        0.2520  0.0220
      9        0.2536        [32m0.2504[0m  0.0206
     10        0.2516        [32m0.2504[0m  0.0333
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2566[0m        [32m0.2502[0m  0.0460
      2        [36m0.2527[0m        

      3        [36m0.2630[0m        0.2800  0.0363
      4        0.2704        0.2529  0.0354
      5        [36m0.2551[0m        0.2562  0.0313
      6        0.2572        0.2511  0.0367
      7        [36m0.2501[0m        0.2504  0.0375
      8        0.2516        0.2537  0.0315
      9        0.2519        0.2506  0.0409
     10        0.2521        0.2507  0.0388
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2891[0m        [32m0.2537[0m  0.0374
      2        [36m0.2618[0m        0.2620  0.0351
      3        [36m0.2573[0m        0.2595  0.0260
      4        [36m0.2568[0m        [32m0.2509[0m  0.0325
      5        [36m0.2546[0m        0.2541  0.0351
      6        0.2550        0.2510  0.0383
      7        [36m0.2509[0m        [32m0.2506[0m  0.0382
      8        [36m0.2507[0m        [32m0.2501[0m  0.0355
      9        [36m0.2502[0m        0.2503  0.0423
     10        0.2508        

      9        0.2522        0.2508  0.0514
     10        0.2513        0.2515  0.0452
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3734[0m        [32m0.3382[0m  0.0386
      2        [36m0.2753[0m        [32m0.2719[0m  0.0400
      3        0.2846        [32m0.2587[0m  0.0429
      4        [36m0.2552[0m        0.2691  0.0367
      5        0.2672        0.2608  0.0312
      6        [36m0.2538[0m        [32m0.2526[0m  0.0312
      7        0.2591        [32m0.2521[0m  0.0313
      8        [36m0.2529[0m        0.2548  0.0312
      9        0.2541        0.2550  0.0312
     10        [36m0.2505[0m        [32m0.2499[0m  0.0318
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3081[0m        [32m0.3250[0m  0.0442
      2        [36m0.3058[0m        [32m0.2512[0m  0.0347
      3        [36m0.2843[0m        0.2824  0.0227
      4    

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3868[0m        [32m0.2664[0m  0.0314
      2        [36m0.2585[0m        0.2909  0.0329
      3        0.2735        [32m0.2506[0m  0.0405
      4        [36m0.2575[0m        0.2631  0.0313
      5        0.2588        [32m0.2502[0m  0.0398
      6        [36m0.2525[0m        0.2573  0.0387
      7        0.2544        0.2509  0.0336
      8        [36m0.2509[0m        0.2511  0.0313
      9        0.2520        [32m0.2501[0m  0.0411
     10        [36m0.2497[0m        0.2504  0.0415
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5286[0m        [32m0.4531[0m  0.0515
      2        [36m0.3781[0m        [32m0.2694[0m  0.0272
      3        [36m0.2560[0m        [32m0.2638[0m  0.0400
      4        0.2767        0.2784  0.0317
      5        0.2743        [32m0.2569[0m  0.0242
      6    

      2        [36m0.2898[0m        [32m0.2503[0m  0.0442
      3        [36m0.2633[0m        0.2654  0.0377
      4        [36m0.2551[0m        0.2556  0.0355
      5        0.2578        0.2508  0.0326
      6        [36m0.2514[0m        0.2554  0.0275
      7        0.2535        [32m0.2499[0m  0.0345
      8        [36m0.2501[0m        0.2499  0.0226
      9        0.2506        [32m0.2499[0m  0.0313
     10        0.2519        0.2514  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5983[0m        [32m0.4621[0m  0.0312
      2        [36m0.3119[0m        [32m0.2736[0m  0.0312
      3        0.3165        0.2824  0.0182
      4        [36m0.2583[0m        [32m0.2730[0m  0.0312
      5        0.2814        0.2756  0.0312
      6        0.2632        [32m0.2573[0m  0.0313
      7        0.2645        [32m0.2540[0m  0.0430
      8        [36m0.2508[0m        0.2560  0.0334
      9    

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2718[0m        [32m0.2720[0m  0.0165
      2        [36m0.2576[0m        [32m0.2485[0m  0.0187
      3        [36m0.2494[0m        [32m0.2397[0m  0.0162
      4        [36m0.2379[0m        [32m0.2235[0m  0.0156
      5        [36m0.2261[0m        [32m0.2074[0m  0.0118
      6        [36m0.2196[0m        [32m0.1979[0m  0.0264
      7        [36m0.2078[0m        [32m0.1803[0m  0.0135
      8        [36m0.2031[0m        [32m0.1751[0m  0.0156
      9        [36m0.1991[0m        [32m0.1744[0m  0.0000
     10        0.1999        [32m0.1707[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4715[0m        [32m0.3860[0m  0.0000
      2        [36m0.3363[0m        [32m0.2442[0m  0.0000
      3        [36m0.2558[0m        0.2861  0.0000
      4        0.2595        [32m0.

      2        [36m0.4442[0m        [32m0.2576[0m  0.0156
      3        [36m0.2508[0m        0.2785  0.0156
      4        0.3010        0.2734  0.0156
      5        0.2631        [32m0.2348[0m  0.0162
      6        [36m0.2480[0m        0.2528  0.0000
      7        [36m0.2424[0m        [32m0.2227[0m  0.0005
      8        [36m0.2205[0m        [32m0.2042[0m  0.0167
      9        [36m0.2186[0m        [32m0.1930[0m  0.0156
     10        [36m0.2085[0m        [32m0.1806[0m  0.0163
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3420[0m        [32m0.4665[0m  0.0156
      2        [36m0.3224[0m        [32m0.3058[0m  0.0156
      3        [36m0.3020[0m        [32m0.2511[0m  0.0156
      4        [36m0.2218[0m        0.2539  0.0101
      5        0.2356        [32m0.2187[0m  0.0156
      6        [36m0.2078[0m        0.2468  0.0156
      7        [36m0.2030[0m        [32m0.2168[0m 

      6        [36m0.2117[0m        [32m0.1845[0m  0.0130
      7        [36m0.2014[0m        [32m0.1772[0m  0.0156
      8        0.2078        [32m0.1747[0m  0.0110
      9        0.2018        0.1849  0.0156
     10        0.2014        [32m0.1733[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4255[0m        [32m0.2638[0m  0.0257
      2        [36m0.2686[0m        [32m0.2334[0m  0.0000
      3        [36m0.2190[0m        [32m0.2319[0m  0.0162
      4        [36m0.2004[0m        0.2374  0.0000
      5        0.2061        [32m0.2246[0m  0.0162
      6        [36m0.1915[0m        [32m0.2165[0m  0.0156
      7        0.1932        0.2224  0.0162
      8        [36m0.1909[0m        0.2196  0.0000
      9        [36m0.1869[0m        [32m0.2113[0m  0.0170
     10        0.1874        0.2130  0.0157
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  

     10        0.2018        0.1744  0.0000
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4786[0m        [32m0.3899[0m  0.0135
      2        [36m0.3665[0m        [32m0.2402[0m  0.0100
      3        [36m0.2423[0m        0.2824  0.0163
      4        [36m0.2409[0m        [32m0.2265[0m  0.0147
      5        [36m0.2225[0m        0.2551  0.0188
      6        [36m0.2068[0m        [32m0.2146[0m  0.0010
      7        [36m0.1970[0m        0.2157  0.0212
      8        [36m0.1921[0m        0.2219  0.0156
      9        [36m0.1907[0m        0.2208  0.0156
     10        [36m0.1873[0m        [32m0.2119[0m  0.0132
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8957[0m        [32m0.7903[0m  0.0044
      2        [36m0.7532[0m        [32m0.4217[0m  0.0146
      3        [36m0.3280[0m        [32m0.2651[0m  0.0180
      4        [36

      2        [36m0.7453[0m        [32m0.3674[0m  0.0158
      3        [36m0.2785[0m        [32m0.2512[0m  0.0157
      4        [36m0.2686[0m        0.2810  0.0035
      5        [36m0.2663[0m        [32m0.2383[0m  0.0156
      6        [36m0.2240[0m        [32m0.2293[0m  0.0156
      7        [36m0.2130[0m        [32m0.2271[0m  0.0141
      8        [36m0.2008[0m        [32m0.2202[0m  0.0165
      9        [36m0.1937[0m        [32m0.2123[0m  0.0156
     10        [36m0.1839[0m        [32m0.2120[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8214[0m        [32m0.5186[0m  0.0156
      2        [36m0.5504[0m        [32m0.3698[0m  0.0000
      3        [36m0.3235[0m        [32m0.2481[0m  0.0156
      4        [36m0.2430[0m        0.2592  0.0156
      5        0.2607        0.2549  0.0156
      6        [36m0.2424[0m        [32m0.2223[0m  0.0180
      7        [3

      9        [36m0.2313[0m        [32m0.2373[0m  0.0156
     10        [36m0.2231[0m        [32m0.2279[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3607[0m        [32m0.3474[0m  0.0000
      2        [36m0.3222[0m        [32m0.2654[0m  0.0166
      3        [36m0.2566[0m        [32m0.2619[0m  0.0178
      4        0.2689        0.2629  0.0157
      5        0.2574        [32m0.2496[0m  0.0156
      6        [36m0.2515[0m        0.2570  0.0156
      7        0.2580        0.2573  0.0156
      8        0.2534        [32m0.2494[0m  0.0156
      9        [36m0.2503[0m        0.2507  0.0091
     10        0.2521        0.2497  0.0162
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6071[0m        [32m0.3090[0m  0.0167
      2        [36m0.3613[0m        0.3820  0.0005
      3        [36m0.3143[0m        [32m0.2504[0m  

      7        [36m0.2274[0m        [32m0.2150[0m  0.0205
      8        [36m0.2163[0m        [32m0.1878[0m  0.0000
      9        [36m0.1976[0m        [32m0.1664[0m  0.0156
     10        [36m0.1912[0m        [32m0.1610[0m  0.0000
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6998[0m        [32m0.6047[0m  0.0162
      2        [36m0.4192[0m        [32m0.2511[0m  0.0167
      3        [36m0.3055[0m        0.3252  0.0156
      4        0.3057        0.2519  0.0101
      5        [36m0.2557[0m        0.2918  0.0258
      6        0.2767        0.2585  0.0156
      7        [36m0.2521[0m        0.2523  0.0063
      8        0.2601        [32m0.2480[0m  0.0233
      9        [36m0.2464[0m        [32m0.2468[0m  0.0083
     10        0.2466        0.2474  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3830[0m        [32m0.4

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4151[0m        [32m0.2534[0m  0.0092
      2        [36m0.2915[0m        0.2579  0.0195
      3        [36m0.2716[0m        0.2893  0.0138
      4        [36m0.2645[0m        0.2537  0.0157
      5        [36m0.2592[0m        [32m0.2522[0m  0.0157
      6        [36m0.2519[0m        0.2566  0.0163
      7        0.2550        0.2540  0.0128
      8        [36m0.2504[0m        [32m0.2509[0m  0.0156
      9        0.2528        [32m0.2492[0m  0.0092
     10        0.2509        0.2522  0.0157
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5533[0m        [32m0.2813[0m  0.0000
      2        [36m0.2813[0m        0.3314  0.0156
      3        0.2955        [32m0.2531[0m  0.0156
      4        [36m0.2704[0m        0.2614  0.0159
      5        [36m0.2563[0m        0.2641  0.0158
      6    

      5        [36m0.2497[0m        0.2499  0.0156
      6        0.2523        [32m0.2461[0m  0.0252
      7        [36m0.2460[0m        0.2474  0.0042
      8        [36m0.2444[0m        [32m0.2407[0m  0.0156
      9        [36m0.2374[0m        [32m0.2258[0m  0.0156
     10        [36m0.2269[0m        [32m0.2048[0m  0.0257
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5120[0m        [32m0.2549[0m  0.0258
      2        [36m0.2877[0m        0.3022  0.0071
      3        [36m0.2772[0m        [32m0.2493[0m  0.0168
      4        [36m0.2500[0m        [32m0.2410[0m  0.0156
      5        [36m0.2237[0m        [32m0.2219[0m  0.0156
      6        [36m0.1981[0m        [32m0.2141[0m  0.0120
      7        [36m0.1850[0m        [32m0.2125[0m  0.0160
      8        [36m0.1812[0m        [32m0.2104[0m  0.0156
      9        [36m0.1780[0m        0.2141  0.0101
     10        [36m0.1772

      8        [36m0.1971[0m        0.1779  0.0156
      9        0.1993        [32m0.1710[0m  0.0158
     10        [36m0.1935[0m        [32m0.1682[0m  0.0100
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.0112[0m        [32m0.4287[0m  0.0095
      2        [36m0.3129[0m        [32m0.2907[0m  0.0162
      3        0.3613        0.4157  0.0156
      4        0.3873        0.3022  0.0156
      5        [36m0.2765[0m        [32m0.2522[0m  0.0156
      6        [36m0.2611[0m        0.2888  0.0156
      7        0.2887        0.2846  0.0156
      8        0.2694        0.2539  0.0084
      9        [36m0.2507[0m        0.2538  0.0163
     10        0.2603        0.2601  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8551[0m        [32m0.3210[0m  0.0156
      2        [36m0.2665[0m        0.3442  0.0000
      3        0.3904        

     10        [36m0.2204[0m        [32m0.2190[0m  0.0184
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6885[0m        [32m0.3294[0m  0.0156
      2        [36m0.3743[0m        0.3445  0.0145
      3        [36m0.2874[0m        [32m0.2514[0m  0.0156
      4        [36m0.2641[0m        0.2844  0.0320
      5        0.2847        0.2644  0.0159
      6        [36m0.2587[0m        0.2523  0.0156
      7        [36m0.2585[0m        0.2652  0.0156
      8        0.2590        0.2533  0.0156
      9        [36m0.2534[0m        0.2521  0.0156
     10        0.2559        0.2533  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3036[0m        [32m0.2557[0m  0.0156
      2        [36m0.2624[0m        [32m0.2505[0m  0.0156
      3        [36m0.2622[0m        0.2606  0.0156
      4        0.2628        0.2562  0.0156
      5        [36m

      6        [36m0.2347[0m        [32m0.2171[0m  0.0125
      7        [36m0.2171[0m        [32m0.1891[0m  0.0156
      8        [36m0.1961[0m        [32m0.1669[0m  0.0000
      9        [36m0.1875[0m        [32m0.1583[0m  0.0156
     10        [36m0.1853[0m        0.1631  0.0018
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4688[0m        [32m0.4002[0m  0.0156
      2        [36m0.2969[0m        [32m0.2600[0m  0.0156
      3        [36m0.2837[0m        0.2858  0.0156
      4        [36m0.2820[0m        [32m0.2531[0m  0.0168
      5        [36m0.2539[0m        0.2606  0.0156
      6        0.2590        0.2638  0.0156
      7        0.2565        [32m0.2510[0m  0.0133
      8        [36m0.2494[0m        [32m0.2499[0m  0.0156
      9        0.2512        [32m0.2499[0m  0.0313
     10        0.2505        [32m0.2496[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  -

      2        [36m0.3224[0m        0.3354  0.0156
      3        [36m0.2982[0m        0.2692  0.0000
      4        [36m0.2836[0m        0.2615  0.0094
      5        [36m0.2511[0m        0.2570  0.0170
      6        0.2601        [32m0.2426[0m  0.0160
      7        [36m0.2416[0m        [32m0.2373[0m  0.0246
      8        [36m0.2308[0m        [32m0.2063[0m  0.0086
      9        [36m0.2101[0m        [32m0.1813[0m  0.0254
     10        [36m0.1986[0m        [32m0.1642[0m  0.0044
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5727[0m        [32m0.2939[0m  0.0162
      2        [36m0.2885[0m        0.3155  0.0179
      3        0.3064        [32m0.2786[0m  0.0080
      4        [36m0.2626[0m        [32m0.2526[0m  0.0167
      5        [36m0.2575[0m        0.2604  0.0156
      6        [36m0.2571[0m        [32m0.2500[0m  0.0027
      7        [36m0.2496[0m        0.2527  0.0176
 

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.9063[0m        [32m0.3567[0m  0.0186
      2        [36m0.3206[0m        0.4034  0.0196
      3        0.3698        [32m0.2781[0m  0.0190
      4        [36m0.2681[0m        0.2785  0.0000
      5        0.2710        [32m0.2511[0m  0.0278
      6        [36m0.2487[0m        0.2557  0.0025
      7        0.2552        0.2551  0.0156
      8        0.2527        [32m0.2500[0m  0.0262
      9        0.2521        0.2511  0.0156
     10        0.2510        0.2501  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6459[0m        [32m0.2740[0m  0.0158
      2        [36m0.3308[0m        [32m0.2623[0m  0.0156
      3        [36m0.2987[0m        0.2853  0.0000
      4        [36m0.2578[0m        0.2783  0.0031
      5        0.2662        [32m0.2384[0m  0.0156
      6        [36m0.2504[0

      2        [36m0.2484[0m        0.2569  0.0156
      3        0.2554        [32m0.2515[0m  0.0156
      4        0.2530        0.2517  0.0175
      5        0.2506        0.2517  0.0181
      6        0.2516        [32m0.2507[0m  0.0131
      7        0.2522        0.2511  0.0156
      8        0.2498        [32m0.2502[0m  0.0156
      9        0.2538        0.2502  0.0156
     10        0.2548        0.2546  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3759[0m        [32m0.3216[0m  0.0156
      2        [36m0.3163[0m        [32m0.2891[0m  0.0156
      3        [36m0.2669[0m        [32m0.2539[0m  0.0198
      4        0.2671        0.2633  0.0159
      5        [36m0.2618[0m        [32m0.2508[0m  0.0156
      6        [36m0.2521[0m        0.2619  0.0067
      7        0.2566        0.2554  0.0156
      8        [36m0.2489[0m        [32m0.2503[0m  0.0190
      9        0.2529        

      5        0.2558        0.2505  0.0156
      6        [36m0.2544[0m        0.2562  0.0179
      7        [36m0.2516[0m        [32m0.2500[0m  0.0323
      8        0.2517        [32m0.2499[0m  0.0156
      9        [36m0.2493[0m        0.2547  0.0156
     10        0.2518        0.2504  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3688[0m        [32m0.2738[0m  0.0161
      2        [36m0.2707[0m        0.2917  0.0116
      3        [36m0.2699[0m        [32m0.2528[0m  0.0257
      4        [36m0.2578[0m        0.2548  0.0000
      5        [36m0.2519[0m        0.2539  0.0000
      6        0.2543        [32m0.2520[0m  0.0100
      7        [36m0.2497[0m        [32m0.2514[0m  0.0158
      8        0.2526        [32m0.2502[0m  0.0159
      9        0.2502        0.2542  0.0157
     10        0.2517        [32m0.2496[0m  0.0229
  epoch    train_loss    valid_loss     dur
-------  --

      5        [36m0.2586[0m        0.2688  0.0156
      6        0.2649        0.2586  0.0196
      7        [36m0.2516[0m        0.2509  0.0156
      8        0.2552        0.2560  0.0090
      9        0.2545        0.2503  0.0157
     10        [36m0.2493[0m        0.2527  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5134[0m        [32m0.2944[0m  0.0156
      2        [36m0.2693[0m        0.3265  0.0156
      3        0.2961        [32m0.2516[0m  0.0156
      4        [36m0.2659[0m        0.2813  0.0156
      5        [36m0.2609[0m        0.2560  0.0156
      6        0.2640        0.2548  0.0156
      7        [36m0.2506[0m        0.2584  0.0174
      8        0.2573        0.2521  0.0156
      9        0.2528        0.2532  0.0157
     10        0.2533        [32m0.2503[0m  0.0157
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0

      9        [36m0.2511[0m        [32m0.2512[0m  0.0156
     10        0.2529        [32m0.2495[0m  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5797[0m        [32m0.4897[0m  0.0156
      2        [36m0.4204[0m        [32m0.2509[0m  0.0328
      3        [36m0.2818[0m        0.3077  0.0188
      4        0.3011        0.2569  0.0156
      5        [36m0.2535[0m        0.2647  0.0167
      6        0.2651        0.2691  0.0157
      7        0.2556        [32m0.2504[0m  0.0156
      8        [36m0.2519[0m        0.2525  0.0156
      9        0.2555        0.2513  0.0156
     10        0.2526        0.2504  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7432[0m        [32m0.2725[0m  0.0156
      2        [36m0.2813[0m        0.3362  0.0155
      3        0.2945        [32m0.2544[0m  0.0163
      4        [36m0.2714[0

     10        [36m0.2504[0m        [32m0.2500[0m  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7225[0m        [32m0.2622[0m  0.0156
      2        [36m0.2898[0m        0.3400  0.0156
      3        0.3186        0.2677  0.0283
      4        [36m0.2667[0m        0.2673  0.0156
      5        [36m0.2624[0m        [32m0.2502[0m  0.0156
      6        [36m0.2517[0m        0.2534  0.0125
      7        0.2522        0.2512  0.0034
      8        [36m0.2499[0m        [32m0.2500[0m  0.0159
      9        0.2506        0.2506  0.0156
     10        0.2508        0.2500  0.0165
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.7943[0m        [32m0.6564[0m  0.0165
      2        [36m0.4953[0m        [32m0.2591[0m  0.0156
      3        [36m0.3007[0m        0.3313  0.0156
      4        0.3197        0.2683  0.0193
      5        [36m

      5        [36m0.2541[0m        0.2681  0.0156
      6        0.2663        0.2580  0.0156
      7        [36m0.2496[0m        [32m0.2512[0m  0.0156
      8        0.2550        0.2552  0.0156
      9        0.2535        [32m0.2501[0m  0.0162
     10        0.2513        0.2547  0.0197
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2962[0m        [32m0.3051[0m  0.0176
      2        [36m0.2882[0m        [32m0.2620[0m  0.0314
      3        [36m0.2736[0m        0.2658  0.0156
      4        [36m0.2553[0m        [32m0.2601[0m  0.0156
      5        0.2640        [32m0.2500[0m  0.0141
      6        [36m0.2532[0m        0.2678  0.0162
      7        0.2592        0.2502  0.0157
      8        [36m0.2506[0m        0.2514  0.0283
      9        0.2527        0.2503  0.0156
     10        [36m0.2502[0m        0.2577  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------

     10        0.2505        0.2503  0.0268
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3346[0m        [32m0.2530[0m  0.0160
      2        [36m0.2601[0m        0.2557  0.0191
      3        [36m0.2532[0m        0.2574  0.0156
      4        0.2551        [32m0.2516[0m  0.0156
      5        [36m0.2527[0m        0.2518  0.0126
      6        0.2541        0.2523  0.0314
      7        [36m0.2506[0m        [32m0.2509[0m  0.0156
      8        [36m0.2506[0m        0.2523  0.0156
      9        0.2512        [32m0.2501[0m  0.0156
     10        0.2515        [32m0.2500[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3674[0m        [32m0.3001[0m  0.0159
      2        [36m0.2977[0m        [32m0.2501[0m  0.0156
      3        [36m0.2701[0m        0.2758  0.0156
      4        [36m0.2546[0m        0.2607  0.0156
      5    

      2        [36m0.3062[0m        0.2744  0.0263
      3        [36m0.2613[0m        0.2861  0.0273
      4        0.2718        [32m0.2502[0m  0.0276
      5        [36m0.2510[0m        0.2602  0.0219
      6        0.2600        0.2502  0.0235
      7        0.2510        0.2540  0.0191
      8        0.2527        0.2509  0.0264
      9        [36m0.2503[0m        0.2505  0.0156
     10        0.2537        0.2504  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6658[0m        [32m0.6439[0m  0.0038
      2        [36m0.5837[0m        [32m0.2733[0m  0.0156
      3        [36m0.2818[0m        0.3440  0.0156
      4        0.3100        [32m0.2615[0m  0.0173
      5        [36m0.2500[0m        [32m0.2558[0m  0.0020
      6        0.2642        0.2617  0.0324
      7        0.2638        [32m0.2523[0m  0.0156
      8        0.2538        [32m0.2515[0m  0.0156
      9        0.2525        

      5        [36m0.2675[0m        [32m0.2510[0m  0.0177
      6        [36m0.2609[0m        0.2602  0.0203
      7        [36m0.2508[0m        0.2550  0.0218
      8        0.2572        0.2512  0.0179
      9        0.2565        0.2556  0.0157
     10        [36m0.2485[0m        0.2531  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6442[0m        [32m0.2541[0m  0.0156
      2        [36m0.2873[0m        0.3325  0.0156
      3        0.2932        0.2685  0.0156
      4        [36m0.2689[0m        [32m0.2502[0m  0.0314
      5        [36m0.2528[0m        0.2610  0.0156
      6        0.2561        [32m0.2502[0m  0.0156
      7        0.2550        0.2568  0.0156
      8        0.2537        0.2516  0.0156
      9        [36m0.2527[0m        0.2529  0.0156
     10        [36m0.2515[0m        [32m0.2501[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------

      8        [36m0.2499[0m        [32m0.2507[0m  0.0264
      9        0.2516        0.2525  0.0177
     10        0.2533        0.2517  0.0219
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2906[0m        [32m0.3109[0m  0.0156
      2        [36m0.2734[0m        [32m0.2536[0m  0.0156
      3        [36m0.2665[0m        0.2653  0.0312
      4        [36m0.2575[0m        [32m0.2532[0m  0.0156
      5        0.2584        0.2605  0.0313
      6        [36m0.2562[0m        [32m0.2505[0m  0.0156
      7        [36m0.2538[0m        0.2517  0.0156
      8        [36m0.2502[0m        0.2511  0.0156
      9        0.2522        0.2534  0.0156
     10        0.2518        [32m0.2501[0m  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2673[0m        [32m0.2761[0m  0.0313
      2        [36m0.2581[0m        [32m0.2628[0m  0.0156
  

      2        [36m0.3544[0m        [32m0.2556[0m  0.0156
      3        [36m0.2671[0m        0.2815  0.0370
      4        0.2860        0.2582  0.0193
      5        [36m0.2569[0m        [32m0.2548[0m  0.0158
      6        0.2571        0.2647  0.0313
      7        [36m0.2556[0m        [32m0.2516[0m  0.0270
      8        [36m0.2515[0m        [32m0.2513[0m  0.0156
      9        0.2533        [32m0.2501[0m  0.0156
     10        [36m0.2498[0m        0.2507  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3124[0m        [32m0.2514[0m  0.0156
      2        [36m0.2746[0m        0.2559  0.0318
      3        [36m0.2573[0m        0.2764  0.0237
      4        0.2649        [32m0.2500[0m  0.0313
      5        [36m0.2559[0m        0.2562  0.0156
      6        [36m0.2528[0m        0.2508  0.0156
      7        0.2536        0.2560  0.0312
      8        [36m0.2524[0m        0.2502  

      5        0.2709        [32m0.2547[0m  0.0156
      6        [36m0.2548[0m        0.2591  0.0312
      7        0.2576        [32m0.2501[0m  0.0162
      8        [36m0.2533[0m        0.2565  0.0321
      9        [36m0.2526[0m        0.2502  0.0286
     10        [36m0.2515[0m        0.2509  0.0296
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6205[0m        [32m0.3285[0m  0.0279
      2        [36m0.2743[0m        0.3474  0.0158
      3        0.3284        [32m0.2513[0m  0.0156
      4        [36m0.2610[0m        0.3047  0.0286
      5        0.2827        [32m0.2508[0m  0.0157
      6        0.2610        0.2622  0.0156
      7        [36m0.2583[0m        0.2523  0.0315
      8        [36m0.2534[0m        0.2557  0.0156
      9        [36m0.2513[0m        [32m0.2503[0m  0.0156
     10        0.2535        0.2515  0.0207
  epoch    train_loss    valid_loss     dur
-------  -----------

      7        0.2567        0.2517  0.0238
      8        [36m0.2511[0m        0.2503  0.0160
      9        0.2529        0.2537  0.0317
     10        0.2521        0.2512  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6045[0m        [32m0.2499[0m  0.0312
      2        [36m0.2893[0m        0.2814  0.0317
      3        [36m0.2686[0m        0.3072  0.0156
      4        0.2724        0.2568  0.0312
      5        [36m0.2681[0m        0.2516  0.0156
      6        [36m0.2553[0m        0.2735  0.0156
      7        0.2588        0.2517  0.0304
      8        [36m0.2548[0m        [32m0.2499[0m  0.0230
      9        [36m0.2528[0m        0.2569  0.0156
     10        [36m0.2519[0m        0.2504  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6059[0m        [32m0.2501[0m  0.0194
      2        [36m0.2941[0m        0.3019  0

      8        0.2518        0.2590  0.0162
      9        0.2538        0.2515  0.0156
     10        [36m0.2500[0m        0.2510  0.0368
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8610[0m        [32m0.6938[0m  0.0158
      2        [36m0.6074[0m        [32m0.2759[0m  0.0317
      3        [36m0.2772[0m        0.3359  0.0156
      4        0.3065        [32m0.2518[0m  0.0312
      5        [36m0.2507[0m        0.2637  0.0157
      6        0.2652        0.2679  0.0156
      7        0.2616        0.2569  0.0156
      8        0.2551        [32m0.2500[0m  0.0312
      9        0.2523        0.2526  0.0156
     10        0.2533        0.2512  0.0178
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6244[0m        [32m0.3164[0m  0.0157
      2        [36m0.3514[0m        [32m0.2506[0m  0.0313
      3        [36m0.2761[0m        0.2849  0

      3        [36m0.2591[0m        0.2801  0.0165
      4        0.2781        0.2567  0.0313
      5        [36m0.2545[0m        0.2574  0.0335
      6        0.2598        0.2597  0.0156
      7        [36m0.2541[0m        [32m0.2499[0m  0.0312
      8        [36m0.2519[0m        0.2528  0.0313
      9        0.2530        0.2501  0.0164
     10        [36m0.2499[0m        0.2515  0.0339
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3751[0m        [32m0.4003[0m  0.0156
      2        [36m0.3265[0m        [32m0.2502[0m  0.0156
      3        [36m0.2558[0m        0.2793  0.0312
      4        0.2793        0.2528  0.0312
      5        [36m0.2530[0m        0.2695  0.0156
      6        0.2649        0.2626  0.0313
      7        0.2554        [32m0.2500[0m  0.0276
      8        [36m0.2526[0m        0.2514  0.0221
      9        [36m0.2515[0m        0.2502  0.0313
     10        [36m0.2507[0

      9        0.2524        0.2565  0.0156
     10        0.2520        0.2502  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4333[0m        [32m0.2697[0m  0.0312
      2        [36m0.2690[0m        0.3054  0.0156
      3        0.2901        [32m0.2577[0m  0.0156
      4        [36m0.2478[0m        0.2631  0.0254
      5        0.2655        [32m0.2548[0m  0.0157
      6        0.2528        [32m0.2543[0m  0.0156
      7        0.2548        0.2560  0.0156
      8        0.2523        [32m0.2500[0m  0.0316
      9        0.2520        0.2520  0.0258
     10        0.2517        [32m0.2500[0m  0.0100
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.8555[0m        [32m0.3745[0m  0.0312
      2        [36m0.3684[0m        [32m0.2893[0m  0.0156
      3        [36m0.2660[0m        [32m0.2591[0m  0.0156
      4        0.2686        

      2        [36m0.4231[0m        [32m0.2517[0m  0.0156
      3        [36m0.2740[0m        0.2936  0.0312
      4        [36m0.2657[0m        0.2541  0.0312
      5        [36m0.2618[0m        0.2621  0.0312
      6        [36m0.2578[0m        [32m0.2500[0m  0.0312
      7        [36m0.2545[0m        0.2595  0.0156
      8        [36m0.2542[0m        0.2502  0.0312
      9        [36m0.2509[0m        0.2542  0.0313
     10        0.2561        0.2503  0.0253
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6565[0m        [32m0.6209[0m  0.0156
      2        [36m0.5877[0m        [32m0.2913[0m  0.0490
      3        [36m0.2646[0m        0.3217  0.0351
      4        0.3125        [32m0.2740[0m  0.0278
      5        [36m0.2608[0m        [32m0.2569[0m  0.0374
      6        0.2658        0.2616  0.0275
      7        0.2624        [32m0.2506[0m  0.0239
      8        [36m0.2514[0m       

      2        [36m0.2949[0m        [32m0.2499[0m  0.0428
      3        [36m0.2619[0m        0.2731  0.0400
      4        [36m0.2545[0m        0.2618  0.0295
      5        0.2619        0.2531  0.0329
      6        [36m0.2545[0m        0.2527  0.0267
      7        [36m0.2500[0m        0.2510  0.0298
      8        0.2514        0.2527  0.0317
      9        0.2532        0.2518  0.0281
     10        0.2528        0.2500  0.0249
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.0396[0m        [32m1.0821[0m  0.0329
      2        [36m0.8973[0m        [32m0.3409[0m  0.0279
      3        [36m0.2898[0m        [32m0.3020[0m  0.0288
      4        0.3026        [32m0.2635[0m  0.0263
      5        [36m0.2553[0m        [32m0.2527[0m  0.0295
      6        [36m0.2552[0m        0.2596  0.0481
      7        0.2573        0.2564  0.0373
      8        [36m0.2532[0m        [32m0.2512[0m  0.0182
  

      3        [36m0.2729[0m        0.3180  0.0491
      4        0.3321        0.3229  0.0397
      5        0.3098        0.2788  0.0353
      6        [36m0.2674[0m        0.2515  0.0326
      7        [36m0.2526[0m        0.2541  0.0313
      8        0.2558        0.2618  0.0312
      9        0.2603        0.2604  0.0369
     10        0.2571        0.2538  0.0314
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.2614[0m        [32m0.2504[0m  0.0168
      2        0.2658        0.2529  0.0391
      3        [36m0.2517[0m        0.2585  0.0319
      4        0.2547        0.2513  0.0318
      5        0.2550        0.2557  0.0318
      6        0.2520        0.2527  0.0319
      7        0.2537        [32m0.2500[0m  0.0328
      8        0.2529        0.2545  0.0313
      9        [36m0.2509[0m        0.2501  0.0314
     10        0.2513        0.2506  0.0328
  epoch    train_loss    valid_loss     dur
----

      8        0.2519        [32m0.2500[0m  0.0440
      9        [36m0.2497[0m        0.2523  0.0378
     10        0.2515        0.2504  0.0405
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3224[0m        [32m0.3603[0m  0.0238
      2        [36m0.2882[0m        [32m0.2680[0m  0.0437
      3        [36m0.2835[0m        [32m0.2560[0m  0.0236
      4        [36m0.2601[0m        0.2756  0.0319
      5        0.2621        [32m0.2505[0m  0.0310
      6        [36m0.2532[0m        0.2534  0.0318
      7        0.2544        [32m0.2503[0m  0.0353
      8        [36m0.2506[0m        0.2560  0.0157
      9        0.2524        0.2514  0.0403
     10        0.2520        0.2503  0.0175
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3440[0m        [32m0.3767[0m  0.0315
      2        [36m0.3135[0m        [32m0.2531[0m  0.0319
      3    

     10        [36m0.2542[0m        0.2517  0.0304
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5207[0m        [32m0.5464[0m  0.0317
      2        [36m0.4019[0m        [32m0.2736[0m  0.0327
      3        [36m0.3040[0m        0.2988  0.0313
      4        [36m0.2671[0m        [32m0.2707[0m  0.0331
      5        0.2799        [32m0.2552[0m  0.0208
      6        [36m0.2646[0m        0.2766  0.0422
      7        0.2656        [32m0.2501[0m  0.0418
      8        [36m0.2547[0m        0.2545  0.0298
      9        [36m0.2522[0m        0.2519  0.0355
     10        0.2524        0.2526  0.0284
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4285[0m        [32m0.2861[0m  0.0318
      2        [36m0.2763[0m        [32m0.2846[0m  0.0318
      3        [36m0.2730[0m        [32m0.2536[0m  0.0200
      4        [36m0.2609[0m       

      2        [36m0.3287[0m        [32m0.2501[0m  0.0509
      3        [36m0.2801[0m        0.2893  0.0339
      4        [36m0.2620[0m        0.2718  0.0442
      5        0.2732        0.2517  0.0255
      6        [36m0.2548[0m        0.2663  0.0341
      7        0.2596        [32m0.2500[0m  0.0430
      8        0.2607        0.2579  0.0446
      9        0.2559        0.2556  0.0201
     10        0.2565        0.2510  0.0477
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5407[0m        [32m0.3703[0m  0.0243
      2        [36m0.3721[0m        [32m0.2499[0m  0.0409
      3        [36m0.2694[0m        0.2990  0.0313
      4        [36m0.2642[0m        0.2593  0.0400
      5        0.2681        0.2524  0.0313
      6        [36m0.2502[0m        0.2654  0.0312
      7        0.2591        0.2513  0.0210
      8        0.2531        0.2533  0.0319
      9        0.2534        0.2517  0.0161
   

      4        [36m0.2628[0m        0.2607  0.0313
      5        [36m0.2598[0m        0.2533  0.0286
      6        [36m0.2567[0m        0.2524  0.0472
      7        [36m0.2544[0m        0.2514  0.0197
      8        [36m0.2506[0m        0.2570  0.0318
      9        0.2559        [32m0.2499[0m  0.0312
     10        0.2527        0.2505  0.0313
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m1.1311[0m        [32m0.8455[0m  0.0156
      2        [36m0.7345[0m        [32m0.3898[0m  0.0312
      3        [36m0.3209[0m        [32m0.2531[0m  0.0281
      4        [36m0.2537[0m        0.2793  0.0312
      5        0.2829        0.2641  0.0325
      6        0.2584        [32m0.2505[0m  0.0397
      7        [36m0.2514[0m        0.2570  0.0471
      8        0.2566        0.2556  0.0388
      9        0.2523        [32m0.2503[0m  0.0483
     10        [36m0.2507[0m        0.2510  0.0307
  epoch    

      8        0.2587        [32m0.2509[0m  0.0156
      9        0.2552        0.2596  0.0313
     10        0.2554        0.2515  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3679[0m        [32m0.3718[0m  0.0313
      2        [36m0.3219[0m        [32m0.2505[0m  0.0312
      3        [36m0.2542[0m        0.2771  0.0156
      4        0.2750        0.2556  0.0469
      5        [36m0.2521[0m        0.2558  0.0272
      6        0.2585        0.2611  0.0174
      7        0.2556        [32m0.2503[0m  0.0367
      8        [36m0.2503[0m        0.2515  0.0325
      9        0.2542        0.2511  0.0312
     10        0.2529        0.2503  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.9925[0m        [32m0.2694[0m  0.0322
      2        [36m0.2595[0m        0.3071  0.0321
      3        0.3106        0.3199  0.0312
      4     

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.3403[0m        [32m0.2924[0m  0.0313
      2        [36m0.2995[0m        [32m0.2502[0m  0.0312
      3        [36m0.2719[0m        0.2779  0.0157
      4        [36m0.2528[0m        0.2596  0.0331
      5        0.2679        0.2521  0.0391
      6        [36m0.2496[0m        0.2642  0.0274
      7        0.2575        0.2518  0.0403
      8        0.2540        0.2533  0.0398
      9        0.2526        0.2517  0.0343
     10        0.2535        0.2553  0.0440
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.4910[0m        [32m0.2750[0m  0.0269
      2        [36m0.2697[0m        0.3349  0.0326
      3        0.2973        [32m0.2569[0m  0.0313
      4        [36m0.2695[0m        0.2571  0.0227
      5        0.2711        0.2694  0.0156
      6        [36m0.2573[0m        [32m0.2515[0m  0

      2        [36m0.2614[0m        0.2750  0.0262
      3        0.2647        0.2628  0.0255
      4        [36m0.2584[0m        0.2517  0.0312
      5        [36m0.2555[0m        0.2534  0.0323
      6        [36m0.2515[0m        0.2530  0.0323
      7        0.2522        0.2506  0.0312
      8        0.2529        0.2507  0.0312
      9        [36m0.2499[0m        0.2507  0.0312
     10        0.2516        0.2506  0.0156
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5439[0m        [32m0.2527[0m  0.0338
      2        [36m0.2798[0m        0.3121  0.0260
      3        [36m0.2785[0m        0.2822  0.0312
      4        0.2878        [32m0.2520[0m  0.0290
      5        [36m0.2712[0m        0.2790  0.0156
      6        [36m0.2668[0m        0.2539  0.0312
      7        [36m0.2610[0m        0.2603  0.0312
      8        [36m0.2512[0m        0.2535  0.0312
      9        0.2588        0.2521  0

      8        0.2559        0.2523  0.0395
      9        [36m0.2512[0m        0.2501  0.0315
     10        [36m0.2507[0m        0.2503  0.0407
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5815[0m        [32m0.3113[0m  0.0324
      2        [36m0.3632[0m        [32m0.2873[0m  0.0297
      3        [36m0.2739[0m        [32m0.2682[0m  0.0268
      4        [36m0.2635[0m        [32m0.2581[0m  0.0271
      5        [36m0.2521[0m        [32m0.2500[0m  0.0312
      6        [36m0.2516[0m        0.2506  0.0293
      7        [36m0.2516[0m        0.2500  0.0313
      8        [36m0.2504[0m        0.2527  0.0313
      9        0.2509        0.2516  0.0320
     10        [36m0.2499[0m        [32m0.2500[0m  0.0312
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.6253[0m        [32m0.5358[0m  0.0312
      2        [36m0.4861[0m       