In [4]:
!pip install torch numpy datasets scikit-learn pyplot

[31mERROR: Could not find a version that satisfies the requirement pyplot (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pyplot[0m[31m
[0m

# Более оптимизированный метод

In [3]:
import json
import numpy as np
import torch
import torch.nn as nn
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
import time
from tqdm import tqdm
import pickle
import os

# 1. Предобработка данных
def card_to_vector(card):
    """Преобразует карту (например, '14S') в вектор."""
    if not card or not isinstance(card, str):
        return np.zeros(5)
    try:
        nominal = int(card[:-1]) - 9  # 9->0, 10->1, ..., 14->5
        if nominal < 0 or nominal > 5:
            return np.zeros(5)
        suit = {'S': 0, 'C': 1, 'D': 2, 'H': 3}[card[-1]]
        vector = np.zeros(5)
        vector[0] = nominal
        vector[1 + suit] = 1
        return vector
    except (ValueError, KeyError):
        return np.zeros(5)

def state_to_vector(state):
    """Кодирует состояние игрока."""
    states = ['attack', 'defend', 'bat', 'pass', 'take']
    vector = np.zeros(len(states))
    if state in states:
        vector[states.index(state)] = 1
    return vector

def snapshot_to_input(parsed_data):
    """Преобразует предварительно разобранный снимок в вектор состояния."""
    try:
        # Проверяем наличие необходимых ключей
        if not isinstance(parsed_data, dict) or 'players' not in parsed_data or not parsed_data['players'] or 'trump' not in parsed_data:
            return None

        # Рука игрока
        player = next((p for p in parsed_data['players'] if p['id'] == parsed_data['players'][0]['id']), None)
        if player is None or 'hand' not in player:
            return None
        hand = np.zeros((24, 5))
        for i, card in enumerate(player.get('hand', [])[:24]):
            hand[i] = card_to_vector(card)

        # Стол
        table = np.zeros((6, 2, 5))  # 6 пар, атака+защита
        for i, pair in enumerate(parsed_data.get('table', [])[:6]):
            table[i, 0] = card_to_vector(pair.get('attack_card', {}).get('card', ''))
            table[i, 1] = card_to_vector(pair.get('defend_card', {}).get('card', ''))

        # Козырь
        trump = np.zeros(4)
        trump_suit = parsed_data.get('trump', 'S')[-1] if isinstance(parsed_data.get('trump'), str) else 'S'
        trump[{'S': 0, 'C': 1, 'D': 2, 'H': 3}.get(trump_suit, 0)] = 1

        # Состояние игрока
        state = state_to_vector(player.get('state', 'pass'))

        # Тип игры
        game_type = np.array([parsed_data.get('game_rules', {}).get('game_type', 0)])

        return np.concatenate([hand.flatten(), table.flatten(), trump, state, game_type])
    except Exception:
        return None

def extract_action(prev_data, curr_data):
    """Определяет действие между снимками."""
    try:
        player_id = prev_data['players'][0]['id']
        prev_table = prev_data.get('table', [])
        curr_table = curr_data.get('table', [])

        # Проверяем изменения на столе
        if len(curr_table) > len(prev_table):
            # Новая карта атаки
            new_pair = curr_table[-1]
            if (new_pair.get('attack_card', {}).get('user_id') == player_id and
                'card' in new_pair.get('attack_card', {})):
                return {'type': 'attack', 'move': new_pair['attack_card']['card']}
        elif any('defend_card' in pair for pair in curr_table) and not any('defend_card' in pair for pair in prev_table):
            # Новая карта защиты
            for pair in curr_table:
                if (pair.get('defend_card', {}).get('user_id') == player_id and
                    'card' in pair.get('attack_card', {}) and
                    'card' in pair.get('defend_card', {})):
                    return {
                        'type': 'defend',
                        'move': [pair['attack_card']['card'], pair['defend_card']['card']]
                    }
        elif prev_data['players'][0].get('state') != curr_data['players'][0].get('state'):
            # Изменение состояния
            new_state = curr_data['players'][0].get('state')
            if new_state in ['bat', 'pass', 'take']:
                return {'type': 'state', 'state': new_state, 'move': None}

        return {'type': 'wait', 'move': None}
    except Exception:
        return None

# 2. Кастомный датасет
class DurakDataset(Dataset):
    def __init__(self, dataset, cache_file="durak_dataset.pkl"):
        start_time = time.time()
        self.cache_file = cache_file

        # Проверяем, существует ли кэш
        if os.path.exists(cache_file):
            print(f"Loading cached dataset from {cache_file}...")
            with open(cache_file, 'rb') as f:
                cached_data = pickle.load(f)
            self.vectors = cached_data['vectors']
            self.actions = cached_data['actions']
            print(f"Loaded {len(self.vectors)} valid snapshots from cache")
        else:
            print("No cache found, processing dataset...")
            self.dataset = dataset['train']
            self.vectors = []
            self.actions = []

            # Парсим снимки и группируем по game_id
            parsed_snaps = []
            for s in tqdm(self.dataset, desc="Parsing snapshots"):
                try:
                    parsed = json.loads(s['snapshot'])
                    parsed_snaps.append((s, parsed))
                except Exception:
                    continue

            game_snaps_dict = {}
            for snap, parsed in parsed_snaps:
                game_id = snap['game_id']
                if game_id not in game_snaps_dict:
                    game_snaps_dict[game_id] = []
                game_snaps_dict[game_id].append((snap, parsed))

            print(f"Found {len(game_snaps_dict)} games")
            for game_id in tqdm(game_snaps_dict.keys(), desc="Processing games"):
                game_snaps = sorted(game_snaps_dict[game_id], key=lambda x: x[1].get('timestamp', 0))
                for i in range(len(game_snaps) - 1):
                    curr_snap, curr_parsed = game_snaps[i]
                    next_snap, next_parsed = game_snaps[i + 1]
                    # Проверяем входной вектор
                    input_vector = snapshot_to_input(curr_parsed)
                    if input_vector is None:
                        continue
                    # Извлекаем действие
                    action = extract_action(curr_parsed, next_parsed)
                    if (action is None or not isinstance(action, dict) or
                        'type' not in action or 'move' not in action or
                        action['type'] not in ['attack', 'defend', 'state', 'wait']):
                        continue
                    self.vectors.append(input_vector)
                    self.actions.append(action)

            print(f"Processed {len(self.vectors)} valid snapshots")
            # Сохраняем кэш
            if self.vectors:
                print(f"Saving dataset to {cache_file}...")
                with open(cache_file, 'wb') as f:
                    pickle.dump({'vectors': self.vectors, 'actions': self.actions}, f)

        if not self.vectors:
            raise ValueError("No valid snapshots available after processing")

        print(f"DurakDataset initialization took {time.time() - start_time:.3f} seconds")

    def __len__(self):
        return len(self.vectors)

    def __getitem__(self, idx):
        return torch.tensor(self.vectors[idx], dtype=torch.float32), self.actions[idx]

# 3. Модель
class DurakModel(nn.Module):
    def __init__(self, input_size=190, hidden_size=256, num_cards=24):
        super(DurakModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.action_head = nn.Linear(hidden_size, 4)  # attack, defend, state, wait
        self.attack_card_head = nn.Linear(hidden_size, num_cards)
        self.defend_card_head = nn.Linear(hidden_size, num_cards)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        action_probs = torch.softmax(self.action_head(x), dim=-1)
        attack_probs = torch.softmax(self.attack_card_head(x), dim=-1)
        defend_probs = torch.softmax(self.defend_card_head(x), dim=-1)
        return action_probs, attack_probs, defend_probs

# 4. Вспомогательная функция
def card_to_idx(card):
    """Преобразует карту в индекс (0–23)."""
    if not card or not isinstance(card, str):
        return 0
    try:
        nominal = int(card[:-1]) - 9  # 9->0, ..., 14->5
        if nominal < 0 or nominal > 5:
            return 0
        suit = {'S': 0, 'C': 1, 'D': 2, 'H': 3}[card[-1]]
        return nominal * 4 + suit
    except (ValueError, KeyError):
        return 0

# 5. Обучение
def train_model():
    start_time = time.time()
    print("Loading dataset...")
    dataset = load_dataset("neuronetties/durak")
    print(f"Dataset loaded in {time.time() - start_time:.3f} seconds")

    print("Initializing DurakDataset...")
    durak_dataset = DurakDataset(dataset)
    print("Creating DataLoader...")

    def custom_collate(batch):
        batch = [item for item in batch if item is not None]
        if not batch:
            return None
        inputs, actions = zip(*batch)
        return torch.stack(inputs), actions

    dataloader = DataLoader(durak_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate)
    print(f"DataLoader created in {time.time() - start_time:.3f} seconds")

    model = DurakModel()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    action_criterion = nn.CrossEntropyLoss()
    card_criterion = nn.CrossEntropyLoss()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(10):
        epoch_start_time = time.time()
        total_loss = 0
        batch_count = 0
        for batch_idx, batch in enumerate(tqdm(dataloader, desc=f"Epoch {epoch+1}")):
            if batch is None:
                continue

            inputs, actions = batch
            inputs = inputs.to(device)

            optimizer.zero_grad()
            action_probs, attack_probs, defend_probs = model(inputs)

            # Подготовка меток
            action_labels = []
            attack_labels = []
            defend_labels = []
            valid_indices = []
            for i, action in enumerate(actions):
                if not isinstance(action, dict) or action['type'] not in ['attack', 'defend', 'state', 'wait']:
                    continue
                try:
                    if action['type'] == 'attack' and action['move']:
                        action_labels.append(0)
                        attack_labels.append(card_to_idx(action['move']))
                        defend_labels.append(0)
                    elif action['type'] == 'defend' and action['move'] and len(action['move']) == 2:
                        action_labels.append(1)
                        attack_labels.append(0)
                        defend_labels.append(card_to_idx(action['move'][1]))
                    elif action['type'] == 'state':
                        action_labels.append(2)
                        attack_labels.append(0)
                        defend_labels.append(0)
                    else:  # wait
                        action_labels.append(3)
                        attack_labels.append(0)
                        defend_labels.append(0)
                    valid_indices.append(i)
                except Exception:
                    continue

            if not valid_indices:
                continue

            # Фильтруем входные данные
            inputs = inputs[valid_indices]
            action_labels = torch.tensor(action_labels, dtype=torch.long, device=device)
            attack_labels = torch.tensor(attack_labels, dtype=torch.long, device=device)
            defend_labels = torch.tensor(defend_labels, dtype=torch.long, device=device)

            action_probs, attack_probs, defend_probs = model(inputs)

            action_loss = action_criterion(action_probs, action_labels)
            attack_loss = card_criterion(attack_probs, attack_labels)
            defend_loss = card_criterion(defend_probs, defend_labels)

            loss = action_loss + attack_loss + defend_loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            batch_count += 1
            if batch_idx % 100 == 0:
                print(f"Batch {batch_idx}, Loss: {loss.item():.4f}")

        if batch_count > 0:
            print(f"Epoch {epoch+1} completed in {time.time() - epoch_start_time:.3f} seconds")
            print(f"Average Loss: {total_loss / batch_count:.4f}")
        else:
            print(f"Epoch {epoch+1} skipped: no valid batches")

    print(f"Training completed in {time.time() - start_time:.3f} seconds")
    return model

# Запуск
if __name__ == "__main__":
    # Удалите старый кэш для пересоздания
    # os.remove("durak_dataset.pkl")
    train_model()

Loading dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/321 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/292962 [00:00<?, ? examples/s]

Dataset loaded in 5.809 seconds
Initializing DurakDataset...
No cache found, processing dataset...


Parsing snapshots: 100%|██████████| 292962/292962 [00:25<00:00, 11610.57it/s]


Found 6331 games


Processing games: 100%|██████████| 6331/6331 [00:12<00:00, 517.88it/s]


Processed 286631 valid snapshots
Saving dataset to durak_dataset.pkl...
DurakDataset initialization took 47.234 seconds
Creating DataLoader...
DataLoader created in 53.048 seconds


Epoch 1:   0%|          | 15/8958 [00:00<02:25, 61.56it/s]

Batch 0, Loss: 7.7345


Epoch 1:   1%|▏         | 120/8958 [00:01<01:06, 132.23it/s]

Batch 100, Loss: 5.7068


Epoch 1:   2%|▏         | 222/8958 [00:01<01:02, 140.18it/s]

Batch 200, Loss: 5.6135


Epoch 1:   4%|▎         | 325/8958 [00:02<00:44, 193.23it/s]

Batch 300, Loss: 5.7381


Epoch 1:   5%|▍         | 428/8958 [00:02<00:43, 198.28it/s]

Batch 400, Loss: 6.0819


Epoch 1:   6%|▌         | 533/8958 [00:03<00:42, 198.16it/s]

Batch 500, Loss: 5.6756


Epoch 1:   7%|▋         | 637/8958 [00:03<00:42, 197.69it/s]

Batch 600, Loss: 5.7381


Epoch 1:   8%|▊         | 733/8958 [00:04<00:47, 174.61it/s]

Batch 700, Loss: 5.8006


Epoch 1:   9%|▉         | 823/8958 [00:05<00:49, 164.47it/s]

Batch 800, Loss: 5.9881


Epoch 1:  10%|█         | 926/8958 [00:05<00:48, 165.38it/s]

Batch 900, Loss: 5.8631


Epoch 1:  12%|█▏        | 1032/8958 [00:06<00:47, 165.78it/s]

Batch 1000, Loss: 5.9256


Epoch 1:  12%|█▏        | 1118/8958 [00:06<00:46, 167.25it/s]

Batch 1100, Loss: 5.9236


Epoch 1:  14%|█▎        | 1226/8958 [00:07<00:43, 176.69it/s]

Batch 1200, Loss: 5.9863


Epoch 1:  15%|█▍        | 1334/8958 [00:08<00:45, 169.11it/s]

Batch 1300, Loss: 5.8128


Epoch 1:  16%|█▌        | 1425/8958 [00:08<00:43, 173.99it/s]

Batch 1400, Loss: 5.6569


Epoch 1:  17%|█▋        | 1535/8958 [00:09<00:43, 170.08it/s]

Batch 1500, Loss: 5.6801


Epoch 1:  18%|█▊        | 1627/8958 [00:09<00:41, 178.22it/s]

Batch 1600, Loss: 5.8817


Epoch 1:  19%|█▉        | 1738/8958 [00:10<00:40, 178.45it/s]

Batch 1700, Loss: 5.5820


Epoch 1:  20%|██        | 1828/8958 [00:11<00:54, 131.30it/s]

Batch 1800, Loss: 5.6151


Epoch 1:  21%|██        | 1899/8958 [00:11<00:58, 121.45it/s]

Batch 1900, Loss: 5.9792


Epoch 1:  23%|██▎       | 2024/8958 [00:13<01:21, 84.56it/s]

Batch 2000, Loss: 5.9216


Epoch 1:  24%|██▎       | 2125/8958 [00:14<00:59, 115.80it/s]

Batch 2100, Loss: 5.8583


Epoch 1:  25%|██▍       | 2220/8958 [00:14<00:48, 138.40it/s]

Batch 2200, Loss: 5.4885


Epoch 1:  26%|██▌       | 2324/8958 [00:15<00:41, 160.10it/s]

Batch 2300, Loss: 5.7055


Epoch 1:  27%|██▋       | 2432/8958 [00:16<00:38, 167.84it/s]

Batch 2400, Loss: 5.6142


Epoch 1:  28%|██▊       | 2519/8958 [00:16<00:37, 169.52it/s]

Batch 2500, Loss: 5.5819


Epoch 1:  29%|██▉       | 2627/8958 [00:17<00:36, 172.76it/s]

Batch 2600, Loss: 5.6144


Epoch 1:  31%|███       | 2735/8958 [00:17<00:36, 171.18it/s]

Batch 2700, Loss: 5.8013


Epoch 1:  32%|███▏      | 2825/8958 [00:18<00:35, 171.73it/s]

Batch 2800, Loss: 5.7192


Epoch 1:  33%|███▎      | 2933/8958 [00:19<00:35, 171.06it/s]

Batch 2900, Loss: 5.7685


Epoch 1:  34%|███▎      | 3023/8958 [00:19<00:35, 167.07it/s]

Batch 3000, Loss: 5.5514


Epoch 1:  35%|███▍      | 3128/8958 [00:20<00:34, 167.94it/s]

Batch 3100, Loss: 5.8306


Epoch 1:  36%|███▌      | 3232/8958 [00:20<00:34, 167.01it/s]

Batch 3200, Loss: 5.7382


Epoch 1:  37%|███▋      | 3320/8958 [00:21<00:32, 171.86it/s]

Batch 3300, Loss: 5.9602


Epoch 1:  38%|███▊      | 3424/8958 [00:21<00:33, 165.01it/s]

Batch 3400, Loss: 5.6763


Epoch 1:  39%|███▉      | 3528/8958 [00:22<00:32, 168.09it/s]

Batch 3500, Loss: 5.7064


Epoch 1:  41%|████      | 3632/8958 [00:23<00:31, 169.34it/s]

Batch 3600, Loss: 5.7997


Epoch 1:  42%|████▏     | 3720/8958 [00:23<00:32, 163.27it/s]

Batch 3700, Loss: 5.5815


Epoch 1:  43%|████▎     | 3825/8958 [00:24<00:29, 171.68it/s]

Batch 3800, Loss: 5.6301


Epoch 1:  44%|████▍     | 3924/8958 [00:25<00:38, 129.30it/s]

Batch 3900, Loss: 5.6792


Epoch 1:  45%|████▍     | 4016/8958 [00:25<00:41, 119.19it/s]

Batch 4000, Loss: 5.5849


Epoch 1:  46%|████▌     | 4117/8958 [00:26<00:41, 117.04it/s]

Batch 4100, Loss: 5.7043


Epoch 1:  47%|████▋     | 4222/8958 [00:27<00:34, 136.14it/s]

Batch 4200, Loss: 5.7615


Epoch 1:  48%|████▊     | 4324/8958 [00:28<00:28, 164.49it/s]

Batch 4300, Loss: 5.9228


Epoch 1:  49%|████▉     | 4427/8958 [00:28<00:27, 167.27it/s]

Batch 4400, Loss: 5.6736


Epoch 1:  51%|█████     | 4534/8958 [00:29<00:25, 173.26it/s]

Batch 4500, Loss: 5.8355


Epoch 1:  52%|█████▏    | 4620/8958 [00:29<00:26, 162.33it/s]

Batch 4600, Loss: 5.6752


Epoch 1:  53%|█████▎    | 4728/8958 [00:30<00:24, 173.76it/s]

Batch 4700, Loss: 5.6758


Epoch 1:  54%|█████▍    | 4836/8958 [00:31<00:23, 171.83it/s]

Batch 4800, Loss: 5.8063


Epoch 1:  55%|█████▍    | 4926/8958 [00:31<00:23, 173.11it/s]

Batch 4900, Loss: 5.4257


Epoch 1:  56%|█████▌    | 5031/8958 [00:32<00:23, 168.86it/s]

Batch 5000, Loss: 5.8638


Epoch 1:  57%|█████▋    | 5135/8958 [00:32<00:22, 167.07it/s]

Batch 5100, Loss: 5.5512


Epoch 1:  58%|█████▊    | 5222/8958 [00:33<00:22, 166.98it/s]

Batch 5200, Loss: 5.7683


Epoch 1:  59%|█████▉    | 5326/8958 [00:34<00:22, 162.73it/s]

Batch 5300, Loss: 5.5501


Epoch 1:  61%|██████    | 5428/8958 [00:34<00:21, 164.72it/s]

Batch 5400, Loss: 5.8005


Epoch 1:  62%|██████▏   | 5530/8958 [00:35<00:20, 163.93it/s]

Batch 5500, Loss: 5.6130


Epoch 1:  63%|██████▎   | 5633/8958 [00:36<00:20, 159.00it/s]

Batch 5600, Loss: 5.7691


Epoch 1:  64%|██████▍   | 5720/8958 [00:36<00:19, 166.32it/s]

Batch 5700, Loss: 5.7379


Epoch 1:  65%|██████▍   | 5822/8958 [00:37<00:19, 156.84it/s]

Batch 5800, Loss: 5.7038


Epoch 1:  66%|██████▌   | 5923/8958 [00:38<00:25, 118.10it/s]

Batch 5900, Loss: 5.4872


Epoch 1:  67%|██████▋   | 6022/8958 [00:38<00:24, 119.42it/s]

Batch 6000, Loss: 5.6314


Epoch 1:  68%|██████▊   | 6119/8958 [00:39<00:25, 109.76it/s]

Batch 6100, Loss: 5.7071


Epoch 1:  70%|██████▉   | 6231/8958 [00:40<00:18, 146.44it/s]

Batch 6200, Loss: 5.7688


Epoch 1:  71%|███████   | 6331/8958 [00:41<00:16, 160.38it/s]

Batch 6300, Loss: 5.7683


Epoch 1:  72%|███████▏  | 6431/8958 [00:41<00:15, 159.64it/s]

Batch 6400, Loss: 5.5482


Epoch 1:  73%|███████▎  | 6529/8958 [00:42<00:15, 153.37it/s]

Batch 6500, Loss: 5.4839


Epoch 1:  74%|███████▍  | 6625/8958 [00:43<00:14, 156.93it/s]

Batch 6600, Loss: 5.7065


Epoch 1:  75%|███████▌  | 6724/8958 [00:43<00:14, 152.80it/s]

Batch 6700, Loss: 5.7380


Epoch 1:  76%|███████▌  | 6821/8958 [00:44<00:13, 154.41it/s]

Batch 6800, Loss: 5.6262


Epoch 1:  77%|███████▋  | 6927/8958 [00:45<00:13, 145.71it/s]

Batch 6900, Loss: 5.8029


Epoch 1:  78%|███████▊  | 7022/8958 [00:45<00:13, 145.88it/s]

Batch 7000, Loss: 5.5868


Epoch 1:  80%|███████▉  | 7129/8958 [00:46<00:12, 149.61it/s]

Batch 7100, Loss: 5.9840


Epoch 1:  81%|████████  | 7224/8958 [00:47<00:11, 150.23it/s]

Batch 7200, Loss: 5.5506


Epoch 1:  82%|████████▏ | 7317/8958 [00:47<00:11, 147.52it/s]

Batch 7300, Loss: 5.7059


Epoch 1:  83%|████████▎ | 7425/8958 [00:48<00:10, 146.45it/s]

Batch 7400, Loss: 5.4885


Epoch 1:  84%|████████▍ | 7520/8958 [00:49<00:09, 153.15it/s]

Batch 7500, Loss: 5.6730


Epoch 1:  85%|████████▌ | 7632/8958 [00:49<00:08, 150.71it/s]

Batch 7600, Loss: 5.7068


Epoch 1:  86%|████████▌ | 7721/8958 [00:50<00:10, 118.27it/s]

Batch 7700, Loss: 5.7381


Epoch 1:  87%|████████▋ | 7818/8958 [00:51<00:09, 115.55it/s]

Batch 7800, Loss: 5.9250


Epoch 1:  88%|████████▊ | 7912/8958 [00:52<00:09, 108.02it/s]

Batch 7900, Loss: 5.6124


Epoch 1:  90%|████████▉ | 8031/8958 [00:53<00:06, 145.15it/s]

Batch 8000, Loss: 5.7718


Epoch 1:  91%|█████████ | 8126/8958 [00:53<00:05, 152.72it/s]

Batch 8100, Loss: 5.7693


Epoch 1:  92%|█████████▏| 8221/8958 [00:54<00:04, 148.95it/s]

Batch 8200, Loss: 5.5506


Epoch 1:  93%|█████████▎| 8330/8958 [00:55<00:04, 144.27it/s]

Batch 8300, Loss: 5.6119


Epoch 1:  94%|█████████▍| 8426/8958 [00:55<00:03, 150.86it/s]

Batch 8400, Loss: 5.5504


Epoch 1:  95%|█████████▌| 8518/8958 [00:56<00:02, 147.33it/s]

Batch 8500, Loss: 5.7693


Epoch 1:  96%|█████████▋| 8627/8958 [00:57<00:02, 145.31it/s]

Batch 8600, Loss: 5.8961


Epoch 1:  97%|█████████▋| 8720/8958 [00:57<00:01, 149.36it/s]

Batch 8700, Loss: 5.5187


Epoch 1:  99%|█████████▊| 8827/8958 [00:58<00:00, 146.32it/s]

Batch 8800, Loss: 5.7072


Epoch 1: 100%|█████████▉| 8920/8958 [00:59<00:00, 150.21it/s]

Batch 8900, Loss: 5.5488


Epoch 1: 100%|██████████| 8958/8958 [00:59<00:00, 150.35it/s]


Epoch 1 completed in 59.586 seconds
Average Loss: 5.7231


Epoch 2:   0%|          | 14/8958 [00:00<01:05, 137.24it/s]

Batch 0, Loss: 5.5486


Epoch 2:   1%|▏         | 118/8958 [00:00<01:02, 140.88it/s]

Batch 100, Loss: 5.6130


Epoch 2:   2%|▏         | 223/8958 [00:01<01:00, 144.27it/s]

Batch 200, Loss: 5.8603


Epoch 2:   4%|▎         | 317/8958 [00:02<00:58, 148.65it/s]

Batch 300, Loss: 5.8005


Epoch 2:   5%|▍         | 425/8958 [00:02<00:58, 146.44it/s]

Batch 400, Loss: 5.7997


Epoch 2:   6%|▌         | 512/8958 [00:03<01:14, 112.99it/s]

Batch 500, Loss: 5.7385


Epoch 2:   7%|▋         | 614/8958 [00:04<01:20, 103.57it/s]

Batch 600, Loss: 5.5198


Epoch 2:   8%|▊         | 710/8958 [00:05<01:27, 94.72it/s]

Batch 700, Loss: 5.7376


Epoch 2:   9%|▉         | 828/8958 [00:06<00:58, 139.73it/s]

Batch 800, Loss: 5.7065


Epoch 2:  10%|█         | 916/8958 [00:07<00:57, 139.72it/s]

Batch 900, Loss: 5.6437


Epoch 2:  11%|█▏        | 1021/8958 [00:07<00:58, 136.61it/s]

Batch 1000, Loss: 5.6813


Epoch 2:  13%|█▎        | 1122/8958 [00:08<00:56, 137.98it/s]

Batch 1100, Loss: 5.6131


Epoch 2:  14%|█▎        | 1220/8958 [00:09<01:00, 128.95it/s]

Batch 1200, Loss: 5.6756


Epoch 2:  15%|█▍        | 1320/8958 [00:10<00:56, 135.36it/s]

Batch 1300, Loss: 5.5189


Epoch 2:  16%|█▌        | 1418/8958 [00:10<00:58, 129.93it/s]

Batch 1400, Loss: 5.5818


Epoch 2:  17%|█▋        | 1518/8958 [00:11<00:56, 132.60it/s]

Batch 1500, Loss: 5.6752


Epoch 2:  18%|█▊        | 1615/8958 [00:12<00:56, 129.98it/s]

Batch 1600, Loss: 5.7694


Epoch 2:  19%|█▉        | 1729/8958 [00:13<00:54, 133.28it/s]

Batch 1700, Loss: 5.6443


Epoch 2:  20%|██        | 1827/8958 [00:14<00:55, 129.20it/s]

Batch 1800, Loss: 5.7416


Epoch 2:  21%|██▏       | 1924/8958 [00:14<00:54, 130.16it/s]

Batch 1900, Loss: 5.8005


Epoch 2:  23%|██▎       | 2020/8958 [00:15<00:53, 129.55it/s]

Batch 2000, Loss: 5.6443


Epoch 2:  24%|██▎       | 2117/8958 [00:16<01:03, 108.21it/s]

Batch 2100, Loss: 5.6401


Epoch 2:  25%|██▍       | 2217/8958 [00:17<01:06, 101.05it/s]

Batch 2200, Loss: 5.6699


Epoch 2:  26%|██▌       | 2312/8958 [00:18<01:11, 92.39it/s]

Batch 2300, Loss: 5.5206


Epoch 2:  27%|██▋       | 2418/8958 [00:19<00:53, 123.01it/s]

Batch 2400, Loss: 5.5506


Epoch 2:  28%|██▊       | 2515/8958 [00:20<00:47, 134.82it/s]

Batch 2500, Loss: 5.6751


Epoch 2:  29%|██▉       | 2629/8958 [00:20<00:47, 132.82it/s]

Batch 2600, Loss: 5.6752


Epoch 2:  30%|███       | 2727/8958 [00:21<00:48, 128.81it/s]

Batch 2700, Loss: 5.5813


Epoch 2:  32%|███▏      | 2825/8958 [00:22<00:48, 125.84it/s]

Batch 2800, Loss: 5.7695


Epoch 2:  33%|███▎      | 2917/8958 [00:23<00:48, 124.12it/s]

Batch 2900, Loss: 5.8019


Epoch 2:  34%|███▎      | 3023/8958 [00:24<00:47, 124.52it/s]

Batch 3000, Loss: 5.6751


Epoch 2:  35%|███▍      | 3108/8958 [00:25<01:35, 61.09it/s]

Batch 3100, Loss: 5.5506


Epoch 2:  36%|███▌      | 3204/8958 [00:27<02:45, 34.79it/s]

Batch 3200, Loss: 5.7058


Epoch 2:  37%|███▋      | 3308/8958 [00:31<02:15, 41.76it/s]

Batch 3300, Loss: 5.7693


Epoch 2:  38%|███▊      | 3403/8958 [00:35<05:33, 16.64it/s]

Batch 3400, Loss: 5.4599


Epoch 2:  39%|███▉      | 3519/8958 [00:37<00:55, 98.78it/s]

Batch 3500, Loss: 5.7687


Epoch 2:  40%|████      | 3621/8958 [00:38<00:43, 123.77it/s]

Batch 3600, Loss: 5.7377


Epoch 2:  42%|████▏     | 3724/8958 [00:39<00:43, 121.12it/s]

Batch 3700, Loss: 5.7693


Epoch 2:  43%|████▎     | 3823/8958 [00:39<00:43, 117.19it/s]

Batch 3800, Loss: 5.5506


Epoch 2:  44%|████▎     | 3915/8958 [00:40<00:41, 122.52it/s]

Batch 3900, Loss: 5.5817


Epoch 2:  45%|████▍     | 4019/8958 [00:41<00:41, 120.34it/s]

Batch 4000, Loss: 5.7377


Epoch 2:  46%|████▌     | 4126/8958 [00:42<00:37, 127.78it/s]

Batch 4100, Loss: 5.6130


Epoch 2:  47%|████▋     | 4217/8958 [00:43<00:38, 122.11it/s]

Batch 4200, Loss: 5.6443


Epoch 2:  48%|████▊     | 4319/8958 [00:44<00:39, 118.67it/s]

Batch 4300, Loss: 5.6144


Epoch 2:  49%|████▉     | 4420/8958 [00:44<00:38, 118.89it/s]

Batch 4400, Loss: 5.7360


Epoch 2:  50%|█████     | 4519/8958 [00:45<00:36, 120.38it/s]

Batch 4500, Loss: 5.5810


Epoch 2:  52%|█████▏    | 4620/8958 [00:46<00:35, 122.56it/s]

Batch 4600, Loss: 5.7065


Epoch 2:  53%|█████▎    | 4710/8958 [00:47<00:46, 92.22it/s]

Batch 4700, Loss: 5.6755


Epoch 2:  54%|█████▎    | 4810/8958 [00:48<00:45, 91.01it/s]

Batch 4800, Loss: 5.7687


Epoch 2:  55%|█████▍    | 4925/8958 [00:49<00:36, 110.02it/s]

Batch 4900, Loss: 5.5500


Epoch 2:  56%|█████▌    | 5023/8958 [00:50<00:33, 119.05it/s]

Batch 5000, Loss: 5.8005


Epoch 2:  57%|█████▋    | 5125/8958 [00:51<00:31, 121.08it/s]

Batch 5100, Loss: 5.6443


Epoch 2:  58%|█████▊    | 5215/8958 [00:52<00:31, 118.45it/s]

Batch 5200, Loss: 5.6573


Epoch 2:  59%|█████▉    | 5316/8958 [00:53<00:31, 115.14it/s]

Batch 5300, Loss: 5.6131


Epoch 2:  60%|██████    | 5415/8958 [00:53<00:30, 118.05it/s]

Batch 5400, Loss: 5.7046


Epoch 2:  62%|██████▏   | 5513/8958 [00:54<00:30, 113.68it/s]

Batch 5500, Loss: 5.8006


Epoch 2:  63%|██████▎   | 5624/8958 [00:55<00:28, 115.33it/s]

Batch 5600, Loss: 5.6440


Epoch 2:  64%|██████▍   | 5723/8958 [00:56<00:27, 118.52it/s]

Batch 5700, Loss: 5.6441


Epoch 2:  65%|██████▌   | 5827/8958 [00:57<00:24, 126.96it/s]

Batch 5800, Loss: 5.8313


Epoch 2:  66%|██████▌   | 5917/8958 [00:58<00:25, 119.84it/s]

Batch 5900, Loss: 5.4561


Epoch 2:  67%|██████▋   | 6014/8958 [00:59<00:24, 119.27it/s]

Batch 6000, Loss: 5.7698


Epoch 2:  68%|██████▊   | 6111/8958 [00:59<00:29, 96.83it/s] 

Batch 6100, Loss: 5.6443


Epoch 2:  69%|██████▉   | 6211/8958 [01:01<00:29, 92.04it/s]

Batch 6200, Loss: 5.7065


Epoch 2:  70%|███████   | 6303/8958 [01:02<00:49, 54.08it/s]

Batch 6300, Loss: 5.8002


Epoch 2:  72%|███████▏  | 6411/8958 [01:04<00:32, 78.54it/s]

Batch 6400, Loss: 5.6115


Epoch 2:  73%|███████▎  | 6522/8958 [01:05<00:21, 110.84it/s]

Batch 6500, Loss: 5.6443


Epoch 2:  74%|███████▍  | 6615/8958 [01:06<00:21, 110.10it/s]

Batch 6600, Loss: 5.8003


Epoch 2:  75%|███████▌  | 6722/8958 [01:07<00:20, 109.67it/s]

Batch 6700, Loss: 5.6131


Epoch 2:  76%|███████▌  | 6822/8958 [01:07<00:19, 107.11it/s]

Batch 6800, Loss: 5.3944


Epoch 2:  77%|███████▋  | 6923/8958 [01:08<00:18, 110.30it/s]

Batch 6900, Loss: 5.5505


Epoch 2:  78%|███████▊  | 7019/8958 [01:09<00:18, 106.88it/s]

Batch 7000, Loss: 5.6211


Epoch 2:  79%|███████▉  | 7112/8958 [01:10<00:17, 107.79it/s]

Batch 7100, Loss: 5.6441


Epoch 2:  81%|████████  | 7223/8958 [01:11<00:16, 105.28it/s]

Batch 7200, Loss: 5.7068


Epoch 2:  82%|████████▏ | 7317/8958 [01:12<00:16, 102.04it/s]

Batch 7300, Loss: 5.7686


Epoch 2:  83%|████████▎ | 7410/8958 [01:13<00:19, 77.77it/s]

Batch 7400, Loss: 5.6757


Epoch 2:  84%|████████▍ | 7510/8958 [01:15<00:19, 75.72it/s]

Batch 7500, Loss: 5.6342


Epoch 2:  85%|████████▌ | 7616/8958 [01:16<00:13, 101.01it/s]

Batch 7600, Loss: 5.6759


Epoch 2:  86%|████████▌ | 7722/8958 [01:17<00:11, 104.01it/s]

Batch 7700, Loss: 5.5816


Epoch 2:  87%|████████▋ | 7812/8958 [01:18<00:10, 106.14it/s]

Batch 7800, Loss: 5.4257


Epoch 2:  88%|████████▊ | 7922/8958 [01:19<00:09, 104.03it/s]

Batch 7900, Loss: 5.6756


Epoch 2:  90%|████████▉ | 8023/8958 [01:20<00:08, 106.30it/s]

Batch 8000, Loss: 5.6763


Epoch 2:  91%|█████████ | 8112/8958 [01:20<00:08, 104.26it/s]

Batch 8100, Loss: 5.5193


Epoch 2:  92%|█████████▏| 8219/8958 [01:22<00:07, 99.50it/s]

Batch 8200, Loss: 5.7374


Epoch 2:  93%|█████████▎| 8313/8958 [01:22<00:06, 97.90it/s]

Batch 8300, Loss: 5.5506


Epoch 2:  94%|█████████▍| 8413/8958 [01:23<00:05, 103.56it/s]

Batch 8400, Loss: 5.7690


Epoch 2:  95%|█████████▌| 8512/8958 [01:24<00:04, 101.95it/s]

Batch 8500, Loss: 5.7069


Epoch 2:  96%|█████████▌| 8614/8958 [01:26<00:04, 78.80it/s]

Batch 8600, Loss: 5.6125


Epoch 2:  97%|█████████▋| 8715/8958 [01:27<00:03, 79.30it/s]

Batch 8700, Loss: 5.7995


Epoch 2:  98%|█████████▊| 8818/8958 [01:28<00:01, 98.16it/s]

Batch 8800, Loss: 5.7380


Epoch 2: 100%|█████████▉| 8915/8958 [01:29<00:00, 103.36it/s]

Batch 8900, Loss: 5.6996


Epoch 2: 100%|██████████| 8958/8958 [01:29<00:00, 99.56it/s] 


Epoch 2 completed in 89.980 seconds
Average Loss: 5.6833


Epoch 3:   0%|          | 10/8958 [00:00<01:33, 95.56it/s]

Batch 0, Loss: 5.6137


Epoch 3:   1%|▏         | 113/8958 [00:01<01:23, 105.75it/s]

Batch 100, Loss: 5.8000


Epoch 3:   2%|▏         | 212/8958 [00:02<01:24, 103.91it/s]

Batch 200, Loss: 5.8623


Epoch 3:   3%|▎         | 312/8958 [00:02<01:25, 101.19it/s]

Batch 300, Loss: 5.6129


Epoch 3:   5%|▍         | 412/8958 [00:03<01:22, 103.35it/s]

Batch 400, Loss: 5.6442


Epoch 3:   6%|▌         | 522/8958 [00:04<01:20, 105.19it/s]

Batch 500, Loss: 5.6753


Epoch 3:   7%|▋         | 615/8958 [00:05<01:16, 109.08it/s]

Batch 600, Loss: 5.7394


Epoch 3:   8%|▊         | 715/8958 [00:06<01:18, 105.42it/s]

Batch 700, Loss: 5.7699


Epoch 3:   9%|▉         | 815/8958 [00:07<01:18, 104.25it/s]

Batch 800, Loss: 5.6756


Epoch 3:  10%|█         | 909/8958 [00:08<01:41, 79.34it/s]

Batch 900, Loss: 5.5817


Epoch 3:  11%|█▏        | 1016/8958 [00:10<01:40, 78.81it/s]

Batch 1000, Loss: 5.5570


Epoch 3:  12%|█▏        | 1119/8958 [00:11<01:19, 98.24it/s]

Batch 1100, Loss: 5.8627


Epoch 3:  14%|█▎        | 1219/8958 [00:12<01:14, 103.69it/s]

Batch 1200, Loss: 5.6131


Epoch 3:  15%|█▍        | 1317/8958 [00:13<01:17, 98.39it/s]

Batch 1300, Loss: 5.7818


Epoch 3:  16%|█▌        | 1413/8958 [00:14<01:17, 97.22it/s]

Batch 1400, Loss: 5.6754


Epoch 3:  17%|█▋        | 1512/8958 [00:15<01:13, 101.72it/s]

Batch 1500, Loss: 5.5189


Epoch 3:  18%|█▊        | 1620/8958 [00:16<01:15, 97.61it/s]

Batch 1600, Loss: 5.6130


Epoch 3:  19%|█▉        | 1719/8958 [00:17<01:10, 101.99it/s]

Batch 1700, Loss: 5.8003


Epoch 3:  20%|██        | 1819/8958 [00:18<01:08, 104.62it/s]

Batch 1800, Loss: 5.5819


Epoch 3:  21%|██▏       | 1918/8958 [00:19<01:07, 103.87it/s]

Batch 1900, Loss: 5.6441


Epoch 3:  22%|██▏       | 2012/8958 [00:20<01:03, 110.14it/s]

Batch 2000, Loss: 5.8312


Epoch 3:  24%|██▎       | 2112/8958 [00:21<01:09, 97.87it/s] 

Batch 2100, Loss: 5.8630


Epoch 3:  25%|██▍       | 2215/8958 [00:22<01:18, 85.45it/s]

Batch 2200, Loss: 5.6749


Epoch 3:  26%|██▌       | 2313/8958 [00:23<01:23, 79.91it/s]

Batch 2300, Loss: 5.4568


Epoch 3:  27%|██▋       | 2412/8958 [00:24<01:01, 106.61it/s]

Batch 2400, Loss: 5.5810


Epoch 3:  28%|██▊       | 2514/8958 [00:25<00:58, 110.93it/s]

Batch 2500, Loss: 5.9947


Epoch 3:  29%|██▉       | 2620/8958 [00:26<00:56, 112.85it/s]

Batch 2600, Loss: 5.6443


Epoch 3:  30%|███       | 2717/8958 [00:27<00:55, 112.29it/s]

Batch 2700, Loss: 5.6129


Epoch 3:  31%|███▏      | 2813/8958 [00:28<00:54, 111.82it/s]

Batch 2800, Loss: 5.6123


Epoch 3:  33%|███▎      | 2914/8958 [00:29<00:57, 105.19it/s]

Batch 2900, Loss: 5.4568


Epoch 3:  34%|███▎      | 3017/8958 [00:30<00:59, 99.67it/s] 

Batch 3000, Loss: 5.6164


Epoch 3:  35%|███▍      | 3118/8958 [00:30<00:54, 107.99it/s]

Batch 3100, Loss: 5.7369


Epoch 3:  36%|███▌      | 3215/8958 [00:31<00:51, 112.14it/s]

Batch 3200, Loss: 5.6438


Epoch 3:  37%|███▋      | 3321/8958 [00:32<00:51, 109.49it/s]

Batch 3300, Loss: 5.6448


Epoch 3:  38%|███▊      | 3410/8958 [00:33<00:54, 102.04it/s]

Batch 3400, Loss: 5.7068


Epoch 3:  39%|███▉      | 3515/8958 [00:34<01:02, 86.85it/s]

Batch 3500, Loss: 5.7376


Epoch 3:  40%|████      | 3616/8958 [00:36<01:06, 79.96it/s]

Batch 3600, Loss: 5.7374


Epoch 3:  41%|████▏     | 3713/8958 [00:37<00:50, 104.44it/s]

Batch 3700, Loss: 5.5502


Epoch 3:  43%|████▎     | 3820/8958 [00:38<00:44, 115.10it/s]

Batch 3800, Loss: 5.6359


Epoch 3:  44%|████▍     | 3921/8958 [00:38<00:42, 118.14it/s]

Batch 3900, Loss: 5.5188


Epoch 3:  45%|████▍     | 4021/8958 [00:39<00:42, 117.21it/s]

Batch 4000, Loss: 5.7068


Epoch 3:  46%|████▌     | 4117/8958 [00:40<00:42, 115.21it/s]

Batch 4100, Loss: 5.9250


Epoch 3:  47%|████▋     | 4214/8958 [00:41<00:41, 113.19it/s]

Batch 4200, Loss: 5.7063


Epoch 3:  48%|████▊     | 4322/8958 [00:42<00:41, 110.97it/s]

Batch 4300, Loss: 5.4570


Epoch 3:  49%|████▉     | 4413/8958 [00:43<00:42, 106.95it/s]

Batch 4400, Loss: 5.5386


Epoch 3:  50%|█████     | 4522/8958 [00:44<00:40, 110.30it/s]

Batch 4500, Loss: 5.6131


Epoch 3:  52%|█████▏    | 4622/8958 [00:45<00:36, 118.17it/s]

Batch 4600, Loss: 5.5193


Epoch 3:  53%|█████▎    | 4718/8958 [00:45<00:36, 116.56it/s]

Batch 4700, Loss: 5.7380


Epoch 3:  54%|█████▎    | 4812/8958 [00:46<00:45, 91.39it/s]

Batch 4800, Loss: 5.7376


Epoch 3:  55%|█████▍    | 4913/8958 [00:48<00:48, 83.76it/s]

Batch 4900, Loss: 5.6131


Epoch 3:  56%|█████▌    | 5013/8958 [00:49<00:41, 95.06it/s]

Batch 5000, Loss: 5.5506


Epoch 3:  57%|█████▋    | 5124/8958 [00:50<00:31, 119.87it/s]

Batch 5100, Loss: 5.6342


Epoch 3:  58%|█████▊    | 5222/8958 [00:51<00:31, 116.98it/s]

Batch 5200, Loss: 5.5193


Epoch 3:  59%|█████▉    | 5319/8958 [00:51<00:30, 117.51it/s]

Batch 5300, Loss: 5.6733


Epoch 3:  60%|██████    | 5416/8958 [00:52<00:32, 110.13it/s]

Batch 5400, Loss: 5.6126


Epoch 3:  62%|██████▏   | 5511/8958 [00:53<00:32, 106.47it/s]

Batch 5500, Loss: 6.0806


Epoch 3:  63%|██████▎   | 5618/8958 [00:54<00:29, 112.32it/s]

Batch 5600, Loss: 5.7068


Epoch 3:  64%|██████▍   | 5719/8958 [00:55<00:30, 107.66it/s]

Batch 5700, Loss: 5.7681


Epoch 3:  65%|██████▌   | 5823/8958 [00:56<00:28, 111.42it/s]

Batch 5800, Loss: 5.6129


Epoch 3:  66%|██████▌   | 5919/8958 [00:57<00:27, 111.96it/s]

Batch 5900, Loss: 5.8943


Epoch 3:  67%|██████▋   | 6017/8958 [00:58<00:25, 114.61it/s]

Batch 6000, Loss: 5.6750


Epoch 3:  68%|██████▊   | 6111/8958 [00:59<00:27, 102.29it/s]

Batch 6100, Loss: 5.5500


Epoch 3:  69%|██████▉   | 6214/8958 [01:00<00:34, 79.47it/s]

Batch 6200, Loss: 5.7066


Epoch 3:  70%|███████   | 6311/8958 [01:01<00:32, 82.08it/s]

Batch 6300, Loss: 5.8943


Epoch 3:  72%|███████▏  | 6413/8958 [01:02<00:23, 110.10it/s]

Batch 6400, Loss: 5.8314


Epoch 3:  73%|███████▎  | 6522/8958 [01:03<00:21, 114.80it/s]

Batch 6500, Loss: 5.5193


Epoch 3:  74%|███████▍  | 6620/8958 [01:04<00:20, 115.08it/s]

Batch 6600, Loss: 5.5761


Epoch 3:  75%|███████▍  | 6718/8958 [01:05<00:19, 115.67it/s]

Batch 6700, Loss: 5.7693


Epoch 3:  76%|███████▌  | 6818/8958 [01:06<00:17, 119.81it/s]

Batch 6800, Loss: 5.7378


Epoch 3:  77%|███████▋  | 6918/8958 [01:06<00:17, 119.92it/s]

Batch 6900, Loss: 5.6744


Epoch 3:  78%|███████▊  | 7021/8958 [01:07<00:16, 120.00it/s]

Batch 7000, Loss: 5.6142


Epoch 3:  79%|███████▉  | 7120/8958 [01:08<00:15, 115.59it/s]

Batch 7100, Loss: 5.6441


Epoch 3:  81%|████████  | 7218/8958 [01:09<00:15, 112.04it/s]

Batch 7200, Loss: 5.6444


Epoch 3:  82%|████████▏ | 7313/8958 [01:10<00:15, 107.89it/s]

Batch 7300, Loss: 5.7057


Epoch 3:  83%|████████▎ | 7417/8958 [01:11<00:14, 107.21it/s]

Batch 7400, Loss: 5.7376


Epoch 3:  84%|████████▍ | 7514/8958 [01:12<00:17, 82.22it/s]

Batch 7500, Loss: 5.7063


Epoch 3:  85%|████████▍ | 7613/8958 [01:13<00:15, 84.67it/s]

Batch 7600, Loss: 5.5243


Epoch 3:  86%|████████▌ | 7717/8958 [01:14<00:12, 99.07it/s]

Batch 7700, Loss: 5.7992


Epoch 3:  87%|████████▋ | 7817/8958 [01:15<00:10, 110.93it/s]

Batch 7800, Loss: 5.6753


Epoch 3:  88%|████████▊ | 7915/8958 [01:16<00:09, 114.21it/s]

Batch 7900, Loss: 5.6434


Epoch 3:  89%|████████▉ | 8014/8958 [01:17<00:08, 116.68it/s]

Batch 8000, Loss: 5.8621


Epoch 3:  91%|█████████ | 8112/8958 [01:18<00:08, 102.55it/s]

Batch 8100, Loss: 5.4558


Epoch 3:  92%|█████████▏| 8222/8958 [01:19<00:06, 107.77it/s]

Batch 8200, Loss: 5.7381


Epoch 3:  93%|█████████▎| 8318/8958 [01:20<00:06, 105.50it/s]

Batch 8300, Loss: 5.7693


Epoch 3:  94%|█████████▍| 8413/8958 [01:21<00:04, 116.79it/s]

Batch 8400, Loss: 6.0528


Epoch 3:  95%|█████████▌| 8518/8958 [01:22<00:04, 97.72it/s] 

Batch 8500, Loss: 5.4987


Epoch 3:  96%|█████████▌| 8619/8958 [01:23<00:02, 113.77it/s]

Batch 8600, Loss: 5.8314


Epoch 3:  97%|█████████▋| 8715/8958 [01:24<00:02, 111.12it/s]

Batch 8700, Loss: 5.8938


Epoch 3:  98%|█████████▊| 8813/8958 [01:25<00:01, 80.28it/s]

Batch 8800, Loss: 5.8620


Epoch 3:  99%|█████████▉| 8910/8958 [01:26<00:00, 80.02it/s]

Batch 8900, Loss: 5.5815


Epoch 3: 100%|██████████| 8958/8958 [01:27<00:00, 102.96it/s]


Epoch 3 completed in 87.006 seconds
Average Loss: 5.6803


Epoch 4:   0%|          | 6/8958 [00:00<02:33, 58.21it/s]

Batch 0, Loss: 5.6121


Epoch 4:   1%|▏         | 119/8958 [00:01<01:24, 104.92it/s]

Batch 100, Loss: 5.7066


Epoch 4:   2%|▏         | 211/8958 [00:02<01:21, 107.28it/s]

Batch 200, Loss: 5.6126


Epoch 4:   4%|▎         | 315/8958 [00:03<01:32, 93.55it/s]

Batch 300, Loss: 5.6856


Epoch 4:   5%|▍         | 411/8958 [00:04<01:19, 106.97it/s]

Batch 400, Loss: 5.8316


Epoch 4:   6%|▌         | 521/8958 [00:05<01:24, 99.57it/s]

Batch 500, Loss: 5.7064


Epoch 4:   7%|▋         | 615/8958 [00:06<01:20, 103.58it/s]

Batch 600, Loss: 5.7784


Epoch 4:   8%|▊         | 714/8958 [00:07<01:26, 94.94it/s]

Batch 700, Loss: 5.6128


Epoch 4:   9%|▉         | 813/8958 [00:08<01:17, 105.22it/s]

Batch 800, Loss: 5.8631


Epoch 4:  10%|█         | 912/8958 [00:09<01:22, 97.25it/s]

Batch 900, Loss: 5.7688


Epoch 4:  11%|█▏        | 1012/8958 [00:10<01:24, 94.39it/s]

Batch 1000, Loss: 5.6104


Epoch 4:  12%|█▏        | 1112/8958 [00:11<01:42, 76.53it/s]

Batch 1100, Loss: 5.6143


Epoch 4:  14%|█▎        | 1210/8958 [00:12<01:46, 72.97it/s]

Batch 1200, Loss: 5.5810


Epoch 4:  15%|█▍        | 1310/8958 [00:13<01:24, 90.12it/s]

Batch 1300, Loss: 5.8311


Epoch 4:  16%|█▌        | 1412/8958 [00:15<01:23, 90.36it/s]

Batch 1400, Loss: 5.7687


Epoch 4:  17%|█▋        | 1510/8958 [00:15<01:14, 100.56it/s]

Batch 1500, Loss: 5.6147


Epoch 4:  18%|█▊        | 1611/8958 [00:17<01:19, 91.93it/s]

Batch 1600, Loss: 5.7087


Epoch 4:  19%|█▉        | 1717/8958 [00:18<01:09, 103.80it/s]

Batch 1700, Loss: 5.7037


Epoch 4:  20%|██        | 1817/8958 [00:19<01:10, 101.62it/s]

Batch 1800, Loss: 5.8520


Epoch 4:  21%|██▏       | 1915/8958 [00:20<01:08, 103.30it/s]

Batch 1900, Loss: 5.8464


Epoch 4:  23%|██▎       | 2016/8958 [00:21<01:04, 106.87it/s]

Batch 2000, Loss: 5.5800


Epoch 4:  24%|██▎       | 2114/8958 [00:22<01:10, 97.36it/s]

Batch 2100, Loss: 5.6747


Epoch 4:  25%|██▍       | 2210/8958 [00:22<01:07, 99.55it/s] 

Batch 2200, Loss: 5.6451


Epoch 4:  26%|██▌       | 2315/8958 [00:24<01:30, 73.30it/s]

Batch 2300, Loss: 5.8084


Epoch 4:  27%|██▋       | 2413/8958 [00:25<01:22, 79.19it/s]

Batch 2400, Loss: 5.7384


Epoch 4:  28%|██▊       | 2518/8958 [00:26<01:02, 102.58it/s]

Batch 2500, Loss: 5.6161


Epoch 4:  29%|██▉       | 2614/8958 [00:27<01:04, 97.68it/s]

Batch 2600, Loss: 5.8310


Epoch 4:  30%|███       | 2721/8958 [00:28<00:59, 104.28it/s]

Batch 2700, Loss: 6.0088


Epoch 4:  31%|███▏      | 2815/8958 [00:29<01:03, 96.88it/s]

Batch 2800, Loss: 6.0179


Epoch 4:  33%|███▎      | 2915/8958 [00:30<00:58, 104.01it/s]

Batch 2900, Loss: 5.6743


Epoch 4:  34%|███▎      | 3020/8958 [00:31<01:00, 98.62it/s]

Batch 3000, Loss: 5.7059


Epoch 4:  35%|███▍      | 3113/8958 [00:32<01:02, 93.52it/s]

Batch 3100, Loss: 5.5499


Epoch 4:  36%|███▌      | 3215/8958 [00:33<00:58, 98.71it/s]

Batch 3200, Loss: 5.8698


Epoch 4:  37%|███▋      | 3316/8958 [00:34<00:57, 98.08it/s]

Batch 3300, Loss: 5.7325


Epoch 4:  38%|███▊      | 3414/8958 [00:35<01:07, 81.60it/s]

Batch 3400, Loss: 5.8647


Epoch 4:  39%|███▉      | 3512/8958 [00:37<01:12, 74.77it/s]

Batch 3500, Loss: 5.8936


Epoch 4:  40%|████      | 3612/8958 [00:38<01:01, 86.89it/s]

Batch 3600, Loss: 5.4256


Epoch 4:  41%|████▏     | 3713/8958 [00:39<00:56, 93.43it/s]

Batch 3700, Loss: 5.6832


Epoch 4:  43%|████▎     | 3813/8958 [00:40<00:53, 95.51it/s]

Batch 3800, Loss: 5.6560


Epoch 4:  44%|████▎     | 3914/8958 [00:41<00:51, 98.58it/s]

Batch 3900, Loss: 5.6962


Epoch 4:  45%|████▍     | 4015/8958 [00:42<00:52, 93.83it/s]

Batch 4000, Loss: 5.8004


Epoch 4:  46%|████▌     | 4116/8958 [00:44<00:52, 92.87it/s]

Batch 4100, Loss: 5.7031


Epoch 4:  47%|████▋     | 4212/8958 [00:45<00:53, 88.11it/s]

Batch 4200, Loss: 5.7705


Epoch 4:  48%|████▊     | 4317/8958 [00:46<00:51, 89.67it/s]

Batch 4300, Loss: 5.6756


Epoch 4:  49%|████▉     | 4412/8958 [00:47<00:51, 87.70it/s]

Batch 4400, Loss: 5.5883


Epoch 4:  50%|█████     | 4512/8958 [00:48<01:00, 73.06it/s]

Batch 4500, Loss: 5.6755


Epoch 4:  52%|█████▏    | 4614/8958 [00:50<01:07, 64.78it/s]

Batch 4600, Loss: 5.8314


Epoch 4:  53%|█████▎    | 4712/8958 [00:51<00:50, 83.47it/s]

Batch 4700, Loss: 5.6747


Epoch 4:  54%|█████▎    | 4811/8958 [00:52<00:45, 90.47it/s]

Batch 4800, Loss: 5.5510


Epoch 4:  55%|█████▍    | 4918/8958 [00:53<00:44, 90.47it/s]

Batch 4900, Loss: 5.7065


Epoch 4:  56%|█████▌    | 5012/8958 [00:54<00:43, 89.98it/s]

Batch 5000, Loss: 5.8630


Epoch 4:  57%|█████▋    | 5111/8958 [00:55<00:41, 93.12it/s]

Batch 5100, Loss: 5.5817


Epoch 4:  58%|█████▊    | 5211/8958 [00:56<00:39, 93.74it/s]

Batch 5200, Loss: 5.9564


Epoch 4:  59%|█████▉    | 5311/8958 [00:58<00:41, 88.05it/s]

Batch 5300, Loss: 5.5812


Epoch 4:  60%|██████    | 5409/8958 [00:59<00:39, 90.44it/s]

Batch 5400, Loss: 5.7061


Epoch 4:  62%|██████▏   | 5517/8958 [01:00<00:37, 91.03it/s]

Batch 5500, Loss: 5.8310


Epoch 4:  63%|██████▎   | 5613/8958 [01:01<00:44, 74.36it/s]

Batch 5600, Loss: 5.6713


Epoch 4:  64%|██████▎   | 5709/8958 [01:02<00:43, 74.71it/s]

Batch 5700, Loss: 5.5813


Epoch 4:  65%|██████▍   | 5811/8958 [01:04<00:36, 86.28it/s]

Batch 5800, Loss: 5.8631


Epoch 4:  66%|██████▌   | 5912/8958 [01:05<00:32, 93.00it/s]

Batch 5900, Loss: 5.7654


Epoch 4:  67%|██████▋   | 6015/8958 [01:06<00:29, 100.35it/s]

Batch 6000, Loss: 5.7073


Epoch 4:  68%|██████▊   | 6119/8958 [01:07<00:29, 96.39it/s]

Batch 6100, Loss: 5.6938


Epoch 4:  69%|██████▉   | 6219/8958 [01:08<00:29, 93.56it/s]

Batch 6200, Loss: 5.8751


Epoch 4:  70%|███████   | 6311/8958 [01:09<00:27, 96.15it/s]

Batch 6300, Loss: 5.6443


Epoch 4:  72%|███████▏  | 6412/8958 [01:10<00:26, 96.16it/s]

Batch 6400, Loss: 5.6443


Epoch 4:  73%|███████▎  | 6513/8958 [01:11<00:24, 98.65it/s]

Batch 6500, Loss: 5.5345


Epoch 4:  74%|███████▍  | 6614/8958 [01:12<00:25, 93.25it/s]

Batch 6600, Loss: 5.8648


Epoch 4:  75%|███████▍  | 6713/8958 [01:13<00:25, 87.42it/s]

Batch 6700, Loss: 5.5529


Epoch 4:  76%|███████▌  | 6810/8958 [01:15<00:30, 70.43it/s]

Batch 6800, Loss: 5.5875


Epoch 4:  77%|███████▋  | 6916/8958 [01:16<00:26, 77.28it/s]

Batch 6900, Loss: 5.4904


Epoch 4:  78%|███████▊  | 7014/8958 [01:17<00:19, 97.77it/s]

Batch 7000, Loss: 5.7938


Epoch 4:  79%|███████▉  | 7116/8958 [01:18<00:18, 97.44it/s]

Batch 7100, Loss: 5.5722


Epoch 4:  81%|████████  | 7218/8958 [01:20<00:21, 81.56it/s]

Batch 7200, Loss: 5.6759


Epoch 4:  82%|████████▏ | 7311/8958 [01:21<00:17, 93.59it/s]

Batch 7300, Loss: 5.7384


Epoch 4:  83%|████████▎ | 7411/8958 [01:22<00:16, 93.34it/s]

Batch 7400, Loss: 5.8001


Epoch 4:  84%|████████▍ | 7513/8958 [01:23<00:14, 97.19it/s]

Batch 7500, Loss: 5.7694


Epoch 4:  85%|████████▌ | 7620/8958 [01:24<00:13, 96.94it/s]

Batch 7600, Loss: 5.7068


Epoch 4:  86%|████████▌ | 7720/8958 [01:25<00:13, 94.19it/s]

Batch 7700, Loss: 5.7056


Epoch 4:  87%|████████▋ | 7816/8958 [01:26<00:10, 106.16it/s]

Batch 7800, Loss: 5.6452


Epoch 4:  88%|████████▊ | 7909/8958 [01:27<00:14, 73.22it/s]

Batch 7900, Loss: 5.7722


Epoch 4:  89%|████████▉ | 8013/8958 [01:28<00:12, 73.27it/s]

Batch 8000, Loss: 5.3007


Epoch 4:  91%|█████████ | 8113/8958 [01:30<00:08, 99.37it/s]

Batch 8100, Loss: 5.5188


Epoch 4:  92%|█████████▏| 8220/8958 [01:31<00:07, 99.13it/s]

Batch 8200, Loss: 5.4254


Epoch 4:  93%|█████████▎| 8315/8958 [01:32<00:06, 94.33it/s]

Batch 8300, Loss: 5.6514


Epoch 4:  94%|█████████▍| 8419/8958 [01:33<00:05, 99.81it/s]

Batch 8400, Loss: 5.6130


Epoch 4:  95%|█████████▌| 8516/8958 [01:34<00:04, 104.28it/s]

Batch 8500, Loss: 5.7981


Epoch 4:  96%|█████████▌| 8619/8958 [01:35<00:03, 92.12it/s]

Batch 8600, Loss: 5.8312


Epoch 4:  97%|█████████▋| 8719/8958 [01:36<00:02, 95.86it/s]

Batch 8700, Loss: 5.4879


Epoch 4:  98%|█████████▊| 8812/8958 [01:37<00:01, 96.02it/s]

Batch 8800, Loss: 5.6126


Epoch 4: 100%|█████████▉| 8914/8958 [01:38<00:00, 96.00it/s]

Batch 8900, Loss: 5.7068


Epoch 4: 100%|██████████| 8958/8958 [01:38<00:00, 90.71it/s]


Epoch 4 completed in 98.762 seconds
Average Loss: 5.6734


Epoch 5:   0%|          | 8/8958 [00:00<01:52, 79.21it/s]

Batch 0, Loss: 5.7064


Epoch 5:   1%|          | 109/8958 [00:01<02:00, 73.58it/s]

Batch 100, Loss: 5.6527


Epoch 5:   2%|▏         | 214/8958 [00:02<01:59, 73.28it/s]

Batch 200, Loss: 5.5656


Epoch 5:   3%|▎         | 312/8958 [00:03<01:27, 99.30it/s]

Batch 300, Loss: 5.7971


Epoch 5:   5%|▍         | 414/8958 [00:04<01:28, 96.77it/s]

Batch 400, Loss: 5.7684


Epoch 5:   6%|▌         | 517/8958 [00:05<01:22, 102.12it/s]

Batch 500, Loss: 5.4464


Epoch 5:   7%|▋         | 618/8958 [00:06<01:17, 108.29it/s]

Batch 600, Loss: 5.6771


Epoch 5:   8%|▊         | 714/8958 [00:07<01:23, 98.28it/s]

Batch 700, Loss: 5.5503


Epoch 5:   9%|▉         | 813/8958 [00:08<01:19, 102.35it/s]

Batch 800, Loss: 5.5552


Epoch 5:  10%|█         | 916/8958 [00:09<01:22, 97.60it/s]

Batch 900, Loss: 5.8042


Epoch 5:  11%|█▏        | 1019/8958 [00:11<01:19, 99.85it/s]

Batch 1000, Loss: 5.5190


Epoch 5:  12%|█▏        | 1116/8958 [00:11<01:16, 102.25it/s]

Batch 1100, Loss: 5.7063


Epoch 5:  14%|█▎        | 1219/8958 [00:12<01:16, 100.55it/s]

Batch 1200, Loss: 5.6131


Epoch 5:  15%|█▍        | 1316/8958 [00:14<01:33, 81.59it/s]

Batch 1300, Loss: 5.6798


Epoch 5:  16%|█▌        | 1414/8958 [00:15<01:48, 69.46it/s]

Batch 1400, Loss: 5.6759


Epoch 5:  17%|█▋        | 1512/8958 [00:16<01:14, 100.07it/s]

Batch 1500, Loss: 5.8422


Epoch 5:  18%|█▊        | 1612/8958 [00:17<01:11, 102.34it/s]

Batch 1600, Loss: 5.6120


Epoch 5:  19%|█▉        | 1719/8958 [00:18<01:14, 96.86it/s]

Batch 1700, Loss: 5.7369


Epoch 5:  20%|██        | 1818/8958 [00:19<01:10, 101.09it/s]

Batch 1800, Loss: 5.6092


Epoch 5:  21%|██▏       | 1918/8958 [00:20<01:08, 102.93it/s]

Batch 1900, Loss: 5.5810


Epoch 5:  23%|██▎       | 2017/8958 [00:21<01:03, 108.72it/s]

Batch 2000, Loss: 5.7065


Epoch 5:  24%|██▎       | 2118/8958 [00:22<01:04, 105.85it/s]

Batch 2100, Loss: 5.6440


Epoch 5:  25%|██▍       | 2214/8958 [00:23<00:58, 114.84it/s]

Batch 2200, Loss: 5.6790


Epoch 5:  26%|██▌       | 2325/8958 [00:24<00:55, 119.92it/s]

Batch 2300, Loss: 5.6733


Epoch 5:  27%|██▋       | 2419/8958 [00:25<01:01, 106.45it/s]

Batch 2400, Loss: 5.6534


Epoch 5:  28%|██▊       | 2517/8958 [00:26<01:11, 89.91it/s]

Batch 2500, Loss: 5.4555


Epoch 5:  29%|██▉       | 2615/8958 [00:27<01:25, 74.05it/s]

Batch 2600, Loss: 5.4568


Epoch 5:  30%|███       | 2710/8958 [00:28<01:27, 71.61it/s]

Batch 2700, Loss: 5.5876


Epoch 5:  31%|███▏      | 2812/8958 [00:29<01:03, 97.05it/s]

Batch 2800, Loss: 5.9207


Epoch 5:  33%|███▎      | 2920/8958 [00:30<00:57, 105.75it/s]

Batch 2900, Loss: 5.7378


Epoch 5:  34%|███▎      | 3020/8958 [00:31<00:57, 102.59it/s]

Batch 3000, Loss: 5.5733


Epoch 5:  35%|███▍      | 3117/8958 [00:32<00:59, 97.46it/s]

Batch 3100, Loss: 5.6734


Epoch 5:  36%|███▌      | 3222/8958 [00:33<00:56, 101.60it/s]

Batch 3200, Loss: 5.6465


Epoch 5:  37%|███▋      | 3324/8958 [00:34<00:52, 108.17it/s]

Batch 3300, Loss: 5.6129


Epoch 5:  38%|███▊      | 3420/8958 [00:35<00:51, 106.72it/s]

Batch 3400, Loss: 5.7375


Epoch 5:  39%|███▉      | 3522/8958 [00:36<00:50, 108.57it/s]

Batch 3500, Loss: 5.4560


Epoch 5:  40%|████      | 3621/8958 [00:37<00:51, 102.66it/s]

Batch 3600, Loss: 5.6131


Epoch 5:  42%|████▏     | 3721/8958 [00:38<00:50, 103.74it/s]

Batch 3700, Loss: 5.8316


Epoch 5:  43%|████▎     | 3809/8958 [00:39<01:04, 79.96it/s]

Batch 3800, Loss: 5.9878


Epoch 5:  44%|████▎     | 3912/8958 [00:41<01:07, 75.13it/s]

Batch 3900, Loss: 5.7696


Epoch 5:  45%|████▍     | 4011/8958 [00:42<00:49, 99.29it/s]

Batch 4000, Loss: 5.5836


Epoch 5:  46%|████▌     | 4112/8958 [00:43<00:45, 105.96it/s]

Batch 4100, Loss: 5.5499


Epoch 5:  47%|████▋     | 4212/8958 [00:44<00:46, 101.93it/s]

Batch 4200, Loss: 5.4247


Epoch 5:  48%|████▊     | 4311/8958 [00:45<00:47, 98.87it/s]

Batch 4300, Loss: 5.5005


Epoch 5:  49%|████▉     | 4411/8958 [00:46<00:46, 98.84it/s] 

Batch 4400, Loss: 5.7127


Epoch 5:  50%|█████     | 4511/8958 [00:47<00:42, 104.20it/s]

Batch 4500, Loss: 5.5247


Epoch 5:  51%|█████▏    | 4611/8958 [00:48<00:43, 99.61it/s] 

Batch 4600, Loss: 5.5238


Epoch 5:  53%|█████▎    | 4721/8958 [00:49<00:38, 109.08it/s]

Batch 4700, Loss: 5.7725


Epoch 5:  54%|█████▎    | 4806/8958 [00:49<00:36, 114.97it/s]

Batch 4800, Loss: 5.6912


Epoch 5:  55%|█████▍    | 4902/8958 [00:52<02:59, 22.54it/s]

Batch 4900, Loss: 5.7640


Epoch 5:  56%|█████▌    | 5008/8958 [00:56<01:16, 51.69it/s]

Batch 5000, Loss: 5.5910


Epoch 5:  57%|█████▋    | 5108/8958 [00:58<01:04, 60.15it/s]

Batch 5100, Loss: 5.5498


Epoch 5:  58%|█████▊    | 5220/8958 [00:59<00:34, 109.25it/s]

Batch 5200, Loss: 5.8539


Epoch 5:  59%|█████▉    | 5319/8958 [01:00<00:31, 113.89it/s]

Batch 5300, Loss: 5.9708


Epoch 5:  61%|██████    | 5423/8958 [01:01<00:32, 109.08it/s]

Batch 5400, Loss: 5.6123


Epoch 5:  62%|██████▏   | 5520/8958 [01:01<00:30, 114.07it/s]

Batch 5500, Loss: 5.7429


Epoch 5:  63%|██████▎   | 5620/8958 [01:02<00:27, 119.65it/s]

Batch 5600, Loss: 5.6694


Epoch 5:  64%|██████▍   | 5720/8958 [01:03<00:26, 120.03it/s]

Batch 5700, Loss: 5.5839


Epoch 5:  65%|██████▌   | 5824/8958 [01:04<00:25, 123.16it/s]

Batch 5800, Loss: 5.6754


Epoch 5:  66%|██████▌   | 5924/8958 [01:05<00:26, 115.75it/s]

Batch 5900, Loss: 5.7379


Epoch 5:  67%|██████▋   | 6021/8958 [01:06<00:25, 113.76it/s]

Batch 6000, Loss: 5.5739


Epoch 5:  68%|██████▊   | 6118/8958 [01:07<00:24, 116.02it/s]

Batch 6100, Loss: 5.6297


Epoch 5:  69%|██████▉   | 6215/8958 [01:07<00:24, 112.17it/s]

Batch 6200, Loss: 5.6126


Epoch 5:  71%|███████   | 6319/8958 [01:09<00:29, 88.48it/s]

Batch 6300, Loss: 5.6444


Epoch 5:  72%|███████▏  | 6410/8958 [01:10<00:30, 83.55it/s]

Batch 6400, Loss: 5.7686


Epoch 5:  73%|███████▎  | 6518/8958 [01:11<00:23, 103.94it/s]

Batch 6500, Loss: 5.6753


Epoch 5:  74%|███████▍  | 6615/8958 [01:12<00:19, 117.34it/s]

Batch 6600, Loss: 5.6772


Epoch 5:  75%|███████▌  | 6723/8958 [01:13<00:19, 114.82it/s]

Batch 6700, Loss: 5.8768


Epoch 5:  76%|███████▌  | 6820/8958 [01:13<00:18, 113.46it/s]

Batch 6800, Loss: 5.8362


Epoch 5:  77%|███████▋  | 6917/8958 [01:14<00:18, 112.82it/s]

Batch 6900, Loss: 6.0276


Epoch 5:  78%|███████▊  | 7015/8958 [01:15<00:17, 110.81it/s]

Batch 7000, Loss: 5.6173


Epoch 5:  79%|███████▉  | 7114/8958 [01:16<00:15, 118.38it/s]

Batch 7100, Loss: 5.4926


Epoch 5:  81%|████████  | 7222/8958 [01:17<00:15, 115.54it/s]

Batch 7200, Loss: 5.7693


Epoch 5:  82%|████████▏ | 7320/8958 [01:18<00:14, 115.36it/s]

Batch 7300, Loss: 5.6756


Epoch 5:  83%|████████▎ | 7416/8958 [01:19<00:13, 112.10it/s]

Batch 7400, Loss: 5.5492


Epoch 5:  84%|████████▍ | 7513/8958 [01:20<00:12, 113.85it/s]

Batch 7500, Loss: 5.6125


Epoch 5:  85%|████████▌ | 7624/8958 [01:21<00:11, 115.65it/s]

Batch 7600, Loss: 5.8178


Epoch 5:  86%|████████▌ | 7716/8958 [01:22<00:14, 84.80it/s]

Batch 7700, Loss: 5.8838


Epoch 5:  87%|████████▋ | 7817/8958 [01:23<00:14, 80.78it/s]

Batch 7800, Loss: 5.5818


Epoch 5:  88%|████████▊ | 7920/8958 [01:24<00:09, 109.18it/s]

Batch 7900, Loss: 5.7442


Epoch 5:  90%|████████▉ | 8018/8958 [01:25<00:08, 113.68it/s]

Batch 8000, Loss: 5.5586


Epoch 5:  91%|█████████ | 8114/8958 [01:26<00:07, 114.28it/s]

Batch 8100, Loss: 5.8005


Epoch 5:  92%|█████████▏| 8223/8958 [01:27<00:06, 115.88it/s]

Batch 8200, Loss: 5.8038


Epoch 5:  93%|█████████▎| 8317/8958 [01:27<00:05, 107.80it/s]

Batch 8300, Loss: 5.8944


Epoch 5:  94%|█████████▍| 8421/8958 [01:28<00:04, 109.84it/s]

Batch 8400, Loss: 5.5836


Epoch 5:  95%|█████████▌| 8513/8958 [01:29<00:04, 110.60it/s]

Batch 8500, Loss: 5.4878


Epoch 5:  96%|█████████▌| 8614/8958 [01:30<00:03, 107.67it/s]

Batch 8600, Loss: 5.5813


Epoch 5:  97%|█████████▋| 8722/8958 [01:31<00:02, 110.63it/s]

Batch 8700, Loss: 5.6358


Epoch 5:  98%|█████████▊| 8817/8958 [01:32<00:01, 105.78it/s]

Batch 8800, Loss: 5.8007


Epoch 5:  99%|█████████▉| 8911/8958 [01:33<00:00, 106.87it/s]

Batch 8900, Loss: 5.6121


Epoch 5: 100%|██████████| 8958/8958 [01:33<00:00, 95.47it/s] 


Epoch 5 completed in 93.833 seconds
Average Loss: 5.6666


Epoch 6:   0%|          | 6/8958 [00:00<02:35, 57.69it/s]

Batch 0, Loss: 5.5502


Epoch 6:   1%|▏         | 116/8958 [00:01<01:39, 88.72it/s]

Batch 100, Loss: 5.6442


Epoch 6:   2%|▏         | 208/8958 [00:02<01:47, 81.13it/s]

Batch 200, Loss: 5.5080


Epoch 6:   4%|▎         | 320/8958 [00:03<01:22, 104.30it/s]

Batch 300, Loss: 5.7746


Epoch 6:   5%|▍         | 420/8958 [00:04<01:20, 105.44it/s]

Batch 400, Loss: 5.9734


Epoch 6:   6%|▌         | 520/8958 [00:05<01:18, 107.19it/s]

Batch 500, Loss: 5.5513


Epoch 6:   7%|▋         | 622/8958 [00:06<01:15, 110.46it/s]

Batch 600, Loss: 5.6434


Epoch 6:   8%|▊         | 718/8958 [00:07<01:12, 113.68it/s]

Batch 700, Loss: 5.7492


Epoch 6:   9%|▉         | 823/8958 [00:08<01:13, 110.20it/s]

Batch 800, Loss: 5.8682


Epoch 6:  10%|█         | 913/8958 [00:09<01:14, 107.50it/s]

Batch 900, Loss: 5.8926


Epoch 6:  11%|█▏        | 1019/8958 [00:10<01:12, 109.23it/s]

Batch 1000, Loss: 5.6039


Epoch 6:  12%|█▏        | 1119/8958 [00:11<01:14, 104.80it/s]

Batch 1100, Loss: 5.6490


Epoch 6:  14%|█▎        | 1218/8958 [00:12<01:14, 104.36it/s]

Batch 1200, Loss: 5.8027


Epoch 6:  15%|█▍        | 1311/8958 [00:12<01:12, 105.22it/s]

Batch 1300, Loss: 5.7728


Epoch 6:  16%|█▌        | 1413/8958 [00:14<01:37, 77.39it/s]

Batch 1400, Loss: 5.7630


Epoch 6:  17%|█▋        | 1516/8958 [00:15<01:39, 74.45it/s]

Batch 1500, Loss: 5.7689


Epoch 6:  18%|█▊        | 1623/8958 [00:16<01:09, 105.90it/s]

Batch 1600, Loss: 5.7377


Epoch 6:  19%|█▉        | 1715/8958 [00:17<01:09, 103.93it/s]

Batch 1700, Loss: 5.6161


Epoch 6:  20%|██        | 1820/8958 [00:18<01:04, 111.39it/s]

Batch 1800, Loss: 5.4881


Epoch 6:  21%|██▏       | 1916/8958 [00:19<01:04, 109.40it/s]

Batch 1900, Loss: 5.6681


Epoch 6:  23%|██▎       | 2023/8958 [00:20<01:02, 110.65it/s]

Batch 2000, Loss: 5.6440


Epoch 6:  24%|██▎       | 2113/8958 [00:21<01:03, 107.53it/s]

Batch 2100, Loss: 5.4776


Epoch 6:  25%|██▍       | 2215/8958 [00:22<01:03, 106.52it/s]

Batch 2200, Loss: 5.7068


Epoch 6:  26%|██▌       | 2321/8958 [00:23<00:59, 111.04it/s]

Batch 2300, Loss: 5.5818


Epoch 6:  27%|██▋       | 2413/8958 [00:23<01:01, 106.30it/s]

Batch 2400, Loss: 5.5693


Epoch 6:  28%|██▊       | 2515/8958 [00:24<00:58, 109.53it/s]

Batch 2500, Loss: 5.5896


Epoch 6:  29%|██▉       | 2610/8958 [00:25<00:56, 112.61it/s]

Batch 2600, Loss: 5.7687


Epoch 6:  30%|███       | 2715/8958 [00:27<01:15, 82.97it/s]

Batch 2700, Loss: 5.5662


Epoch 6:  31%|███▏      | 2809/8958 [00:28<01:22, 74.85it/s]

Batch 2800, Loss: 5.6198


Epoch 6:  33%|███▎      | 2922/8958 [00:29<00:57, 105.14it/s]

Batch 2900, Loss: 5.5555


Epoch 6:  34%|███▎      | 3013/8958 [00:30<00:54, 109.50it/s]

Batch 3000, Loss: 5.7063


Epoch 6:  35%|███▍      | 3114/8958 [00:31<00:58, 99.82it/s] 

Batch 3100, Loss: 5.7063


Epoch 6:  36%|███▌      | 3214/8958 [00:32<00:53, 107.24it/s]

Batch 3200, Loss: 5.6451


Epoch 6:  37%|███▋      | 3314/8958 [00:33<00:53, 106.47it/s]

Batch 3300, Loss: 5.8642


Epoch 6:  38%|███▊      | 3419/8958 [00:34<00:51, 108.01it/s]

Batch 3400, Loss: 5.7031


Epoch 6:  39%|███▉      | 3522/8958 [00:35<00:50, 107.74it/s]

Batch 3500, Loss: 5.6448


Epoch 6:  40%|████      | 3616/8958 [00:35<00:50, 106.24it/s]

Batch 3600, Loss: 5.7739


Epoch 6:  42%|████▏     | 3721/8958 [00:36<00:48, 107.02it/s]

Batch 3700, Loss: 5.7066


Epoch 6:  43%|████▎     | 3822/8958 [00:37<00:47, 107.60it/s]

Batch 3800, Loss: 5.7387


Epoch 6:  44%|████▎     | 3915/8958 [00:38<00:53, 93.90it/s]

Batch 3900, Loss: 5.7381


Epoch 6:  45%|████▍     | 4014/8958 [00:40<01:03, 77.63it/s]

Batch 4000, Loss: 5.6335


Epoch 6:  46%|████▌     | 4110/8958 [00:41<01:06, 72.89it/s]

Batch 4100, Loss: 5.5218


Epoch 6:  47%|████▋     | 4213/8958 [00:42<00:44, 106.28it/s]

Batch 4200, Loss: 5.6443


Epoch 6:  48%|████▊     | 4318/8958 [00:43<00:43, 107.82it/s]

Batch 4300, Loss: 5.6445


Epoch 6:  49%|████▉     | 4417/8958 [00:44<00:43, 105.16it/s]

Batch 4400, Loss: 5.7012


Epoch 6:  50%|█████     | 4523/8958 [00:45<00:39, 111.27it/s]

Batch 4500, Loss: 5.5846


Epoch 6:  52%|█████▏    | 4619/8958 [00:46<00:39, 111.01it/s]

Batch 4600, Loss: 5.7056


Epoch 6:  53%|█████▎    | 4722/8958 [00:47<00:38, 109.72it/s]

Batch 4700, Loss: 5.7998


Epoch 6:  54%|█████▎    | 4814/8958 [00:47<00:36, 113.03it/s]

Batch 4800, Loss: 5.4879


Epoch 6:  55%|█████▍    | 4920/8958 [00:48<00:36, 109.55it/s]

Batch 4900, Loss: 5.8169


Epoch 6:  56%|█████▌    | 5022/8958 [00:49<00:36, 107.88it/s]

Batch 5000, Loss: 5.5181


Epoch 6:  57%|█████▋    | 5115/8958 [00:50<00:35, 106.98it/s]

Batch 5100, Loss: 5.5475


Epoch 6:  58%|█████▊    | 5209/8958 [00:51<00:39, 94.30it/s] 

Batch 5200, Loss: 5.5210


Epoch 6:  59%|█████▉    | 5318/8958 [00:52<00:44, 81.11it/s]

Batch 5300, Loss: 5.8476


Epoch 6:  60%|██████    | 5410/8958 [00:54<00:45, 77.73it/s]

Batch 5400, Loss: 5.5263


Epoch 6:  62%|██████▏   | 5516/8958 [00:55<00:31, 107.65it/s]

Batch 5500, Loss: 5.7915


Epoch 6:  63%|██████▎   | 5618/8958 [00:56<00:30, 109.96it/s]

Batch 5600, Loss: 5.5850


Epoch 6:  64%|██████▍   | 5714/8958 [00:57<00:29, 111.01it/s]

Batch 5700, Loss: 5.4277


Epoch 6:  65%|██████▌   | 5823/8958 [00:57<00:28, 110.77it/s]

Batch 5800, Loss: 5.5549


Epoch 6:  66%|██████▌   | 5918/8958 [00:58<00:28, 106.12it/s]

Batch 5900, Loss: 5.7383


Epoch 6:  67%|██████▋   | 6021/8958 [00:59<00:26, 108.83it/s]

Batch 6000, Loss: 5.5506


Epoch 6:  68%|██████▊   | 6116/8958 [01:00<00:25, 111.15it/s]

Batch 6100, Loss: 5.5833


Epoch 6:  69%|██████▉   | 6219/8958 [01:01<00:24, 109.75it/s]

Batch 6200, Loss: 5.6559


Epoch 6:  70%|███████   | 6314/8958 [01:02<00:24, 109.43it/s]

Batch 6300, Loss: 5.6593


Epoch 6:  72%|███████▏  | 6414/8958 [01:03<00:24, 103.21it/s]

Batch 6400, Loss: 5.9129


Epoch 6:  73%|███████▎  | 6513/8958 [01:04<00:26, 93.38it/s] 

Batch 6500, Loss: 5.6263


Epoch 6:  74%|███████▍  | 6613/8958 [01:05<00:27, 84.23it/s]

Batch 6600, Loss: 5.7986


Epoch 6:  75%|███████▍  | 6715/8958 [01:07<00:30, 74.24it/s]

Batch 6700, Loss: 5.6121


Epoch 6:  76%|███████▌  | 6817/8958 [01:08<00:20, 102.39it/s]

Batch 6800, Loss: 5.6619


Epoch 6:  77%|███████▋  | 6916/8958 [01:09<00:19, 103.78it/s]

Batch 6900, Loss: 5.7063


Epoch 6:  78%|███████▊  | 7017/8958 [01:09<00:18, 107.29it/s]

Batch 7000, Loss: 5.6439


Epoch 6:  79%|███████▉  | 7119/8958 [01:10<00:16, 110.14it/s]

Batch 7100, Loss: 5.6520


Epoch 6:  81%|████████  | 7213/8958 [01:11<00:15, 111.06it/s]

Batch 7200, Loss: 5.7242


Epoch 6:  82%|████████▏ | 7322/8958 [01:12<00:14, 114.05it/s]

Batch 7300, Loss: 5.7870


Epoch 6:  83%|████████▎ | 7417/8958 [01:13<00:14, 108.26it/s]

Batch 7400, Loss: 5.6549


Epoch 6:  84%|████████▍ | 7521/8958 [01:14<00:13, 106.21it/s]

Batch 7500, Loss: 5.6443


Epoch 6:  85%|████████▍ | 7612/8958 [01:15<00:12, 106.89it/s]

Batch 7600, Loss: 5.6436


Epoch 6:  86%|████████▌ | 7712/8958 [01:16<00:12, 103.66it/s]

Batch 7700, Loss: 5.7693


Epoch 6:  87%|████████▋ | 7811/8958 [01:17<00:12, 94.74it/s] 

Batch 7800, Loss: 5.5524


Epoch 6:  88%|████████▊ | 7909/8958 [01:18<00:13, 76.21it/s]

Batch 7900, Loss: 5.6124


Epoch 6:  89%|████████▉ | 8011/8958 [01:19<00:12, 74.47it/s]

Batch 8000, Loss: 5.6131


Epoch 6:  91%|█████████ | 8119/8958 [01:21<00:07, 105.15it/s]

Batch 8100, Loss: 5.7070


Epoch 6:  92%|█████████▏| 8218/8958 [01:21<00:07, 104.19it/s]

Batch 8200, Loss: 5.6442


Epoch 6:  93%|█████████▎| 8317/8958 [01:22<00:06, 100.57it/s]

Batch 8300, Loss: 5.8600


Epoch 6:  94%|█████████▍| 8416/8958 [01:23<00:05, 100.26it/s]

Batch 8400, Loss: 5.5444


Epoch 6:  95%|█████████▌| 8517/8958 [01:24<00:04, 104.34it/s]

Batch 8500, Loss: 5.5873


Epoch 6:  96%|█████████▌| 8616/8958 [01:25<00:03, 103.38it/s]

Batch 8600, Loss: 5.7654


Epoch 6:  97%|█████████▋| 8716/8958 [01:26<00:02, 105.10it/s]

Batch 8700, Loss: 5.9240


Epoch 6:  98%|█████████▊| 8818/8958 [01:27<00:01, 107.04it/s]

Batch 8800, Loss: 5.5809


Epoch 6: 100%|█████████▉| 8919/8958 [01:28<00:00, 106.47it/s]

Batch 8900, Loss: 5.7359


Epoch 6: 100%|██████████| 8958/8958 [01:29<00:00, 100.46it/s]


Epoch 6 completed in 89.180 seconds
Average Loss: 5.6638


Epoch 7:   0%|          | 8/8958 [00:00<01:52, 79.84it/s]

Batch 0, Loss: 5.6846


Epoch 7:   1%|          | 109/8958 [00:01<01:38, 89.41it/s]

Batch 100, Loss: 5.6907


Epoch 7:   2%|▏         | 217/8958 [00:02<01:49, 79.87it/s]

Batch 200, Loss: 5.7682


Epoch 7:   3%|▎         | 310/8958 [00:03<01:52, 76.55it/s]

Batch 300, Loss: 5.7437


Epoch 7:   5%|▍         | 416/8958 [00:04<01:20, 105.71it/s]

Batch 400, Loss: 5.8000


Epoch 7:   6%|▌         | 515/8958 [00:05<01:19, 105.95it/s]

Batch 500, Loss: 5.8630


Epoch 7:   7%|▋         | 615/8958 [00:06<01:21, 102.81it/s]

Batch 600, Loss: 5.9235


Epoch 7:   8%|▊         | 714/8958 [00:07<01:19, 104.27it/s]

Batch 700, Loss: 5.6753


Epoch 7:   9%|▉         | 812/8958 [00:08<01:21, 100.29it/s]

Batch 800, Loss: 5.6989


Epoch 7:  10%|█         | 913/8958 [00:09<01:17, 103.83it/s]

Batch 900, Loss: 5.4988


Epoch 7:  11%|█▏        | 1012/8958 [00:10<01:16, 104.45it/s]

Batch 1000, Loss: 5.8953


Epoch 7:  12%|█▏        | 1112/8958 [00:11<01:15, 103.90it/s]

Batch 1100, Loss: 5.5189


Epoch 7:  14%|█▎        | 1213/8958 [00:12<01:15, 102.73it/s]

Batch 1200, Loss: 5.3748


Epoch 7:  15%|█▍        | 1314/8958 [00:13<01:13, 104.13it/s]

Batch 1300, Loss: 5.5684


Epoch 7:  16%|█▌        | 1416/8958 [00:14<01:34, 79.80it/s]

Batch 1400, Loss: 5.6183


Epoch 7:  17%|█▋        | 1515/8958 [00:15<01:34, 79.00it/s]

Batch 1500, Loss: 5.7598


Epoch 7:  18%|█▊        | 1613/8958 [00:16<01:17, 94.30it/s]

Batch 1600, Loss: 5.6775


Epoch 7:  19%|█▉        | 1713/8958 [00:17<01:08, 105.46it/s]

Batch 1700, Loss: 5.6787


Epoch 7:  20%|██        | 1816/8958 [00:18<01:06, 106.88it/s]

Batch 1800, Loss: 5.8000


Epoch 7:  21%|██▏       | 1915/8958 [00:19<01:09, 101.12it/s]

Batch 1900, Loss: 5.5880


Epoch 7:  22%|██▏       | 2014/8958 [00:20<01:09, 100.08it/s]

Batch 2000, Loss: 5.5502


Epoch 7:  24%|██▎       | 2121/8958 [00:21<01:08, 99.49it/s]

Batch 2100, Loss: 5.7995


Epoch 7:  25%|██▍       | 2221/8958 [00:22<01:04, 103.70it/s]

Batch 2200, Loss: 5.6215


Epoch 7:  26%|██▌       | 2320/8958 [00:23<01:05, 101.73it/s]

Batch 2300, Loss: 5.6257


Epoch 7:  27%|██▋       | 2420/8958 [00:24<01:05, 99.70it/s] 

Batch 2400, Loss: 5.5509


Epoch 7:  28%|██▊       | 2520/8958 [00:25<01:02, 103.18it/s]

Batch 2500, Loss: 5.5812


Epoch 7:  29%|██▉       | 2608/8958 [00:26<01:04, 98.44it/s] 

Batch 2600, Loss: 5.6685


Epoch 7:  30%|███       | 2709/8958 [00:28<01:21, 76.80it/s]

Batch 2700, Loss: 5.7068


Epoch 7:  31%|███▏      | 2813/8958 [00:29<01:21, 75.79it/s]

Batch 2800, Loss: 5.7981


Epoch 7:  33%|███▎      | 2916/8958 [00:30<00:59, 101.74it/s]

Batch 2900, Loss: 5.4831


Epoch 7:  34%|███▎      | 3016/8958 [00:31<00:56, 104.93it/s]

Batch 3000, Loss: 5.6369


Epoch 7:  35%|███▍      | 3115/8958 [00:32<01:00, 96.76it/s] 

Batch 3100, Loss: 5.5813


Epoch 7:  36%|███▌      | 3214/8958 [00:33<00:55, 103.23it/s]

Batch 3200, Loss: 5.6443


Epoch 7:  37%|███▋      | 3314/8958 [00:34<00:55, 101.88it/s]

Batch 3300, Loss: 5.6074


Epoch 7:  38%|███▊      | 3414/8958 [00:35<00:53, 104.22it/s]

Batch 3400, Loss: 5.5693


Epoch 7:  39%|███▉      | 3515/8958 [00:36<00:50, 108.53it/s]

Batch 3500, Loss: 5.6851


Epoch 7:  40%|████      | 3614/8958 [00:37<00:50, 106.21it/s]

Batch 3600, Loss: 5.5562


Epoch 7:  41%|████▏     | 3713/8958 [00:38<00:50, 103.80it/s]

Batch 3700, Loss: 5.6741


Epoch 7:  43%|████▎     | 3812/8958 [00:39<00:49, 103.71it/s]

Batch 3800, Loss: 5.4881


Epoch 7:  44%|████▎     | 3914/8958 [00:40<01:00, 83.00it/s]

Batch 3900, Loss: 5.9095


Epoch 7:  45%|████▍     | 4010/8958 [00:41<01:04, 77.04it/s]

Batch 4000, Loss: 5.6616


Epoch 7:  46%|████▌     | 4115/8958 [00:42<00:51, 94.55it/s]

Batch 4100, Loss: 5.7153


Epoch 7:  47%|████▋     | 4214/8958 [00:43<00:46, 102.99it/s]

Batch 4200, Loss: 5.8746


Epoch 7:  48%|████▊     | 4317/8958 [00:44<00:42, 109.57it/s]

Batch 4300, Loss: 5.8258


Epoch 7:  49%|████▉     | 4416/8958 [00:45<00:42, 106.02it/s]

Batch 4400, Loss: 5.7957


Epoch 7:  50%|█████     | 4517/8958 [00:46<00:40, 109.03it/s]

Batch 4500, Loss: 5.8873


Epoch 7:  52%|█████▏    | 4617/8958 [00:47<00:41, 104.32it/s]

Batch 4600, Loss: 5.4881


Epoch 7:  53%|█████▎    | 4717/8958 [00:48<00:40, 105.94it/s]

Batch 4700, Loss: 5.5816


Epoch 7:  54%|█████▍    | 4817/8958 [00:49<00:39, 104.25it/s]

Batch 4800, Loss: 5.5510


Epoch 7:  55%|█████▍    | 4917/8958 [00:50<00:37, 106.74it/s]

Batch 4900, Loss: 5.5507


Epoch 7:  56%|█████▌    | 5019/8958 [00:51<00:36, 107.65it/s]

Batch 5000, Loss: 5.5046


Epoch 7:  57%|█████▋    | 5120/8958 [00:52<00:37, 103.10it/s]

Batch 5100, Loss: 5.7136


Epoch 7:  58%|█████▊    | 5215/8958 [00:53<00:48, 76.73it/s]

Batch 5200, Loss: 5.4217


Epoch 7:  59%|█████▉    | 5311/8958 [00:54<00:49, 74.24it/s]

Batch 5300, Loss: 5.7689


Epoch 7:  60%|██████    | 5416/8958 [00:55<00:35, 99.83it/s]

Batch 5400, Loss: 5.5698


Epoch 7:  62%|██████▏   | 5514/8958 [00:56<00:33, 103.03it/s]

Batch 5500, Loss: 5.6385


Epoch 7:  63%|██████▎   | 5613/8958 [00:57<00:33, 99.85it/s] 

Batch 5600, Loss: 5.6703


Epoch 7:  64%|██████▍   | 5721/8958 [00:59<00:32, 100.71it/s]

Batch 5700, Loss: 5.6438


Epoch 7:  65%|██████▍   | 5817/8958 [00:59<00:31, 99.85it/s]

Batch 5800, Loss: 5.7216


Epoch 7:  66%|██████▌   | 5917/8958 [01:00<00:29, 102.97it/s]

Batch 5900, Loss: 5.7414


Epoch 7:  67%|██████▋   | 6017/8958 [01:01<00:27, 108.27it/s]

Batch 6000, Loss: 5.6973


Epoch 7:  68%|██████▊   | 6117/8958 [01:02<00:28, 101.00it/s]

Batch 6100, Loss: 5.6126


Epoch 7:  69%|██████▉   | 6217/8958 [01:03<00:25, 105.68it/s]

Batch 6200, Loss: 5.6915


Epoch 7:  71%|███████   | 6318/8958 [01:04<00:24, 105.92it/s]

Batch 6300, Loss: 5.7704


Epoch 7:  72%|███████▏  | 6411/8958 [01:05<00:30, 82.48it/s]

Batch 6400, Loss: 5.7882


Epoch 7:  73%|███████▎  | 6510/8958 [01:07<00:30, 80.56it/s]

Batch 6500, Loss: 5.7028


Epoch 7:  74%|███████▍  | 6622/8958 [01:08<00:25, 90.19it/s]

Batch 6600, Loss: 5.6903


Epoch 7:  75%|███████▌  | 6720/8958 [01:09<00:21, 102.74it/s]

Batch 6700, Loss: 5.7164


Epoch 7:  76%|███████▌  | 6817/8958 [01:10<00:21, 100.14it/s]

Batch 6800, Loss: 5.4581


Epoch 7:  77%|███████▋  | 6916/8958 [01:11<00:19, 106.02it/s]

Batch 6900, Loss: 5.6023


Epoch 7:  78%|███████▊  | 7015/8958 [01:12<00:18, 102.59it/s]

Batch 7000, Loss: 5.7340


Epoch 7:  79%|███████▉  | 7114/8958 [01:13<00:18, 100.88it/s]

Batch 7100, Loss: 5.6742


Epoch 7:  81%|████████  | 7221/8958 [01:14<00:16, 102.58it/s]

Batch 7200, Loss: 5.4568


Epoch 7:  82%|████████▏ | 7320/8958 [01:15<00:15, 103.87it/s]

Batch 7300, Loss: 5.4881


Epoch 7:  83%|████████▎ | 7417/8958 [01:16<00:15, 98.95it/s]

Batch 7400, Loss: 5.8810


Epoch 7:  84%|████████▍ | 7511/8958 [01:17<00:14, 98.33it/s]

Batch 7500, Loss: 5.5502


Epoch 7:  85%|████████▍ | 7613/8958 [01:18<00:14, 92.05it/s]

Batch 7600, Loss: 5.8006


Epoch 7:  86%|████████▌ | 7713/8958 [01:19<00:16, 75.31it/s]

Batch 7700, Loss: 5.4576


Epoch 7:  87%|████████▋ | 7811/8958 [01:21<00:16, 70.52it/s]

Batch 7800, Loss: 5.6063


Epoch 7:  88%|████████▊ | 7913/8958 [01:22<00:10, 97.96it/s]

Batch 7900, Loss: 5.6100


Epoch 7:  90%|████████▉ | 8021/8958 [01:23<00:09, 101.40it/s]

Batch 8000, Loss: 5.7073


Epoch 7:  91%|█████████ | 8120/8958 [01:24<00:08, 100.17it/s]

Batch 8100, Loss: 5.6737


Epoch 7:  92%|█████████▏| 8211/8958 [01:25<00:07, 95.29it/s]

Batch 8200, Loss: 5.6536


Epoch 7:  93%|█████████▎| 8316/8958 [01:26<00:06, 100.05it/s]

Batch 8300, Loss: 5.7768


Epoch 7:  94%|█████████▍| 8412/8958 [01:27<00:05, 100.78it/s]

Batch 8400, Loss: 6.0466


Epoch 7:  95%|█████████▌| 8515/8958 [01:28<00:04, 97.46it/s]

Batch 8500, Loss: 5.5931


Epoch 7:  96%|█████████▌| 8618/8958 [01:29<00:03, 98.24it/s]

Batch 8600, Loss: 5.6434


Epoch 7:  97%|█████████▋| 8715/8958 [01:30<00:02, 101.46it/s]

Batch 8700, Loss: 5.6466


Epoch 7:  98%|█████████▊| 8812/8958 [01:31<00:01, 90.48it/s]

Batch 8800, Loss: 5.6143


Epoch 7: 100%|█████████▉| 8914/8958 [01:32<00:00, 72.03it/s]

Batch 8900, Loss: 5.4855


Epoch 7: 100%|██████████| 8958/8958 [01:33<00:00, 95.95it/s]


Epoch 7 completed in 93.370 seconds
Average Loss: 5.6597


Epoch 8:   0%|          | 5/8958 [00:00<03:13, 46.25it/s]

Batch 0, Loss: 5.5549


Epoch 8:   1%|▏         | 112/8958 [00:01<01:33, 94.25it/s]

Batch 100, Loss: 5.6394


Epoch 8:   2%|▏         | 218/8958 [00:02<01:27, 100.38it/s]

Batch 200, Loss: 5.6440


Epoch 8:   3%|▎         | 313/8958 [00:03<01:25, 100.92it/s]

Batch 300, Loss: 5.8577


Epoch 8:   5%|▍         | 416/8958 [00:04<01:26, 98.83it/s]

Batch 400, Loss: 5.6971


Epoch 8:   6%|▌         | 513/8958 [00:05<01:23, 101.04it/s]

Batch 500, Loss: 5.6789


Epoch 8:   7%|▋         | 619/8958 [00:06<01:24, 99.18it/s]

Batch 600, Loss: 5.7693


Epoch 8:   8%|▊         | 715/8958 [00:07<01:20, 102.56it/s]

Batch 700, Loss: 5.7651


Epoch 8:   9%|▉         | 812/8958 [00:08<01:21, 100.52it/s]

Batch 800, Loss: 5.4953


Epoch 8:  10%|█         | 914/8958 [00:09<01:23, 96.66it/s]

Batch 900, Loss: 5.5914


Epoch 8:  11%|█▏        | 1017/8958 [00:10<01:20, 98.14it/s]

Batch 1000, Loss: 5.4872


Epoch 8:  12%|█▏        | 1107/8958 [00:11<01:44, 75.25it/s]

Batch 1100, Loss: 5.6299


Epoch 8:  14%|█▎        | 1212/8958 [00:13<01:50, 70.16it/s]

Batch 1200, Loss: 5.6119


Epoch 8:  15%|█▍        | 1311/8958 [00:14<01:21, 93.61it/s]

Batch 1300, Loss: 5.9813


Epoch 8:  16%|█▌        | 1413/8958 [00:15<01:17, 97.67it/s]

Batch 1400, Loss: 5.7068


Epoch 8:  17%|█▋        | 1517/8958 [00:16<01:16, 97.60it/s]

Batch 1500, Loss: 5.7370


Epoch 8:  18%|█▊        | 1618/8958 [00:17<01:15, 97.59it/s]

Batch 1600, Loss: 5.8525


Epoch 8:  19%|█▉        | 1719/8958 [00:18<01:13, 98.34it/s]

Batch 1700, Loss: 5.7090


Epoch 8:  20%|██        | 1811/8958 [00:19<01:12, 98.01it/s]

Batch 1800, Loss: 5.5868


Epoch 8:  21%|██▏       | 1915/8958 [00:20<01:10, 99.40it/s]

Batch 1900, Loss: 5.6241


Epoch 8:  23%|██▎       | 2019/8958 [00:21<01:09, 100.51it/s]

Batch 2000, Loss: 5.4619


Epoch 8:  24%|██▎       | 2115/8958 [00:22<01:07, 100.76it/s]

Batch 2100, Loss: 5.8629


Epoch 8:  25%|██▍       | 2219/8958 [00:23<01:07, 100.32it/s]

Batch 2200, Loss: 5.6753


Epoch 8:  26%|██▌       | 2309/8958 [00:24<01:24, 78.40it/s]

Batch 2300, Loss: 5.7014


Epoch 8:  27%|██▋       | 2415/8958 [00:26<01:33, 69.74it/s]

Batch 2400, Loss: 5.6755


Epoch 8:  28%|██▊       | 2518/8958 [00:27<01:08, 93.60it/s]

Batch 2500, Loss: 5.7118


Epoch 8:  29%|██▉       | 2615/8958 [00:28<01:03, 99.26it/s] 

Batch 2600, Loss: 5.7599


Epoch 8:  30%|███       | 2717/8958 [00:29<01:07, 92.59it/s]

Batch 2700, Loss: 5.5077


Epoch 8:  31%|███▏      | 2819/8958 [00:30<01:03, 95.98it/s]

Batch 2800, Loss: 5.4550


Epoch 8:  33%|███▎      | 2919/8958 [00:31<01:03, 94.77it/s]

Batch 2900, Loss: 5.5242


Epoch 8:  34%|███▎      | 3020/8958 [00:32<01:01, 96.51it/s]

Batch 3000, Loss: 5.5500


Epoch 8:  35%|███▍      | 3111/8958 [00:33<00:59, 98.25it/s]

Batch 3100, Loss: 5.6676


Epoch 8:  36%|███▌      | 3215/8958 [00:34<00:58, 98.67it/s]

Batch 3200, Loss: 5.6448


Epoch 8:  37%|███▋      | 3312/8958 [00:35<00:55, 102.01it/s]

Batch 3300, Loss: 5.8685


Epoch 8:  38%|███▊      | 3420/8958 [00:36<00:54, 102.35it/s]

Batch 3400, Loss: 5.5310


Epoch 8:  39%|███▉      | 3515/8958 [00:38<01:13, 73.60it/s]

Batch 3500, Loss: 5.6732


Epoch 8:  40%|████      | 3611/8958 [00:39<01:14, 71.43it/s]

Batch 3600, Loss: 5.7380


Epoch 8:  42%|████▏     | 3719/8958 [00:40<00:53, 98.22it/s]

Batch 3700, Loss: 5.7413


Epoch 8:  43%|████▎     | 3812/8958 [00:41<00:51, 99.90it/s]

Batch 3800, Loss: 5.6060


Epoch 8:  44%|████▍     | 3921/8958 [00:42<00:50, 99.72it/s]

Batch 3900, Loss: 5.8900


Epoch 8:  45%|████▍     | 4016/8958 [00:43<00:49, 99.01it/s]

Batch 4000, Loss: 5.6052


Epoch 8:  46%|████▌     | 4120/8958 [00:44<00:49, 98.33it/s]

Batch 4100, Loss: 5.5447


Epoch 8:  47%|████▋     | 4212/8958 [00:45<00:48, 98.30it/s]

Batch 4200, Loss: 5.7089


Epoch 8:  48%|████▊     | 4317/8958 [00:46<00:47, 97.86it/s]

Batch 4300, Loss: 5.8318


Epoch 8:  49%|████▉     | 4420/8958 [00:47<00:46, 98.41it/s]

Batch 4400, Loss: 5.6958


Epoch 8:  50%|█████     | 4513/8958 [00:48<00:45, 98.15it/s]

Batch 4500, Loss: 5.4605


Epoch 8:  52%|█████▏    | 4621/8958 [00:49<00:42, 102.54it/s]

Batch 4600, Loss: 5.5393


Epoch 8:  53%|█████▎    | 4710/8958 [00:50<00:55, 76.62it/s]

Batch 4700, Loss: 5.4059


Epoch 8:  54%|█████▎    | 4807/8958 [00:52<00:57, 71.63it/s]

Batch 4800, Loss: 5.5692


Epoch 8:  55%|█████▍    | 4920/8958 [00:53<00:40, 100.25it/s]

Batch 4900, Loss: 5.7064


Epoch 8:  56%|█████▌    | 5013/8958 [00:54<00:41, 95.29it/s]

Batch 5000, Loss: 5.5267


Epoch 8:  57%|█████▋    | 5114/8958 [00:55<00:39, 97.70it/s]

Batch 5100, Loss: 5.7883


Epoch 8:  58%|█████▊    | 5214/8958 [00:56<00:38, 97.64it/s]

Batch 5200, Loss: 5.7691


Epoch 8:  59%|█████▉    | 5317/8958 [00:57<00:37, 98.06it/s]

Batch 5300, Loss: 5.6130


Epoch 8:  60%|██████    | 5419/8958 [00:58<00:35, 99.03it/s]

Batch 5400, Loss: 5.6728


Epoch 8:  62%|██████▏   | 5520/8958 [00:59<00:36, 93.01it/s]

Batch 5500, Loss: 5.4667


Epoch 8:  63%|██████▎   | 5611/8958 [01:00<00:35, 93.29it/s]

Batch 5600, Loss: 6.0387


Epoch 8:  64%|██████▍   | 5711/8958 [01:01<00:34, 93.93it/s]

Batch 5700, Loss: 5.4467


Epoch 8:  65%|██████▍   | 5810/8958 [01:02<00:41, 75.93it/s]

Batch 5800, Loss: 5.5547


Epoch 8:  66%|██████▌   | 5909/8958 [01:04<00:46, 65.75it/s]

Batch 5900, Loss: 5.6122


Epoch 8:  67%|██████▋   | 6016/8958 [01:06<00:35, 82.08it/s]

Batch 6000, Loss: 5.7063


Epoch 8:  68%|██████▊   | 6115/8958 [01:07<00:31, 90.60it/s]

Batch 6100, Loss: 5.6106


Epoch 8:  69%|██████▉   | 6215/8958 [01:08<00:29, 91.70it/s]

Batch 6200, Loss: 5.6572


Epoch 8:  70%|███████   | 6315/8958 [01:09<00:28, 92.72it/s]

Batch 6300, Loss: 5.6941


Epoch 8:  72%|███████▏  | 6418/8958 [01:10<00:26, 96.30it/s]

Batch 6400, Loss: 5.4906


Epoch 8:  73%|███████▎  | 6511/8958 [01:11<00:25, 94.13it/s]

Batch 6500, Loss: 5.7135


Epoch 8:  74%|███████▍  | 6615/8958 [01:12<00:24, 94.60it/s]

Batch 6600, Loss: 5.5632


Epoch 8:  75%|███████▍  | 6717/8958 [01:13<00:23, 96.33it/s]

Batch 6700, Loss: 5.5383


Epoch 8:  76%|███████▌  | 6818/8958 [01:14<00:23, 91.73it/s]

Batch 6800, Loss: 5.6292


Epoch 8:  77%|███████▋  | 6919/8958 [01:15<00:21, 95.68it/s]

Batch 6900, Loss: 5.6162


Epoch 8:  78%|███████▊  | 7009/8958 [01:16<00:26, 72.74it/s]

Batch 7000, Loss: 5.7317


Epoch 8:  79%|███████▉  | 7113/8958 [01:18<00:26, 68.94it/s]

Batch 7100, Loss: 5.5639


Epoch 8:  81%|████████  | 7215/8958 [01:19<00:18, 96.21it/s]

Batch 7200, Loss: 5.4684


Epoch 8:  82%|████████▏ | 7317/8958 [01:20<00:16, 100.68it/s]

Batch 7300, Loss: 5.6416


Epoch 8:  83%|████████▎ | 7419/8958 [01:21<00:15, 99.29it/s]

Batch 7400, Loss: 5.7003


Epoch 8:  84%|████████▍ | 7521/8958 [01:22<00:14, 96.71it/s]

Batch 7500, Loss: 5.6750


Epoch 8:  85%|████████▌ | 7615/8958 [01:23<00:13, 103.01it/s]

Batch 7600, Loss: 5.8950


Epoch 8:  86%|████████▌ | 7714/8958 [01:24<00:12, 100.49it/s]

Batch 7700, Loss: 5.4586


Epoch 8:  87%|████████▋ | 7811/8958 [01:25<00:11, 100.33it/s]

Batch 7800, Loss: 5.4275


Epoch 8:  88%|████████▊ | 7913/8958 [01:26<00:10, 97.59it/s]

Batch 7900, Loss: 5.7654


Epoch 8:  89%|████████▉ | 8015/8958 [01:27<00:09, 95.80it/s]

Batch 8000, Loss: 5.6813


Epoch 8:  91%|█████████ | 8115/8958 [01:28<00:08, 95.55it/s]

Batch 8100, Loss: 5.7109


Epoch 8:  92%|█████████▏| 8216/8958 [01:30<00:10, 72.73it/s]

Batch 8200, Loss: 5.6674


Epoch 8:  93%|█████████▎| 8315/8958 [01:31<00:08, 75.73it/s]

Batch 8300, Loss: 5.4869


Epoch 8:  94%|█████████▍| 8413/8958 [01:32<00:05, 98.33it/s]

Batch 8400, Loss: 5.6859


Epoch 8:  95%|█████████▌| 8520/8958 [01:33<00:04, 102.67it/s]

Batch 8500, Loss: 5.6801


Epoch 8:  96%|█████████▌| 8617/8958 [01:34<00:03, 99.34it/s]

Batch 8600, Loss: 5.6805


Epoch 8:  97%|█████████▋| 8711/8958 [01:35<00:02, 99.02it/s]

Batch 8700, Loss: 5.6577


Epoch 8:  98%|█████████▊| 8819/8958 [01:36<00:01, 100.04it/s]

Batch 8800, Loss: 5.7068


Epoch 8: 100%|█████████▉| 8914/8958 [01:37<00:00, 97.23it/s]

Batch 8900, Loss: 5.7758


Epoch 8: 100%|██████████| 8958/8958 [01:37<00:00, 91.46it/s]


Epoch 8 completed in 97.948 seconds
Average Loss: 5.6562


Epoch 9:   0%|          | 8/8958 [00:00<01:55, 77.45it/s]

Batch 0, Loss: 5.5827


Epoch 9:   1%|▏         | 121/8958 [00:01<01:28, 99.55it/s]

Batch 100, Loss: 5.6311


Epoch 9:   2%|▏         | 216/8958 [00:02<01:27, 99.78it/s]

Batch 200, Loss: 5.6723


Epoch 9:   4%|▎         | 318/8958 [00:03<01:28, 97.66it/s]

Batch 300, Loss: 5.6469


Epoch 9:   5%|▍         | 413/8958 [00:04<01:51, 76.77it/s]

Batch 400, Loss: 5.5843


Epoch 9:   6%|▌         | 511/8958 [00:05<01:56, 72.79it/s]

Batch 500, Loss: 5.4879


Epoch 9:   7%|▋         | 614/8958 [00:07<01:34, 88.72it/s]

Batch 600, Loss: 5.6941


Epoch 9:   8%|▊         | 715/8958 [00:08<01:25, 96.75it/s]

Batch 700, Loss: 5.6421


Epoch 9:   9%|▉         | 817/8958 [00:09<01:23, 98.07it/s]

Batch 800, Loss: 5.6446


Epoch 9:  10%|█         | 918/8958 [00:10<01:21, 98.73it/s]

Batch 900, Loss: 5.7999


Epoch 9:  11%|█▏        | 1021/8958 [00:11<01:22, 96.70it/s]

Batch 1000, Loss: 5.6486


Epoch 9:  12%|█▏        | 1114/8958 [00:12<01:22, 95.10it/s]

Batch 1100, Loss: 5.5847


Epoch 9:  14%|█▎        | 1216/8958 [00:13<01:18, 98.05it/s]

Batch 1200, Loss: 5.5947


Epoch 9:  15%|█▍        | 1318/8958 [00:14<01:18, 97.41it/s]

Batch 1300, Loss: 5.6757


Epoch 9:  16%|█▌        | 1419/8958 [00:15<01:17, 97.44it/s]

Batch 1400, Loss: 5.7763


Epoch 9:  17%|█▋        | 1519/8958 [00:16<01:20, 92.64it/s]

Batch 1500, Loss: 5.6775


Epoch 9:  18%|█▊        | 1615/8958 [00:17<01:43, 70.74it/s]

Batch 1600, Loss: 5.6872


Epoch 9:  19%|█▉        | 1711/8958 [00:19<01:47, 67.19it/s]

Batch 1700, Loss: 5.6588


Epoch 9:  20%|██        | 1811/8958 [00:20<01:15, 95.15it/s]

Batch 1800, Loss: 5.8332


Epoch 9:  21%|██▏       | 1913/8958 [00:21<01:13, 95.54it/s]

Batch 1900, Loss: 5.5193


Epoch 9:  22%|██▏       | 2013/8958 [00:22<01:12, 95.91it/s]

Batch 2000, Loss: 5.5692


Epoch 9:  24%|██▎       | 2113/8958 [00:23<01:12, 94.06it/s]

Batch 2100, Loss: 5.8005


Epoch 9:  25%|██▍       | 2213/8958 [00:24<01:11, 93.79it/s]

Batch 2200, Loss: 5.7609


Epoch 9:  26%|██▌       | 2314/8958 [00:25<01:09, 95.26it/s]

Batch 2300, Loss: 5.5821


Epoch 9:  27%|██▋       | 2416/8958 [00:26<01:07, 96.43it/s]

Batch 2400, Loss: 5.6220


Epoch 9:  28%|██▊       | 2516/8958 [00:27<01:08, 93.56it/s]

Batch 2500, Loss: 5.6433


Epoch 9:  29%|██▉       | 2616/8958 [00:28<01:07, 94.18it/s]

Batch 2600, Loss: 5.5828


Epoch 9:  30%|███       | 2714/8958 [00:30<01:23, 74.46it/s]

Batch 2700, Loss: 5.5804


Epoch 9:  31%|███▏      | 2811/8958 [00:31<01:24, 72.88it/s]

Batch 2800, Loss: 5.6598


Epoch 9:  33%|███▎      | 2916/8958 [00:32<01:13, 82.07it/s]

Batch 2900, Loss: 5.5156


Epoch 9:  34%|███▎      | 3018/8958 [00:33<01:00, 98.49it/s]

Batch 3000, Loss: 5.5266


Epoch 9:  35%|███▍      | 3119/8958 [00:34<00:59, 98.41it/s]

Batch 3100, Loss: 5.7339


Epoch 9:  36%|███▌      | 3220/8958 [00:36<00:59, 95.90it/s]

Batch 3200, Loss: 5.7064


Epoch 9:  37%|███▋      | 3320/8958 [00:37<00:59, 94.70it/s]

Batch 3300, Loss: 5.7735


Epoch 9:  38%|███▊      | 3420/8958 [00:38<00:59, 93.25it/s]

Batch 3400, Loss: 5.5193


Epoch 9:  39%|███▉      | 3521/8958 [00:39<00:56, 96.58it/s]

Batch 3500, Loss: 5.8397


Epoch 9:  40%|████      | 3611/8958 [00:40<00:56, 95.30it/s]

Batch 3600, Loss: 5.6651


Epoch 9:  41%|████▏     | 3711/8958 [00:41<00:55, 94.01it/s]

Batch 3700, Loss: 5.5364


Epoch 9:  43%|████▎     | 3811/8958 [00:42<00:54, 94.95it/s]

Batch 3800, Loss: 5.5503


Epoch 9:  44%|████▎     | 3915/8958 [00:43<01:08, 73.22it/s]

Batch 3900, Loss: 5.4911


Epoch 9:  45%|████▍     | 4011/8958 [00:45<01:12, 68.39it/s]

Batch 4000, Loss: 5.5837


Epoch 9:  46%|████▌     | 4114/8958 [00:46<00:55, 87.26it/s]

Batch 4100, Loss: 5.7593


Epoch 9:  47%|████▋     | 4214/8958 [00:47<00:49, 95.35it/s]

Batch 4200, Loss: 5.4256


Epoch 9:  48%|████▊     | 4314/8958 [00:48<00:49, 94.14it/s]

Batch 4300, Loss: 5.6208


Epoch 9:  49%|████▉     | 4415/8958 [00:49<00:48, 93.51it/s]

Batch 4400, Loss: 5.5844


Epoch 9:  50%|█████     | 4515/8958 [00:50<00:49, 90.28it/s]

Batch 4500, Loss: 5.6904


Epoch 9:  52%|█████▏    | 4615/8958 [00:51<00:49, 87.80it/s]

Batch 4600, Loss: 5.6758


Epoch 9:  53%|█████▎    | 4715/8958 [00:52<00:46, 91.32it/s]

Batch 4700, Loss: 5.8902


Epoch 9:  54%|█████▍    | 4816/8958 [00:54<00:43, 96.27it/s]

Batch 4800, Loss: 5.8007


Epoch 9:  55%|█████▍    | 4916/8958 [00:55<00:42, 94.10it/s]

Batch 4900, Loss: 5.4956


Epoch 9:  56%|█████▌    | 5011/8958 [00:56<00:50, 77.56it/s]

Batch 5000, Loss: 5.7681


Epoch 9:  57%|█████▋    | 5107/8958 [00:57<00:53, 71.45it/s]

Batch 5100, Loss: 5.8008


Epoch 9:  58%|█████▊    | 5218/8958 [00:59<00:43, 85.00it/s]

Batch 5200, Loss: 5.6429


Epoch 9:  59%|█████▉    | 5318/8958 [01:00<00:37, 96.42it/s]

Batch 5300, Loss: 5.5002


Epoch 9:  60%|██████    | 5418/8958 [01:01<00:38, 92.97it/s]

Batch 5400, Loss: 5.5201


Epoch 9:  62%|██████▏   | 5520/8958 [01:02<00:36, 95.26it/s]

Batch 5500, Loss: 5.7657


Epoch 9:  63%|██████▎   | 5620/8958 [01:03<00:34, 95.63it/s]

Batch 5600, Loss: 5.6863


Epoch 9:  64%|██████▍   | 5720/8958 [01:04<00:33, 95.75it/s]

Batch 5700, Loss: 5.4620


Epoch 9:  65%|██████▍   | 5820/8958 [01:05<00:32, 95.56it/s]

Batch 5800, Loss: 5.3943


Epoch 9:  66%|██████▌   | 5911/8958 [01:06<00:32, 94.12it/s]

Batch 5900, Loss: 5.7068


Epoch 9:  67%|██████▋   | 6011/8958 [01:07<00:30, 95.20it/s]

Batch 6000, Loss: 5.6452


Epoch 9:  68%|██████▊   | 6111/8958 [01:08<00:29, 95.39it/s]

Batch 6100, Loss: 5.6780


Epoch 9:  69%|██████▉   | 6215/8958 [01:09<00:37, 72.76it/s]

Batch 6200, Loss: 5.8934


Epoch 9:  70%|███████   | 6312/8958 [01:11<00:39, 67.50it/s]

Batch 6300, Loss: 5.3018


Epoch 9:  72%|███████▏  | 6412/8958 [01:12<00:28, 90.50it/s]

Batch 6400, Loss: 5.6362


Epoch 9:  73%|███████▎  | 6515/8958 [01:13<00:26, 93.00it/s]

Batch 6500, Loss: 5.5999


Epoch 9:  74%|███████▍  | 6615/8958 [01:14<00:24, 94.71it/s]

Batch 6600, Loss: 5.7882


Epoch 9:  75%|███████▍  | 6716/8958 [01:15<00:23, 95.24it/s]

Batch 6700, Loss: 5.4881


Epoch 9:  76%|███████▌  | 6817/8958 [01:16<00:22, 95.01it/s]

Batch 6800, Loss: 5.6087


Epoch 9:  77%|███████▋  | 6917/8958 [01:17<00:21, 95.34it/s]

Batch 6900, Loss: 5.4632


Epoch 9:  78%|███████▊  | 7018/8958 [01:18<00:19, 97.66it/s]

Batch 7000, Loss: 5.8529


Epoch 9:  79%|███████▉  | 7118/8958 [01:19<00:19, 96.54it/s]

Batch 7100, Loss: 5.4604


Epoch 9:  81%|████████  | 7219/8958 [01:21<00:18, 94.77it/s]

Batch 7200, Loss: 5.8623


Epoch 9:  82%|████████▏ | 7312/8958 [01:22<00:22, 74.54it/s]

Batch 7300, Loss: 5.6605


Epoch 9:  83%|████████▎ | 7409/8958 [01:23<00:20, 74.41it/s]

Batch 7400, Loss: 5.6127


Epoch 9:  84%|████████▍ | 7514/8958 [01:24<00:16, 86.05it/s]

Batch 7500, Loss: 5.5810


Epoch 9:  85%|████████▍ | 7612/8958 [01:26<00:14, 93.60it/s]

Batch 7600, Loss: 5.7068


Epoch 9:  86%|████████▌ | 7712/8958 [01:27<00:13, 93.99it/s]

Batch 7700, Loss: 5.5514


Epoch 9:  87%|████████▋ | 7813/8958 [01:28<00:12, 92.27it/s]

Batch 7800, Loss: 5.5949


Epoch 9:  88%|████████▊ | 7913/8958 [01:29<00:11, 94.13it/s]

Batch 7900, Loss: 5.8002


Epoch 9:  89%|████████▉ | 8013/8958 [01:30<00:09, 95.00it/s]

Batch 8000, Loss: 5.4710


Epoch 9:  91%|█████████ | 8114/8958 [01:31<00:08, 93.94it/s]

Batch 8100, Loss: 5.5818


Epoch 9:  92%|█████████▏| 8214/8958 [01:32<00:07, 93.49it/s]

Batch 8200, Loss: 5.8035


Epoch 9:  93%|█████████▎| 8315/8958 [01:33<00:06, 94.84it/s]

Batch 8300, Loss: 5.4264


Epoch 9:  94%|█████████▍| 8415/8958 [01:34<00:05, 93.95it/s]

Batch 8400, Loss: 5.5190


Epoch 9:  95%|█████████▌| 8514/8958 [01:36<00:06, 70.32it/s]

Batch 8500, Loss: 5.6131


Epoch 9:  96%|█████████▌| 8614/8958 [01:37<00:05, 66.08it/s]

Batch 8600, Loss: 5.9446


Epoch 9:  97%|█████████▋| 8712/8958 [01:38<00:02, 92.87it/s]

Batch 8700, Loss: 5.7685


Epoch 9:  98%|█████████▊| 8812/8958 [01:39<00:01, 93.80it/s]

Batch 8800, Loss: 5.6656


Epoch 9: 100%|█████████▉| 8916/8958 [01:40<00:00, 97.90it/s]

Batch 8900, Loss: 5.3607


Epoch 9: 100%|██████████| 8958/8958 [01:41<00:00, 88.46it/s]


Epoch 9 completed in 101.270 seconds
Average Loss: 5.6544


Epoch 10:   0%|          | 6/8958 [00:00<02:39, 56.00it/s]

Batch 0, Loss: 5.7811


Epoch 10:   1%|▏         | 115/8958 [00:01<01:35, 92.85it/s]

Batch 100, Loss: 5.5194


Epoch 10:   2%|▏         | 215/8958 [00:02<01:33, 93.46it/s]

Batch 200, Loss: 5.5837


Epoch 10:   4%|▎         | 315/8958 [00:03<01:33, 92.19it/s]

Batch 300, Loss: 5.7643


Epoch 10:   5%|▍         | 415/8958 [00:04<01:32, 92.73it/s]

Batch 400, Loss: 5.6103


Epoch 10:   6%|▌         | 516/8958 [00:05<01:30, 93.53it/s]

Batch 500, Loss: 5.6441


Epoch 10:   7%|▋         | 607/8958 [00:06<01:42, 81.13it/s]

Batch 600, Loss: 5.4917


Epoch 10:   8%|▊         | 712/8958 [00:08<01:48, 76.34it/s]

Batch 700, Loss: 5.7293


Epoch 10:   9%|▉         | 814/8958 [00:09<01:51, 72.80it/s]

Batch 800, Loss: 5.6165


Epoch 10:  10%|█         | 913/8958 [00:10<01:28, 90.95it/s]

Batch 900, Loss: 5.3943


Epoch 10:  11%|█▏        | 1012/8958 [00:11<01:26, 91.95it/s]

Batch 1000, Loss: 5.5507


Epoch 10:  12%|█▏        | 1114/8958 [00:12<01:22, 94.78it/s]

Batch 1100, Loss: 5.5508


Epoch 10:  14%|█▎        | 1214/8958 [00:13<01:25, 90.82it/s]

Batch 1200, Loss: 5.6855


Epoch 10:  15%|█▍        | 1315/8958 [00:14<01:22, 92.65it/s]

Batch 1300, Loss: 5.8328


Epoch 10:  16%|█▌        | 1416/8958 [00:15<01:20, 94.22it/s]

Batch 1400, Loss: 5.7485


Epoch 10:  17%|█▋        | 1516/8958 [00:17<01:18, 94.39it/s]

Batch 1500, Loss: 5.8220


Epoch 10:  18%|█▊        | 1616/8958 [00:18<01:18, 94.03it/s]

Batch 1600, Loss: 5.6161


Epoch 10:  19%|█▉        | 1718/8958 [00:19<01:17, 93.70it/s]

Batch 1700, Loss: 5.6738


Epoch 10:  20%|██        | 1814/8958 [00:20<01:36, 73.75it/s]

Batch 1800, Loss: 5.5157


Epoch 10:  21%|██▏       | 1908/8958 [00:21<01:42, 69.00it/s]

Batch 1900, Loss: 6.0068


Epoch 10:  22%|██▏       | 2012/8958 [00:23<01:18, 87.93it/s]

Batch 2000, Loss: 5.6585


Epoch 10:  24%|██▎       | 2114/8958 [00:24<01:12, 94.36it/s]

Batch 2100, Loss: 5.6387


Epoch 10:  25%|██▍       | 2215/8958 [00:25<01:12, 92.68it/s]

Batch 2200, Loss: 6.0065


Epoch 10:  26%|██▌       | 2315/8958 [00:26<01:11, 93.10it/s]

Batch 2300, Loss: 5.7029


Epoch 10:  27%|██▋       | 2415/8958 [00:27<01:11, 91.91it/s]

Batch 2400, Loss: 5.6510


Epoch 10:  28%|██▊       | 2516/8958 [00:28<01:06, 97.19it/s]

Batch 2500, Loss: 5.5416


Epoch 10:  29%|██▉       | 2617/8958 [00:29<01:06, 94.67it/s]

Batch 2600, Loss: 5.6111


Epoch 10:  30%|███       | 2717/8958 [00:30<01:06, 94.01it/s]

Batch 2700, Loss: 5.7084


Epoch 10:  31%|███▏      | 2817/8958 [00:31<01:05, 94.39it/s]

Batch 2800, Loss: 5.6755


Epoch 10:  33%|███▎      | 2914/8958 [00:32<01:19, 75.87it/s]

Batch 2900, Loss: 5.5523


Epoch 10:  34%|███▎      | 3009/8958 [00:34<01:39, 59.60it/s]

Batch 3000, Loss: 5.6734


Epoch 10:  35%|███▍      | 3120/8958 [00:35<01:08, 85.69it/s]

Batch 3100, Loss: 5.4703


Epoch 10:  36%|███▌      | 3219/8958 [00:36<01:01, 92.83it/s]

Batch 3200, Loss: 5.3939


Epoch 10:  37%|███▋      | 3319/8958 [00:37<01:00, 92.63it/s]

Batch 3300, Loss: 5.5821


Epoch 10:  38%|███▊      | 3419/8958 [00:39<00:59, 92.78it/s]

Batch 3400, Loss: 5.5731


Epoch 10:  39%|███▉      | 3512/8958 [00:39<00:57, 94.69it/s]

Batch 3500, Loss: 5.5649


Epoch 10:  40%|████      | 3613/8958 [00:41<00:57, 92.30it/s]

Batch 3600, Loss: 5.6433


Epoch 10:  41%|████▏     | 3713/8958 [00:42<00:55, 94.06it/s]

Batch 3700, Loss: 5.7855


Epoch 10:  43%|████▎     | 3813/8958 [00:43<00:55, 92.39it/s]

Batch 3800, Loss: 5.5171


Epoch 10:  44%|████▎     | 3914/8958 [00:44<00:53, 93.93it/s]

Batch 3900, Loss: 5.7067


Epoch 10:  45%|████▍     | 4014/8958 [00:45<00:53, 93.05it/s]

Batch 4000, Loss: 5.6128


Epoch 10:  46%|████▌     | 4114/8958 [00:46<01:07, 72.22it/s]

Batch 4100, Loss: 5.6121


Epoch 10:  47%|████▋     | 4213/8958 [00:48<01:11, 66.25it/s]

Batch 4200, Loss: 5.5998


Epoch 10:  48%|████▊     | 4320/8958 [00:49<00:49, 92.95it/s]

Batch 4300, Loss: 5.4256


Epoch 10:  49%|████▉     | 4412/8958 [00:50<00:47, 94.89it/s]

Batch 4400, Loss: 5.6128


Epoch 10:  50%|█████     | 4513/8958 [00:51<00:45, 96.81it/s]

Batch 4500, Loss: 5.5917


Epoch 10:  52%|█████▏    | 4614/8958 [00:52<00:45, 96.09it/s]

Batch 4600, Loss: 5.5393


Epoch 10:  53%|█████▎    | 4714/8958 [00:53<00:44, 95.03it/s]

Batch 4700, Loss: 5.7290


Epoch 10:  54%|█████▎    | 4814/8958 [00:54<00:44, 93.46it/s]

Batch 4800, Loss: 5.8014


Epoch 10:  55%|█████▍    | 4914/8958 [00:55<00:43, 93.44it/s]

Batch 4900, Loss: 5.7541


Epoch 10:  56%|█████▌    | 5015/8958 [00:56<00:41, 96.12it/s]

Batch 5000, Loss: 5.7060


Epoch 10:  57%|█████▋    | 5115/8958 [00:57<00:41, 93.56it/s]

Batch 5100, Loss: 5.7076


Epoch 10:  58%|█████▊    | 5215/8958 [00:59<00:50, 74.33it/s]

Batch 5200, Loss: 5.4573


Epoch 10:  59%|█████▉    | 5310/8958 [01:00<00:53, 68.13it/s]

Batch 5300, Loss: 5.7230


Epoch 10:  60%|██████    | 5411/8958 [01:01<00:39, 90.55it/s]

Batch 5400, Loss: 5.5189


Epoch 10:  62%|██████▏   | 5512/8958 [01:02<00:36, 95.25it/s]

Batch 5500, Loss: 5.7560


Epoch 10:  63%|██████▎   | 5612/8958 [01:03<00:35, 93.60it/s]

Batch 5600, Loss: 5.5819


Epoch 10:  64%|██████▍   | 5712/8958 [01:05<00:34, 93.21it/s]

Batch 5700, Loss: 5.7693


Epoch 10:  65%|██████▍   | 5813/8958 [01:06<00:33, 93.58it/s]

Batch 5800, Loss: 5.5254


Epoch 10:  66%|██████▌   | 5915/8958 [01:07<00:31, 95.71it/s]

Batch 5900, Loss: 5.7240


Epoch 10:  67%|██████▋   | 6016/8958 [01:08<00:31, 94.21it/s]

Batch 6000, Loss: 5.6759


Epoch 10:  68%|██████▊   | 6116/8958 [01:09<00:30, 94.56it/s]

Batch 6100, Loss: 5.4576


Epoch 10:  69%|██████▉   | 6217/8958 [01:10<00:28, 95.97it/s]

Batch 6200, Loss: 5.6772


Epoch 10:  71%|███████   | 6317/8958 [01:11<00:30, 86.30it/s]

Batch 6300, Loss: 5.4813


Epoch 10:  72%|███████▏  | 6415/8958 [01:12<00:34, 73.96it/s]

Batch 6400, Loss: 5.6756


Epoch 10:  73%|███████▎  | 6510/8958 [01:14<00:36, 67.80it/s]

Batch 6500, Loss: 5.5532


Epoch 10:  74%|███████▍  | 6614/8958 [01:15<00:25, 92.14it/s]

Batch 6600, Loss: 5.5544


Epoch 10:  75%|███████▍  | 6714/8958 [01:16<00:24, 92.96it/s]

Batch 6700, Loss: 5.7379


Epoch 10:  76%|███████▌  | 6814/8958 [01:17<00:22, 94.11it/s]

Batch 6800, Loss: 5.7705


Epoch 10:  77%|███████▋  | 6915/8958 [01:18<00:21, 94.85it/s]

Batch 6900, Loss: 5.5493


Epoch 10:  78%|███████▊  | 7017/8958 [01:19<00:20, 93.36it/s]

Batch 7000, Loss: 5.7893


Epoch 10:  79%|███████▉  | 7118/8958 [01:20<00:19, 94.21it/s]

Batch 7100, Loss: 5.4873


Epoch 10:  81%|████████  | 7218/8958 [01:21<00:18, 93.40it/s]

Batch 7200, Loss: 5.7038


Epoch 10:  82%|████████▏ | 7318/8958 [01:22<00:17, 93.47it/s]

Batch 7300, Loss: 5.5816


Epoch 10:  83%|████████▎ | 7418/8958 [01:23<00:16, 94.23it/s]

Batch 7400, Loss: 5.4203


Epoch 10:  84%|████████▍ | 7513/8958 [01:25<00:19, 73.39it/s]

Batch 7500, Loss: 5.6131


Epoch 10:  85%|████████▍ | 7614/8958 [01:26<00:19, 68.48it/s]

Batch 7600, Loss: 6.0623


Epoch 10:  86%|████████▌ | 7720/8958 [01:27<00:13, 89.91it/s]

Batch 7700, Loss: 5.6769


Epoch 10:  87%|████████▋ | 7820/8958 [01:29<00:12, 93.41it/s]

Batch 7800, Loss: 5.5803


Epoch 10:  88%|████████▊ | 7912/8958 [01:29<00:10, 96.98it/s]

Batch 7900, Loss: 5.6447


Epoch 10:  89%|████████▉ | 8016/8958 [01:31<00:09, 98.05it/s]

Batch 8000, Loss: 5.6437


Epoch 10:  91%|█████████ | 8117/8958 [01:32<00:08, 95.08it/s]

Batch 8100, Loss: 5.6465


Epoch 10:  92%|█████████▏| 8221/8958 [01:33<00:07, 99.53it/s]

Batch 8200, Loss: 5.8409


Epoch 10:  93%|█████████▎| 8313/8958 [01:34<00:06, 95.05it/s]

Batch 8300, Loss: 5.6756


Epoch 10:  94%|█████████▍| 8413/8958 [01:35<00:05, 92.27it/s]

Batch 8400, Loss: 5.4881


Epoch 10:  95%|█████████▌| 8514/8958 [01:36<00:04, 95.58it/s]

Batch 8500, Loss: 5.6145


Epoch 10:  96%|█████████▌| 8614/8958 [01:37<00:03, 93.74it/s]

Batch 8600, Loss: 5.6442


Epoch 10:  97%|█████████▋| 8709/8958 [01:38<00:03, 73.76it/s]

Batch 8700, Loss: 5.7467


Epoch 10:  98%|█████████▊| 8815/8958 [01:40<00:02, 69.63it/s]

Batch 8800, Loss: 5.7041


Epoch 10: 100%|█████████▉| 8915/8958 [01:41<00:00, 90.98it/s]

Batch 8900, Loss: 5.8918


Epoch 10: 100%|██████████| 8958/8958 [01:41<00:00, 88.04it/s]

Epoch 10 completed in 101.755 seconds
Average Loss: 5.6542
Training completed in 972.950 seconds



