In [1]:
# Cell 1: Import required libraries
import os
import yaml
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Import your model and data loading components
from dataloader.dataset_wrapper import create_wrapper_from_dataframe

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# Cell 2: Configuration and paths setup
CONFIG_PATH = "/Users/ivangolov/Desktop/Диплом/CSMP_project/CSMP_thesis_project/services/CSU-MS2/model/qtof_model/median_energy/checkpoints/config.yaml"
TRAIN_CSV_PATH = "/Users/ivangolov/Desktop/Диплом/CSMP_project/CSMP_spectrum_database/data/production/train_deduplicated.csv"
OUTPUT_DIR = "./train_results"

# Device selection for Mac
if torch.backends.mps.is_available():
    DEVICE = 'mps'
    print("Using MPS (Metal Performance Shaders) for GPU acceleration")
elif torch.cuda.is_available():
    DEVICE = 'cuda'
    print("Using CUDA for GPU acceleration")
else:
    DEVICE = 'cpu'
    print("Using CPU")

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Using device: {DEVICE}")
print(f"Validation data path: {TRAIN_CSV_PATH}")

Using MPS (Metal Performance Shaders) for GPU acceleration
Using device: mps
Validation data path: /Users/ivangolov/Desktop/Диплом/CSMP_project/CSMP_spectrum_database/data/production/train_deduplicated.csv


In [3]:
# Cell 3: Load configuration
print("Loading configuration...")
config = yaml.load(open(CONFIG_PATH, "r"), Loader=yaml.FullLoader)

print("Configuration loaded:")
print(f"- Batch size: {config.get('batch_size', 'Not specified')}")
print(f"- Model config keys: {list(config.get('model', {}).keys())}")
print(f"- Loss config: {config.get('loss', {})}")

Loading configuration...
Configuration loaded:
- Batch size: 64
- Model config keys: []
- Loss config: {'temperature': 0.1, 'use_cosine_similarity': True, 'alpha_weight': 0.75}


In [4]:
# Cell 4: Load and explore validation data
print("Loading train data...")
df_train = pd.read_csv(TRAIN_CSV_PATH)

print(f"Validation dataset shape: {df_train.shape}")
print(f"Columns: {list(df_train.columns)}")
print(f"Sample data:")
df_train.head()

Loading train data...
Validation dataset shape: (798444, 10)
Columns: ['peaks_json', 'ion_source', 'compound_source', 'instrument', 'adduct', 'precursor_mz', 'smiles', 'inchikey', 'ion_mode', 'molecular_formula']
Sample data:


Unnamed: 0,peaks_json,ion_source,compound_source,instrument,adduct,precursor_mz,smiles,inchikey,ion_mode,molecular_formula
0,"[[42.014248, 0.10199999999999998], [42.26601, ...",ESI,Crude,Orbitrap,[M+H]+,377.186,CC12CCC(C(=O)N(CNc3cc4c(cc3)c3ccccc3o4)C1=O)C2...,RNKMIWQDRWSWCD-UHFFFAOYSA-N,Positive,C23H24N2O3
1,"[[49.01717, 0.155], [49.020023, 0.253], [67.05...",ESI,Crude,Orbitrap,[M+H]+,377.186,CC12CCC(C(=O)N(CNc3cc4c(cc3)c3ccccc3o4)C1=O)C2...,RNKMIWQDRWSWCD-UHFFFAOYSA-N,Positive,C23H24N2O3
2,"[[49.017338, 0.242], [49.020237, 0.181], [67.0...",ESI,Crude,Orbitrap,[M+H]+,377.186,CC12CCC(C(=O)N(CNc3cc4c(cc3)c3ccccc3o4)C1=O)C2...,RNKMIWQDRWSWCD-UHFFFAOYSA-N,Positive,C23H24N2O3
3,"[[49.01701, 0.144], [49.019947, 0.244], [139.0...",ESI,Crude,Orbitrap,[M+H]+,377.186,CC12CCC(C(=O)N(CNc3cc4c(cc3)c3ccccc3o4)C1=O)C2...,RNKMIWQDRWSWCD-UHFFFAOYSA-N,Positive,C23H24N2O3
4,"[[49.017166, 0.155], [49.020008, 0.253], [139....",ESI,Crude,Orbitrap,[M+H]+,377.186,CC12CCC(C(=O)N(CNc3cc4c(cc3)c3ccccc3o4)C1=O)C2...,RNKMIWQDRWSWCD-UHFFFAOYSA-N,Positive,C23H24N2O3


In [5]:
df_train_sample = df_train.sample(n=10000,random_state=42).reset_index(drop=True)

In [7]:
# Cell 6: Prepare validation data loader
print("Preparing data loaders")

# Create data wrapper from DataFrame
wrapper, processed_df = create_wrapper_from_dataframe(
    df=df_train_sample,
    batch_size=64,  
    num_workers=8,
    valid_size=0.2,  
    use_ddp=False,
    output_dir="./train_features",
)

# Get the data loader
train_loader, val_loader = wrapper.get_data_loaders()

print(f"Successfully processed {len(processed_df)} samples")
print(f"Number of batches: {len(val_loader)}")

Preparing data loaders
Convert DataFrame to compatible files
Processed 10000 valid spectra out of 10000 total entries.
Create data wrapper
calculating molecular graphs


 13%|█▎        | 1033/8000 [00:00<00:04, 1515.42it/s]

SMILES [I-].O=C(OCC1=CC[N+]2(C)CCC(O)C12)C(O)(C(O)C)C(C)C calculation failure


 22%|██▏       | 1738/8000 [00:01<00:03, 1708.82it/s]

SMILES [Cl-].O=C1C2=CC=C(O)C(=C2OC(=C1C=3C=CC=4OCCCOC4C3)C)C[NH+](C)C calculation failure
SMILES [Na+].O=C(CCCCCCCCCCC)CC(O)S(=O)(=O)[O-] calculation failure


 26%|██▌       | 2086/8000 [00:01<00:03, 1714.60it/s]

SMILES [Cl-].O=C1C(=COC2=C1C=C(C(O)=C2C[NH+](C)C)CC)C=3C=CC=4OCCOC4C3 calculation failure
SMILES [I-].O=C(OCC1=CC[N+]2(C)CCC(O)C12)C(O)(C(O)C)C(C)C calculation failure


 30%|███       | 2431/8000 [00:01<00:03, 1708.65it/s]

SMILES [Cl-].OC=1C=C(O)C=2C=C(OC3OC(CO)C(O)C(O)C3O)C(=[O+]C2C1)C=4C=CC(O)=C(O)C4 calculation failure
SMILES [Cl-].O=C1C2=CC=C(O)C(=C2OC(=C1C=3C=CC=4OCCCOC4C3)C)C[NH+](C)C calculation failure


 37%|███▋      | 2946/8000 [00:01<00:02, 1702.73it/s]

SMILES [Cl-].O=C1C=2C=C(C(O)=C(C2OC(=C1C=3C=CC=4OCCOC4C3)C)C[NH+](C)C)CCC calculation failure
SMILES [K+].O=S(=O)([O-])ON=C(SC1OC(CO)C(O)C(O)C1O)CC=C.O calculation failure
SMILES [Na+].O=C(CCCCCCCCCCC)CC(O)S(=O)(=O)[O-] calculation failure


 56%|█████▌    | 4499/8000 [00:02<00:02, 1691.79it/s]

SMILES [Cl-].O=C1C2=CC=C(O)C(=C2OC(=C1C=3C=CC=4OCCCOC4C3)C)C[NH+](C)C calculation failure


 63%|██████▎   | 5002/8000 [00:03<00:01, 1641.39it/s]

SMILES [K+].[K+].O=C([O-])C1OC(OC2C(OC(C(=O)[O-])C(O)C2O)OC3CCC4(C)C5C(=O)C=C6C7CC(C(=O)O)(C)CCC7(C)CCC6(C)C5(C)CCC4C3(C)C)C(O)C(O)C1O calculation failure


 71%|███████   | 5644/8000 [00:03<00:01, 1408.10it/s]

SMILES [Cl-].OC=1C=C(O)C=2C=C(OC3OC(CO)C(O)C(O)C3O)C(=[O+]C2C1)C=4C=CC(O)=C(O)C4 calculation failure
SMILES C1C(N(C2=C(N1)N=C(NC2=O)N)C=O)CNC3=CC=C(C=C3)C(=O)N[C@@H](CCC(=O)[O-])C(=O)[O-].[Ca+2] calculation failure


 77%|███████▋  | 6183/8000 [00:03<00:01, 1179.17it/s]

SMILES [Na+].O=C([O-])C(CC)C1OC(C(=CC=CC2C=CC3CCCC3C2C(=O)C4=CC=CN4)CC)C(C)CC1 calculation failure
SMILES [K+].O=C([O-])C12CCC(C(=C)C)C2C3CCC4C5(C)CCC(=O)C(C)(C)C5CCC4(C)C3(C)CC1 calculation failure


 91%|█████████ | 7265/8000 [00:04<00:00, 1546.88it/s]

SMILES [Na+].O=P([O-])(O)OCC1OC(N2C=NC=3C(=NC=NC32)N)C(O)C1O calculation failure


100%|██████████| 8000/8000 [00:05<00:00, 1544.90it/s]


SMILES [I-].O=C(OCC1=CC[N+]2(C)CCC(O)C12)C(O)(C(O)C)C(C)C calculation failure
Calculated 6848 molecular graph-mass spectrometry pairs
calculating molecular graphs


 25%|██▍       | 495/2000 [00:00<00:00, 1652.05it/s]

SMILES [Cl-].O=C(O)C=1C=CC=CC1C=2C=3C=CC(=CC3OC4=CC(C=CC42)=[N+](CC)CC)N(CC)CC calculation failure


 50%|████▉     | 994/2000 [00:00<00:00, 1647.43it/s]

SMILES [Na+].O=C(CCCCCCCCCCC)CC(O)S(=O)(=O)[O-] calculation failure
SMILES [K+].O=S(=O)([O-])ON=C(SC1OC(CO)C(O)C(O)C1O)CC=C.O calculation failure


 92%|█████████▏| 1837/2000 [00:01<00:00, 1679.95it/s]

SMILES [Br-].O=C(OC1CC2C3OC3C(C1)[N+]2(C)CCCC)C(C=4C=CC=CC4)CO calculation failure
SMILES CCC1=C(C2=NC1=CC3=C(C4=C([N-]3)C(=C5[C@H]([C@@H](C(=N5)C=C6C(=C(C(=C2)[N-]6)C=C)C)C)CCC(=O)OC/C=C(\C)/CCC[C@H](C)CCC[C@H](C)CCCC(C)C)[C@H](C4=O)C(=O)OC)C)C=O.[Mg+2] calculation failure
SMILES [I-].O=C(OCC1=CC[N+]2(C)CCC(O)C12)C(O)(C(O)C)C(C)C calculation failure


100%|██████████| 2000/2000 [00:01<00:00, 1644.40it/s]

Calculated 1705 molecular graph-mass spectrometry pairs
Successfully processed 10000 samples
Number of batches: 27





In [8]:
from model import ModelCLR

# Initialize model architecture
model = ModelCLR(**config["model_config"]).to(DEVICE)

In [9]:
# Model Architecture Overview
print("MODEL ARCHITECTURE:")
print("-" * 40)
print(f"Model Type: {type(model).__name__}")
print(f"Device: {DEVICE}")

MODEL ARCHITECTURE:
----------------------------------------
Model Type: ModelCLR
Device: mps


In [10]:
# Submodules Analysis
print("SUBMODULES:")
print("-" * 40)
for name, module in model.named_children():
    print(f"{name:15}: {type(module).__name__}")
    if hasattr(module, '__dict__'):
        for attr_name, attr_value in module.__dict__.items():
            if not attr_name.startswith('_') and not callable(attr_value):
                if isinstance(attr_value, (int, float, str, bool)):
                    print(f"  {attr_name:12}: {attr_value}")

SUBMODULES:
----------------------------------------
Smiles_model   : SmilesModel
  training    : True
  num_layer   : 5
  emb_dim     : 300
  feat_dim    : 512
  drop_ratio  : 0.3
MS_model       : MSModel
  training    : True
smi_esa        : ESA_SMILES
  training    : True
spec_esa       : ESA_SPEC
  training    : True
smi_proj       : Linear
  training    : True
  in_features : 256
  out_features: 256
spec_proj      : Linear
  training    : True
  in_features : 256
  out_features: 256


In [11]:
# Parameter Count
print("PARAMETER ANALYSIS:")
print("-" * 40)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")
print(f"Non-trainable Parameters: {total_params - trainable_params:,}")


PARAMETER ANALYSIS:
----------------------------------------
Total Parameters: 5,318,572
Trainable Parameters: 5,318,444
Non-trainable Parameters: 128


In [12]:
# Memory Usage (approximate)
print("MEMORY ANALYSIS:")
print("-" * 40)
param_size = sum(p.numel() * p.element_size() for p in model.parameters())
buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())
model_size_mb = (param_size + buffer_size) / 1024 / 1024

print(f"Model Size: {model_size_mb:.2f} MB")
print(f"Parameter Memory: {param_size / 1024 / 1024:.2f} MB")
print(f"Buffer Memory: {buffer_size / 1024 / 1024:.2f} MB")


MEMORY ANALYSIS:
----------------------------------------
Model Size: 20.30 MB
Parameter Memory: 20.29 MB
Buffer Memory: 0.01 MB


In [13]:
from loss.nt_xent import NTXentLoss

# Initialize loss function
temperature = config.get('loss', {}).get('temperature', 0.1)
batch_size = config.get('batch_size', 1024)
use_cosine_similarity = config.get('loss', {}).get('use_cosine_similarity', True)
alpha_weight = config.get('loss', {}).get('alpha_weight', 1.0)

criterion = NTXentLoss(
    device=DEVICE, 
    batch_size=batch_size, 
    temperature=temperature, 
    use_cosine_similarity=use_cosine_similarity, 
    alpha_weight=alpha_weight
)

In [14]:
# Cell 15: Training Setup and Optimizer
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import json
import time
from collections import defaultdict

print("Setting up training components...")
OUTPUT_DIR = "./model"

# Initialize optimizer
optimizer = optim.AdamW(
    model.parameters(),
    lr=float(config.get('learning_rate', 5e-6)),
    weight_decay=float(config.get('weight_decay', 1e-4))
)

# Initialize scheduler
scheduler = CosineAnnealingLR(
    optimizer, 
    T_max=config.get('epochs', 4000),
    eta_min=1e-7
)

# Training configuration
epochs = config.get('epochs', 4000)
eval_every_n_epochs = config.get('eval_every_n_epochs', 5)
log_every_n_steps = config.get('log_every_n_steps', 2)

# Create checkpoint directory
checkpoint_dir = os.path.join(OUTPUT_DIR, "checkpoints")
os.makedirs(checkpoint_dir, exist_ok=True)

print(f"Training for {epochs} epochs")
print(f"Optimizer: AdamW with lr={config.get('learning_rate', 5e-6)}")
print(f"Scheduler: CosineAnnealingLR")
print(f"Checkpoint directory: {checkpoint_dir}")

Setting up training components...
Training for 100 epochs
Optimizer: AdamW with lr=5e-06
Scheduler: CosineAnnealingLR
Checkpoint directory: ./model/checkpoints


In [15]:
# Cell 16: Training and Evaluation Functions
def train_epoch(model, train_loader, criterion, optimizer, device, epoch):
    model.train()
    total_loss = 0
    num_batches = 0
    batch_losses = []
    
    progress_bar = tqdm(train_loader, desc=f"Training Epoch {epoch}")
    
    for batch_idx, (graphs, mzs, intensities, num_peaks) in enumerate(progress_bar):
        # Move data to device
        graphs = graphs.to(device)
        mzs = mzs.to(device)
        intensities = intensities.to(device)
        num_peaks = num_peaks.to(device)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        mol_features, spec_features = model(graphs, mzs, intensities, num_peaks)
        loss = criterion(mol_features, spec_features)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Track loss
        batch_loss = loss.item()
        total_loss += batch_loss
        batch_losses.append(batch_loss)
        num_batches += 1
        
        # Update progress bar
        progress_bar.set_postfix({
            'Loss': f'{batch_loss:.4f}',
            'Avg Loss': f'{total_loss/num_batches:.4f}'
        })
        
        # Log every n steps
        if batch_idx % log_every_n_steps == 0:
            print(f"Epoch {epoch}, Batch {batch_idx}, Loss: {batch_loss:.4f}")
    
    avg_loss = total_loss / num_batches
    return avg_loss, batch_losses

In [16]:
def compute_retrieval_metrics(molecular_features, spectral_features):
    from sklearn.metrics import pairwise_distances

    # Compute cosine distance matrix
    distance_matrix = pairwise_distances(molecular_features, spectral_features, metric='cosine')
    
    # For each molecular feature, find the rank of the correct spectral feature
    ranks = []
    for i in range(distance_matrix.shape[0]):
        sorted_indices = np.argsort(distance_matrix[i])
        rank = np.where(sorted_indices == i)[0][0] + 1  # +1 for 1-based rank
        ranks.append(rank)
    
    ranks = np.array(ranks)
    
    # Compute top-k accuracies
    top_1_accuracy = np.mean(ranks <= 1)
    top_5_accuracy = np.mean(ranks <= 5)
    top_10_accuracy = np.mean(ranks <= 10)
    
    return {
        'top_1_accuracy': top_1_accuracy,
        'top_5_accuracy': top_5_accuracy,
        'top_10_accuracy': top_10_accuracy
    }


def evaluate_model(model, val_loader, criterion, device, epoch):
    model.eval()
    total_loss = 0
    num_batches = 0
    
    molecular_features_list = []
    spectral_features_list = []
    
    with torch.no_grad():
        progress_bar = tqdm(val_loader, desc=f"Evaluating Epoch {epoch}")
        
        for batch_idx, (graphs, mzs, intensities, num_peaks) in enumerate(progress_bar):
            # Move data to device
            graphs = graphs.to(device)
            mzs = mzs.to(device)
            intensities = intensities.to(device)
            num_peaks = num_peaks.to(device)
            
            # Forward pass
            mol_features, spec_features = model(graphs, mzs, intensities, num_peaks)
            loss = criterion(mol_features, spec_features)
            
            total_loss += loss.item()
            num_batches += 1
            
            # Store features for retrieval metrics
            molecular_features_list.append(mol_features.cpu().numpy())
            spectral_features_list.append(spec_features.cpu().numpy())
            
            progress_bar.set_postfix({'Val Loss': f'{loss.item():.4f}'})
    
    avg_loss = total_loss / num_batches
    
    # Compute retrieval metrics
    all_mol_features = np.vstack(molecular_features_list)
    all_spec_features = np.vstack(spectral_features_list)
    
    # Compute cosine similarities
    cosine_similarities = np.sum(all_mol_features * all_spec_features, axis=1)
    mean_similarity = np.mean(cosine_similarities)
    
    # Compute retrieval metrics
    retrieval_metrics = compute_retrieval_metrics(all_mol_features, all_spec_features)
    
    return avg_loss, mean_similarity, retrieval_metrics

In [17]:
def save_checkpoint(model, optimizer, scheduler, epoch, train_loss, val_loss, val_metrics, checkpoint_dir):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_loss': train_loss,
        'val_loss': val_loss,
        'val_metrics': val_metrics,
        'config': config
    }
    
    checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch}.pth')
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved: {checkpoint_path}")
    
    # Save best model
    best_checkpoint_path = os.path.join(checkpoint_dir, 'best_model.pth')
    if not os.path.exists(best_checkpoint_path):
        torch.save(checkpoint, best_checkpoint_path)
        print(f"Best model saved: {best_checkpoint_path}")
    else:
        best_checkpoint = torch.load(best_checkpoint_path)
        if val_loss < best_checkpoint['val_loss']:
            torch.save(checkpoint, best_checkpoint_path)
            print(f"New best model saved: {best_checkpoint_path}")

print("Training and evaluation functions defined!")

Training and evaluation functions defined!


In [18]:
# Cell 17: Training Loop
# Training history for plotting
train_history = {
    'epochs': [],
    'train_losses': [],
    'val_losses': [],
    'val_similarities': [],
    'val_top1_acc': [],
    'val_top5_acc': [],
    'val_top10_acc': [],
    'learning_rates': []
}

best_val_loss = float('inf')
start_time = time.time()

print("Starting training...")
print("=" * 60)

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    
    # Training phase
    train_loss, batch_losses = train_epoch(
        model, train_loader, criterion, optimizer, DEVICE, epoch
    )
    
    # Validation phase (every n epochs)
    if epoch % eval_every_n_epochs == 0:
        val_loss, val_similarity, val_metrics = evaluate_model(
            model, val_loader, criterion, DEVICE, epoch
        )
        
        # Update learning rate
        scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']
        
        # Log results
        epoch_time = time.time() - epoch_start_time
        total_time = time.time() - start_time
        
        print(f"\nEpoch {epoch}/{epochs}")
        print(f"Train Loss: {train_loss:.6f}")
        print(f"Val Loss: {val_loss:.6f}")
        print(f"Val Similarity: {val_similarity:.4f}")
        print(f"Top-1 Accuracy: {val_metrics['top_1_accuracy']:.4f}")
        print(f"Top-5 Accuracy: {val_metrics['top_5_accuracy']:.4f}")
        print(f"Top-10 Accuracy: {val_metrics['top_10_accuracy']:.4f}")
        print(f"Learning Rate: {current_lr:.2e}")
        print(f"Epoch Time: {epoch_time:.2f}s, Total Time: {total_time:.2f}s")
        print("-" * 60)
        
        # Store history
        train_history['epochs'].append(epoch)
        train_history['train_losses'].append(train_loss)
        train_history['val_losses'].append(val_loss)
        train_history['val_similarities'].append(val_similarity)
        train_history['val_top1_acc'].append(val_metrics['top_1_accuracy'])
        train_history['val_top5_acc'].append(val_metrics['top_5_accuracy'])
        train_history['val_top10_acc'].append(val_metrics['top_10_accuracy'])
        train_history['learning_rates'].append(current_lr)
        
        # Save checkpoint
        save_checkpoint(
            model, optimizer, scheduler, epoch, 
            train_loss, val_loss, val_metrics, checkpoint_dir
        )
        
        # Save training history
        history_path = os.path.join(OUTPUT_DIR, 'training_history.json')
        with open(history_path, 'w') as f:
            json.dump(train_history, f, indent=2)
    
    else:
        # Just update learning rate and log basic info
        scheduler.step()
        current_lr = optimizer.param_groups[0]['lr']
        epoch_time = time.time() - epoch_start_time
        
        print(f"Epoch {epoch}/{epochs} - Train Loss: {train_loss:.6f}, LR: {current_lr:.2e}, Time: {epoch_time:.2f}s")

print("\nTraining completed!")
print(f"Total training time: {(time.time() - start_time)/3600:.2f} hours")

Starting training...


Training Epoch 1:   1%|          | 1/107 [00:21<37:11, 21.05s/it, Loss=4.1717, Avg Loss=4.1717]

Epoch 1, Batch 0, Loss: 4.1717


Training Epoch 1:   3%|▎         | 3/107 [00:22<09:15,  5.34s/it, Loss=4.1893, Avg Loss=4.1826]

Epoch 1, Batch 2, Loss: 4.1893


Training Epoch 1:   5%|▍         | 5/107 [00:23<04:09,  2.44s/it, Loss=4.2161, Avg Loss=4.1962]

Epoch 1, Batch 4, Loss: 4.2161


Training Epoch 1:   7%|▋         | 7/107 [00:25<02:20,  1.40s/it, Loss=4.1766, Avg Loss=4.1927]

Epoch 1, Batch 6, Loss: 4.1766


Training Epoch 1:   8%|▊         | 9/107 [00:26<01:47,  1.09s/it, Loss=4.1722, Avg Loss=4.1876]

Epoch 1, Batch 8, Loss: 4.1722


Training Epoch 1:  10%|█         | 11/107 [00:28<01:20,  1.19it/s, Loss=4.1773, Avg Loss=4.1874]

Epoch 1, Batch 10, Loss: 4.1773


Training Epoch 1:  12%|█▏        | 13/107 [00:29<01:10,  1.33it/s, Loss=4.1833, Avg Loss=4.1898]

Epoch 1, Batch 12, Loss: 4.1833


Training Epoch 1:  14%|█▍        | 15/107 [00:30<01:02,  1.48it/s, Loss=4.1768, Avg Loss=4.1883]

Epoch 1, Batch 14, Loss: 4.1768


Training Epoch 1:  16%|█▌        | 17/107 [00:32<01:05,  1.37it/s, Loss=4.1671, Avg Loss=4.1861]

Epoch 1, Batch 16, Loss: 4.1671


Training Epoch 1:  18%|█▊        | 19/107 [00:33<01:04,  1.37it/s, Loss=4.1771, Avg Loss=4.1856]

Epoch 1, Batch 18, Loss: 4.1771


Training Epoch 1:  20%|█▉        | 21/107 [00:35<01:15,  1.13it/s, Loss=4.1667, Avg Loss=4.1846]

Epoch 1, Batch 20, Loss: 4.1667


Training Epoch 1:  21%|██▏       | 23/107 [00:37<01:04,  1.30it/s, Loss=4.1594, Avg Loss=4.1826]

Epoch 1, Batch 22, Loss: 4.1594


Training Epoch 1:  23%|██▎       | 25/107 [00:38<01:05,  1.25it/s, Loss=4.1792, Avg Loss=4.1823]

Epoch 1, Batch 24, Loss: 4.1792


Training Epoch 1:  25%|██▌       | 27/107 [00:40<01:10,  1.13it/s, Loss=4.1738, Avg Loss=4.1813]

Epoch 1, Batch 26, Loss: 4.1738


Training Epoch 1:  27%|██▋       | 29/107 [00:42<01:02,  1.26it/s, Loss=4.1726, Avg Loss=4.1809]

Epoch 1, Batch 28, Loss: 4.1726


Training Epoch 1:  29%|██▉       | 31/107 [00:43<00:57,  1.31it/s, Loss=4.1603, Avg Loss=4.1799]

Epoch 1, Batch 30, Loss: 4.1603


Training Epoch 1:  31%|███       | 33/107 [00:45<01:02,  1.18it/s, Loss=4.1733, Avg Loss=4.1790]

Epoch 1, Batch 32, Loss: 4.1733


Training Epoch 1:  33%|███▎      | 35/107 [00:47<01:05,  1.10it/s, Loss=4.1632, Avg Loss=4.1784]

Epoch 1, Batch 34, Loss: 4.1632


Training Epoch 1:  35%|███▍      | 37/107 [00:48<00:57,  1.22it/s, Loss=4.1477, Avg Loss=4.1773]

Epoch 1, Batch 36, Loss: 4.1477


Training Epoch 1:  36%|███▋      | 39/107 [00:49<00:50,  1.35it/s, Loss=4.1626, Avg Loss=4.1773]

Epoch 1, Batch 38, Loss: 4.1626


Training Epoch 1:  38%|███▊      | 41/107 [00:51<00:55,  1.20it/s, Loss=4.1398, Avg Loss=4.1759]

Epoch 1, Batch 40, Loss: 4.1398


Training Epoch 1:  40%|████      | 43/107 [00:54<01:10,  1.10s/it, Loss=4.1506, Avg Loss=4.1752]

Epoch 1, Batch 42, Loss: 4.1506


Training Epoch 1:  42%|████▏     | 45/107 [00:56<01:02,  1.01s/it, Loss=4.1643, Avg Loss=4.1747]

Epoch 1, Batch 44, Loss: 4.1643


Training Epoch 1:  44%|████▍     | 47/107 [00:57<00:52,  1.14it/s, Loss=4.1696, Avg Loss=4.1738]

Epoch 1, Batch 46, Loss: 4.1696


Training Epoch 1:  46%|████▌     | 49/107 [00:59<00:48,  1.19it/s, Loss=4.1601, Avg Loss=4.1736]

Epoch 1, Batch 48, Loss: 4.1601


Training Epoch 1:  48%|████▊     | 51/107 [01:01<00:46,  1.21it/s, Loss=4.1609, Avg Loss=4.1729]

Epoch 1, Batch 50, Loss: 4.1609


Training Epoch 1:  50%|████▉     | 53/107 [01:03<00:57,  1.06s/it, Loss=4.1530, Avg Loss=4.1722]

Epoch 1, Batch 52, Loss: 4.1530


Training Epoch 1:  51%|█████▏    | 55/107 [01:10<02:02,  2.36s/it, Loss=4.1792, Avg Loss=4.1724]

Epoch 1, Batch 54, Loss: 4.1792


Training Epoch 1:  53%|█████▎    | 57/107 [01:13<01:34,  1.90s/it, Loss=4.1669, Avg Loss=4.1718]

Epoch 1, Batch 56, Loss: 4.1669


Training Epoch 1:  55%|█████▌    | 59/107 [01:15<01:06,  1.38s/it, Loss=4.1585, Avg Loss=4.1715]

Epoch 1, Batch 58, Loss: 4.1585


Training Epoch 1:  57%|█████▋    | 61/107 [01:16<00:47,  1.04s/it, Loss=4.1496, Avg Loss=4.1711]

Epoch 1, Batch 60, Loss: 4.1496


Training Epoch 1:  59%|█████▉    | 63/107 [01:19<00:58,  1.33s/it, Loss=4.1600, Avg Loss=4.1707]

Epoch 1, Batch 62, Loss: 4.1600


Training Epoch 1:  61%|██████    | 65/107 [01:22<01:03,  1.51s/it, Loss=4.1647, Avg Loss=4.1706]

Epoch 1, Batch 64, Loss: 4.1647


Training Epoch 1:  63%|██████▎   | 67/107 [01:29<01:41,  2.54s/it, Loss=4.1538, Avg Loss=4.1702]

Epoch 1, Batch 66, Loss: 4.1538


Training Epoch 1:  64%|██████▍   | 69/107 [01:31<01:06,  1.74s/it, Loss=4.1609, Avg Loss=4.1695]

Epoch 1, Batch 68, Loss: 4.1609


Training Epoch 1:  66%|██████▋   | 71/107 [01:32<00:43,  1.20s/it, Loss=4.1430, Avg Loss=4.1689]

Epoch 1, Batch 70, Loss: 4.1430


Training Epoch 1:  68%|██████▊   | 73/107 [01:36<00:58,  1.71s/it, Loss=4.1574, Avg Loss=4.1688]

Epoch 1, Batch 72, Loss: 4.1574


Training Epoch 1:  70%|███████   | 75/107 [01:39<00:53,  1.67s/it, Loss=4.1612, Avg Loss=4.1684]

Epoch 1, Batch 74, Loss: 4.1612


Training Epoch 1:  72%|███████▏  | 77/107 [01:42<00:47,  1.58s/it, Loss=4.1582, Avg Loss=4.1681]

Epoch 1, Batch 76, Loss: 4.1582


Training Epoch 1:  74%|███████▍  | 79/107 [01:48<01:00,  2.17s/it, Loss=4.1520, Avg Loss=4.1679]

Epoch 1, Batch 78, Loss: 4.1520


Training Epoch 1:  76%|███████▌  | 81/107 [01:51<00:46,  1.80s/it, Loss=4.1409, Avg Loss=4.1673]

Epoch 1, Batch 80, Loss: 4.1409


Training Epoch 1:  78%|███████▊  | 83/107 [01:53<00:30,  1.29s/it, Loss=4.1387, Avg Loss=4.1669]

Epoch 1, Batch 82, Loss: 4.1387


Training Epoch 1:  79%|███████▉  | 85/107 [01:55<00:23,  1.06s/it, Loss=4.1348, Avg Loss=4.1662]

Epoch 1, Batch 84, Loss: 4.1348


Training Epoch 1:  81%|████████▏ | 87/107 [01:57<00:20,  1.05s/it, Loss=4.1622, Avg Loss=4.1658]

Epoch 1, Batch 86, Loss: 4.1622


Training Epoch 1:  83%|████████▎ | 89/107 [02:02<00:32,  1.78s/it, Loss=4.1475, Avg Loss=4.1654]

Epoch 1, Batch 88, Loss: 4.1475


Training Epoch 1:  85%|████████▌ | 91/107 [02:04<00:20,  1.29s/it, Loss=4.1234, Avg Loss=4.1647]

Epoch 1, Batch 90, Loss: 4.1234


Training Epoch 1:  87%|████████▋ | 93/107 [02:11<00:34,  2.49s/it, Loss=4.1661, Avg Loss=4.1645]

Epoch 1, Batch 92, Loss: 4.1661


Training Epoch 1:  89%|████████▉ | 95/107 [02:22<00:52,  4.40s/it, Loss=4.1537, Avg Loss=4.1639]

Epoch 1, Batch 94, Loss: 4.1537


Training Epoch 1:  91%|█████████ | 97/107 [02:27<00:34,  3.48s/it, Loss=4.1634, Avg Loss=4.1640]

Epoch 1, Batch 96, Loss: 4.1634


Training Epoch 1:  93%|█████████▎| 99/107 [02:29<00:17,  2.22s/it, Loss=4.1380, Avg Loss=4.1634]

Epoch 1, Batch 98, Loss: 4.1380


Training Epoch 1:  94%|█████████▍| 101/107 [02:34<00:14,  2.44s/it, Loss=4.1512, Avg Loss=4.1631]

Epoch 1, Batch 100, Loss: 4.1512


Training Epoch 1:  96%|█████████▋| 103/107 [02:37<00:07,  1.84s/it, Loss=4.1559, Avg Loss=4.1627]

Epoch 1, Batch 102, Loss: 4.1559


Training Epoch 1:  98%|█████████▊| 105/107 [02:42<00:05,  2.52s/it, Loss=4.1290, Avg Loss=4.1619]

Epoch 1, Batch 104, Loss: 4.1290


Training Epoch 1: 100%|██████████| 107/107 [02:46<00:00,  2.28s/it, Loss=4.1376, Avg Loss=4.1613]

Epoch 1, Batch 106, Loss: 4.1376


Training Epoch 1: 100%|██████████| 107/107 [02:50<00:00,  1.59s/it, Loss=4.1376, Avg Loss=4.1613]


Epoch 1/100 - Train Loss: 4.161294, LR: 5.00e-06, Time: 170.10s


Training Epoch 2:   1%|          | 1/107 [00:25<45:37, 25.82s/it, Loss=4.1535, Avg Loss=4.1535]

Epoch 2, Batch 0, Loss: 4.1535


libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
libc++abi: terminating due to uncaught exception of type std::__1::system_error: Broken pipe
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x118f0c430>
Traceback (most recent call last):
  File "/Users/ivangolov/anaconda3/envs/CSU-MS2/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1476, in __del__
   

: 