In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from tqdm import tqdm



# Load the training table used by the cells below.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs where
# that exact file exists — consider a configurable data directory.
df = pd.read_csv('/Users/danielmartinezvillegas/Developer/master-ds/✨TDG/audio_pipeline/data/dataset_jose_fwod_vector.csv')

In [49]:
class VectorToVectorDataset(Dataset):
    """Paired input/target vectors stored as DataFrame columns.

    Inputs are read from the columns ``x_0 .. x_{n_features-1}`` and targets
    from ``y_0 .. y_{n_features-1}``. Both are converted to ``float32``
    tensors once, at construction time, so ``__getitem__`` is a cheap tensor
    index instead of a pandas row lookup per sample.

    Args:
        df: DataFrame holding the ``x_i`` / ``y_i`` columns. Later mutations
            of ``df`` are not reflected in the dataset.
        n_features: Length of the input and target vectors (default 16).

    Raises:
        KeyError: If any expected ``x_i`` / ``y_i`` column is missing.
    """

    def __init__(self, df, n_features=16):
        self.df = df
        self.n_features = n_features
        x_cols = [f'x_{i}' for i in range(n_features)]
        y_cols = [f'y_{i}' for i in range(n_features)]
        # Convert the whole table once; torch.tensor copies, so the tensors
        # do not alias the DataFrame's memory.
        self.x = torch.tensor(df[x_cols].to_numpy(), dtype=torch.float32)
        self.y = torch.tensor(df[y_cols].to_numpy(), dtype=torch.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair of float32 vectors for row position ``idx``."""
        return self.x[idx], self.y[idx]


In [50]:
class EnsembleMLP(nn.Module):
    """Fully connected 16 -> 16 network used as one ensemble member.

    Four hidden stages (Linear -> BatchNorm1d -> ReLU), the first two followed
    by Dropout, then a final Linear(32, 16) squashed by a Sigmoid.

    Args:
        dropout: Drop probability for the two Dropout layers.
    """

    def __init__(self, dropout=0.3):
        super().__init__()
        # (in_features, out_features, followed_by_dropout) for each hidden stage
        hidden_stages = (
            (16, 128, True),
            (128, 128, True),
            (128, 64, False),
            (64, 32, False),
        )
        layers = []
        for fan_in, fan_out, with_dropout in hidden_stages:
            layers += [nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), nn.ReLU()]
            if with_dropout:
                layers.append(nn.Dropout(dropout))
        layers += [nn.Linear(32, 16), nn.Sigmoid()]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the sigmoid output."""
        return self.net(x)


In [51]:
def train_single_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=30):
    """Train ``model`` and restore the weights of its best validation epoch.

    Each epoch runs one optimisation pass over ``train_loader`` and then
    averages ``criterion`` over the batches of ``val_loader``. The parameters
    from the epoch with the lowest validation loss are loaded back into the
    model before it is returned.

    Args:
        model: Network to train; moved to ``device`` in place.
        train_loader: Iterable of ``(x, y)`` training batches (must support ``len``).
        val_loader: Iterable of ``(x, y)`` validation batches (must support ``len``).
        criterion: Callable ``criterion(prediction, target)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device on which batches and the model are placed.
        epochs: Number of passes over the training data.

    Returns:
        The same ``model`` instance, holding the best-epoch weights and left
        in eval mode when at least one epoch ran.
    """

    def _snapshot(module):
        # state_dict() hands back references to the live parameter/buffer
        # tensors, which the optimizer keeps updating in place. Clone them so
        # the checkpoint really freezes the weights of this moment.
        return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}

    model.to(device)
    best_model = _snapshot(model)
    best_val_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = model(x)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                y_hat = model(x)
                val_loss += criterion(y_hat, y).item()

        val_loss /= len(val_loader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = _snapshot(model)

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

    model.load_state_dict(best_model)
    return model


In [52]:
def train_ensemble(df, device, n_models=5):
    """Train ``n_models`` independent ``EnsembleMLP`` networks on one 80/20 split.

    The rows of ``df`` are wrapped in a ``VectorToVectorDataset`` and randomly
    split once; every member is trained on the same train/validation loaders
    with Adam (lr=1e-3) and an MSE loss.

    Args:
        df: DataFrame passed to ``VectorToVectorDataset``.
        device: Device handed to ``train_single_model``.
        n_models: Number of ensemble members to train.

    Returns:
        ``(ensemble, val_loader)``: the list of values returned by
        ``train_single_model`` and the validation DataLoader.
    """
    full_ds = VectorToVectorDataset(df)
    n_train = int(0.8 * len(full_ds))
    n_val = len(full_ds) - n_train
    train_part, val_part = random_split(full_ds, [n_train, n_val])

    train_loader = DataLoader(train_part, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_part, batch_size=64)

    def _fit_member(position):
        # Train one freshly initialised member and hand back the result.
        print(f"\nTraining model {position}/{n_models}")
        net = EnsembleMLP()
        adam = torch.optim.Adam(net.parameters(), lr=1e-3)
        mse = nn.MSELoss()
        return train_single_model(net, train_loader, val_loader, mse, adam, device)

    ensemble = [_fit_member(position) for position in range(1, n_models + 1)]
    return ensemble, val_loader


In [53]:
def ensemble_predict(models, x):
    """Return the element-wise mean of every model's output for the input ``x``."""
    member_outputs = []
    for member in models:
        member_outputs.append(member(x))
    return torch.stack(member_outputs).mean(dim=0)

def evaluate_ensemble(models, val_loader, device):
    """Report the ensemble's MSE over ``val_loader`` and plot three examples.

    Every model is moved to ``device`` and switched to eval mode first, so
    Dropout is disabled and BatchNorm uses its running statistics regardless
    of the state the caller left the models in. The averaged predictions are
    compared with the targets via ``mean_squared_error``; the score is printed
    and up to three true-vs-predicted vectors are plotted.

    Args:
        models: List of trained ``nn.Module`` ensemble members (left in eval mode).
        val_loader: Iterable of ``(x, y)`` batches; ``y`` is read on the CPU.
        device: Device on which inference runs.
    """
    # Inference must not depend on whatever mode/device training left behind.
    for member in models:
        member.to(device)
        member.eval()

    all_preds, all_targets = [], []
    for x, y in val_loader:
        x = x.to(device)
        with torch.no_grad():
            pred = ensemble_predict(models, x).cpu().numpy()
        all_preds.append(pred)
        all_targets.append(y.numpy())

    all_preds = np.vstack(all_preds)
    all_targets = np.vstack(all_targets)
    mse = mean_squared_error(all_targets, all_preds)
    print(f"\n✅ Ensemble MSE: {mse:.4f}")

    # Visual sanity check on the first few samples
    for i in range(min(3, len(all_preds))):
        plt.figure(figsize=(10, 2))
        plt.plot(all_targets[i], label='True', marker='o')
        plt.plot(all_preds[i], label='Predicted', marker='x')
        plt.title(f'Example {i+1}')
        plt.grid(True)
        plt.legend()
        plt.show()


In [54]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train the ensemble
models, val_loader = train_ensemble(df, device)

# Evaluate the ensemble's performance.
# NOTE: val_loader is the same split train_single_model used to pick each model's
# best epoch, so this score is not measured on untouched data.
evaluate_ensemble(models, val_loader, device)



Training model 1/5
Epoch 1/30 - Train Loss: 0.1035, Val Loss: 0.0922
Epoch 2/30 - Train Loss: 0.0936, Val Loss: 0.0901
Epoch 3/30 - Train Loss: 0.0918, Val Loss: 0.0881


KeyboardInterrupt: 

# Test 2: weighted ensemble of heterogeneous MLPs (normalized inputs, early stopping, held-out test split)

In [11]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from tqdm import tqdm

# Load the dataset
df = pd.read_csv('/Users/danielmartinezvillegas/Developer/master-ds/✨TDG/audio_pipeline/data/dataset_jose_fwod_vector.csv')

# Z-score every input column in place to improve training stability. A column
# whose standard deviation is not strictly positive is left untouched, which
# avoids dividing by zero.
# NOTE(review): the mean/std are computed on the full table, i.e. before the
# train/val/test split performed later in train_ensemble.
for col in (f'x_{i}' for i in range(16)):
    col_std = df[col].std()
    if not col_std > 0:
        continue
    df[col] = (df[col] - df[col].mean()) / col_std

class VectorToVectorDataset(Dataset):
    """Paired input/target vectors stored as DataFrame columns.

    Inputs come from the columns ``x_0 .. x_{n_features-1}`` and targets from
    ``y_0 .. y_{n_features-1}``. The columns are converted to ``float32``
    tensors once in ``__init__``; indexing then returns tensor rows directly
    rather than doing a pandas row lookup for every sample.

    Args:
        df: DataFrame holding the ``x_i`` / ``y_i`` columns. Changes made to
            ``df`` after construction are not seen by the dataset.
        n_features: Length of the input and target vectors (default 16).

    Raises:
        KeyError: If any expected ``x_i`` / ``y_i`` column is missing.
    """

    def __init__(self, df, n_features=16):
        self.df = df
        self.n_features = n_features
        x_cols = [f'x_{i}' for i in range(n_features)]
        y_cols = [f'y_{i}' for i in range(n_features)]
        # One bulk conversion up front; torch.tensor copies the data.
        self.x = torch.tensor(df[x_cols].to_numpy(), dtype=torch.float32)
        self.y = torch.tensor(df[y_cols].to_numpy(), dtype=torch.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair of float32 vectors for row position ``idx``."""
        return self.x[idx], self.y[idx]

# Define different model architectures for ensemble diversity
class ImprovedMLP(nn.Module):
    """16 -> 16 MLP with an optional skip connection around the second block.

    Four Linear -> BatchNorm1d -> activation blocks (the first two also end in
    Dropout) feed a plain linear output layer. A single activation module
    instance is shared by all four blocks.

    Args:
        dropout: Drop probability used in the first two blocks.
        use_residual: When true, the output of block 1 is added to the output
            of block 2 (both are 128 wide).
        activation: ``'leaky_relu'`` (slope 0.1) or ``'gelu'``; any other
            value selects ReLU.
    """

    def __init__(self, dropout=0.2, use_residual=True, activation='leaky_relu'):
        super().__init__()
        self.use_residual = use_residual
        self.act = self._make_activation(activation)

        self.block1 = self._make_block(16, 128, dropout, True)
        self.block2 = self._make_block(128, 128, dropout, True)
        self.block3 = self._make_block(128, 64, dropout, False)
        self.block4 = self._make_block(64, 32, dropout, False)

        # Linear head, no output non-linearity
        self.output = nn.Linear(32, 16)

    @staticmethod
    def _make_activation(name):
        """Map an activation name to a module, defaulting to ReLU."""
        if name == 'leaky_relu':
            return nn.LeakyReLU(0.1)
        if name == 'gelu':
            return nn.GELU()
        return nn.ReLU()

    def _make_block(self, fan_in, fan_out, drop_p, add_dropout):
        """Build one Linear -> BatchNorm1d -> shared activation [-> Dropout] stage."""
        stage = [nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), self.act]
        if add_dropout:
            stage.append(nn.Dropout(drop_p))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the 16-dimensional linear output for the batch ``x``."""
        hidden = self.block1(x)
        refined = self.block2(hidden)
        if self.use_residual:
            # Skip connection: block 1 and block 2 outputs have the same width
            refined = refined + hidden
        return self.output(self.block4(self.block3(refined)))

# Wider network architecture for ensemble diversity
class WiderMLP(nn.Module):
    """16 -> 16 MLP with wide (256-unit) early layers, BatchNorm1d and GELU.

    Hidden widths are 256, 256, 128 and 64; Dropout follows only the first two
    hidden stages, and the final layer is a plain Linear(64, 16).

    Args:
        dropout: Drop probability for the two Dropout layers.
    """

    def __init__(self, dropout=0.3):
        super().__init__()
        widths = [16, 256, 256, 128, 64]
        stages_with_dropout = 2
        layers = []
        for stage, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            layers.extend([nn.Linear(fan_in, fan_out), nn.BatchNorm1d(fan_out), nn.GELU()])
            if stage < stages_with_dropout:
                layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(widths[-1], 16))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 16-dimensional linear output for the batch ``x``."""
        return self.net(x)

# Deeper network architecture for ensemble diversity
class DeeperMLP(nn.Module):
    """16 -> 16 MLP with five hidden stages, LayerNorm and GELU.

    Hidden widths are 64, 128, 128, 64 and 32. Each stage is
    Linear -> LayerNorm -> GELU; the first four are followed by Dropout and
    the network ends with a plain Linear(32, 16).

    Args:
        dropout: Drop probability for the four Dropout layers.
    """

    def __init__(self, dropout=0.25):
        super().__init__()
        widths = (16, 64, 128, 128, 64, 32)
        last_stage = len(widths) - 2  # index of the one hidden stage without dropout
        layers = []
        for stage in range(len(widths) - 1):
            fan_in, fan_out = widths[stage], widths[stage + 1]
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.LayerNorm(fan_out))
            layers.append(nn.GELU())
            if stage != last_stage:
                layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(widths[-1], 16))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 16-dimensional linear output for the batch ``x``."""
        return self.net(x)

def combined_loss(pred, target, alpha=0.8):
    """Blend MSE with smooth-L1 loss: ``alpha * mse + (1 - alpha) * smooth_l1``.

    Args:
        pred: Predicted tensor.
        target: Ground-truth tensor.
        alpha: Weight of the MSE term; the smooth-L1 term gets ``1 - alpha``.

    Returns:
        Scalar tensor holding the weighted sum of the two mean-reduced losses.
    """
    quadratic_term = alpha * F.mse_loss(pred, target)
    robust_term = (1 - alpha) * F.smooth_l1_loss(pred, target)
    return quadratic_term + robust_term

def train_single_model(model, train_loader, val_loader, criterion, optimizer, device, scheduler=None, epochs=50, patience=10):
    """Train ``model`` with early stopping and restore its best validation epoch.

    Each epoch runs one optimisation pass over ``train_loader`` (gradients
    clipped to a total norm of 1.0), then averages ``criterion`` over the
    batches of ``val_loader``. Training stops early once the validation loss
    has failed to improve for ``patience`` consecutive epochs. The weights of
    the best epoch are loaded back into the model before returning.

    Args:
        model: Network to train; moved to ``device`` in place.
        train_loader: Iterable of ``(x, y)`` training batches (must support ``len``).
        val_loader: Iterable of ``(x, y)`` validation batches (must support ``len``).
        criterion: Callable ``criterion(prediction, target)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device on which batches and the model are placed.
        scheduler: Optional LR scheduler, stepped once per epoch;
            ``ReduceLROnPlateau`` receives the validation loss.
        epochs: Maximum number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        ``(model, best_val_loss)``: the same model instance holding the
        best-epoch weights, and the lowest mean validation loss observed.
    """

    def _snapshot(module):
        # state_dict() returns references to the live parameter/buffer tensors,
        # which keep changing as training continues. Clone them so the stored
        # checkpoint is really the best epoch and not simply the last one.
        return {name: tensor.detach().clone() for name, tensor in module.state_dict().items()}

    model.to(device)
    best_model = _snapshot(model)
    best_val_loss = float('inf')
    no_improve_count = 0

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        # Use tqdm for progress bar
        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}") as pbar:
            for x, y in pbar:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                y_hat = model(x)
                loss = criterion(y_hat, y)
                loss.backward()

                # Gradient clipping to prevent exploding gradients
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

                optimizer.step()
                train_loss += loss.item()
                pbar.set_postfix({"train_loss": f"{loss.item():.4f}"})

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                y_hat = model(x)
                val_loss += criterion(y_hat, y).item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

        # Step the learning rate scheduler if provided
        if scheduler is not None:
            # ReduceLROnPlateau needs the monitored metric; other schedulers do not
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = _snapshot(model)
            no_improve_count = 0
        else:
            no_improve_count += 1

        # Early stopping
        if no_improve_count >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    model.load_state_dict(best_model)
    return model, best_val_loss

def create_model_architecture(model_type, dropout=0.3):
    """Instantiate the network registered under ``model_type``.

    Args:
        model_type: ``'improved'``, ``'wider'`` or ``'deeper'``.
        dropout: Drop probability forwarded to the chosen architecture.

    Returns:
        A new ``ImprovedMLP`` (residual, leaky ReLU), ``WiderMLP`` or ``DeeperMLP``.

    Raises:
        ValueError: If ``model_type`` is not one of the known names.
    """
    # Factories are lazy so only the selected architecture is constructed.
    factories = (
        ('improved', lambda: ImprovedMLP(dropout=dropout, use_residual=True, activation='leaky_relu')),
        ('wider', lambda: WiderMLP(dropout=dropout)),
        ('deeper', lambda: DeeperMLP(dropout=dropout)),
    )
    for known_type, build in factories:
        if model_type == known_type:
            return build()
    raise ValueError(f"Unknown model type: {model_type}")

def _validation_mse(model, loader, device):
    """Return the mean per-batch MSE of ``model`` over ``loader`` in eval mode."""
    mse = nn.MSELoss()
    model.eval()
    total = 0.0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            total += mse(model(x), y).item()
    return total / max(len(loader), 1)


def train_ensemble(df, device, n_models=7):
    """Train a heterogeneous ensemble and derive per-model blending weights.

    The data is split 70/15/15 into train/validation/test. Member ``i`` cycles
    through the 'improved', 'wider' and 'deeper' architectures, draws a random
    dropout in [0.1, 0.4), cycles through Adam / AdamW / RMSprop, and
    alternates between plain MSE and ``combined_loss`` as the training
    criterion.

    Blending weights are the normalised inverse of each member's validation
    MSE. They deliberately do not use the loss returned by
    ``train_single_model``: that value is measured with the member's own
    training criterion, so members trained with ``combined_loss`` would be
    scored on a different scale from members trained with MSE.

    Args:
        df: DataFrame passed to ``VectorToVectorDataset``.
        device: Device used for training and validation.
        n_models: Number of ensemble members to train.

    Returns:
        ``(ensemble, model_weights, test_loader)``: the trained models, their
        weights (summing to 1 when at least one model was trained), and the
        DataLoader over the held-out test split.
    """
    # Split data
    dataset = VectorToVectorDataset(df)
    train_size = int(0.7 * len(dataset))
    val_size = int(0.15 * len(dataset))
    test_size = len(dataset) - train_size - val_size

    train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=64)
    test_loader = DataLoader(test_ds, batch_size=64)

    ensemble = []
    model_weights = []

    # Different model architectures to try
    model_types = ['improved', 'wider', 'deeper']

    for i in range(n_models):
        print(f"\nTraining model {i+1}/{n_models}")

        # Vary model architecture, dropout rate
        model_type = model_types[i % len(model_types)]
        dropout = np.random.uniform(0.1, 0.4)

        model = create_model_architecture(model_type, dropout)

        # Try different optimizers
        if i % 3 == 0:
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
        elif i % 3 == 1:
            optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
        else:
            optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3, weight_decay=1e-5)

        # Learning rate scheduler
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

        # Criterion
        if i % 2 == 0:
            criterion = nn.MSELoss()
        else:
            criterion = combined_loss

        # Train the model
        trained_model, _ = train_single_model(
            model,
            train_loader,
            val_loader,
            criterion,
            optimizer,
            device,
            scheduler=scheduler,
            epochs=100,  # Increased epochs with early stopping
            patience=15
        )

        # Score every member with the same metric so the weights are comparable.
        # Lower validation MSE = higher weight.
        val_mse = _validation_mse(trained_model, val_loader, device)
        weight = 1.0 / (val_mse + 1e-5)

        ensemble.append(trained_model)
        model_weights.append(weight)

    # Normalize weights
    total_weight = sum(model_weights)
    model_weights = [w / total_weight for w in model_weights]

    return ensemble, model_weights, test_loader

def weighted_ensemble_predict(models, weights, x):
    """Return the weighted sum of every model's prediction for ``x``.

    Each model is evaluated under ``torch.no_grad()``; model ``i``'s output is
    scaled by ``weights[i]`` and the scaled outputs are summed.
    """

    def _forward_without_grad(net):
        with torch.no_grad():
            return net(x)

    scaled_outputs = [
        _forward_without_grad(net) * weights[position]
        for position, net in enumerate(models)
    ]
    return torch.stack(scaled_outputs).sum(dim=0)

def evaluate_ensemble(models, weights, test_loader, device):
    """Report the weighted ensemble's MSE on ``test_loader`` and plot diagnostics.

    All models are moved to ``device`` and put in eval mode. The weighted
    predictions are compared with the targets via ``mean_squared_error``; the
    overall score is printed, the MSE of each output element is shown as a bar
    chart, and up to three true-vs-predicted vectors are plotted.

    Args:
        models: List of trained ``nn.Module`` ensemble members.
        weights: One blending weight per model, in the same order.
        test_loader: Iterable of ``(x, y)`` batches; ``y`` is read on the CPU.
        device: Device on which inference runs.
    """
    all_preds, all_targets = [], []

    models = [model.to(device) for model in models]

    for model in models:
        model.eval()

    for x, y in test_loader:
        x = x.to(device)
        with torch.no_grad():
            pred = weighted_ensemble_predict(models, weights, x).cpu().numpy()
        all_preds.append(pred)
        all_targets.append(y.numpy())

    all_preds = np.vstack(all_preds)
    all_targets = np.vstack(all_targets)
    mse = mean_squared_error(all_targets, all_preds)
    print(f"\n✅ Ensemble MSE: {mse:.4f}")

    # Calculate per-element MSE to find where the model struggles
    element_mse = np.mean((all_targets - all_preds)**2, axis=0)

    plt.figure(figsize=(10, 4))
    # Take the bar count from the data rather than assuming 16 outputs
    plt.bar(range(len(element_mse)), element_mse)
    plt.title('MSE by Output Element')
    plt.xlabel('Output Element Index')
    plt.ylabel('MSE')
    plt.show()

    # Plot a few examples
    for i in range(min(3, len(all_preds))):
        plt.figure(figsize=(10, 3))
        plt.plot(all_targets[i], label='True', marker='o')
        plt.plot(all_preds[i], label='Predicted', marker='x')
        plt.title(f'Example {i+1}')
        plt.grid(True)
        plt.legend()
        plt.show()

# Set seeds for reproducibility
def set_seeds(seed=42):
    """Seed the torch (CPU and CUDA) and NumPy RNGs and make cuDNN deterministic.

    Args:
        seed: Integer applied to every random number generator touched here.
    """
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed):
        seed_fn(seed)

    # Request deterministic cuDNN kernels and disable its auto-tuner
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Driver: seed the RNGs, train the ensemble, evaluate it on the test split, save it.
if __name__ == "__main__":
    set_seeds()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Train the ensemble
    models, model_weights, test_loader = train_ensemble(df, device, n_models=7)

    # Evaluate the performance of the ensemble on the held-out test loader
    evaluate_ensemble(models, model_weights, test_loader, device)

    # Save every ensemble member's state_dict together with the blending weights
    # (the whole ensemble, not a single "best" model)
    torch.save({
        'models': [model.state_dict() for model in models],
        'weights': model_weights
    }, 'improved_ensemble_model.pt')

Using device: cpu

Training model 1/7


Epoch 1/100: 100%|██████████| 370/370 [00:01<00:00, 204.19it/s, train_loss=0.0915]


Epoch 1/100 - Train Loss: 0.1159, Val Loss: 0.0948


Epoch 2/100: 100%|██████████| 370/370 [00:01<00:00, 206.03it/s, train_loss=0.1111]


Epoch 2/100 - Train Loss: 0.0962, Val Loss: 0.0929


Epoch 3/100: 100%|██████████| 370/370 [00:01<00:00, 198.76it/s, train_loss=0.0982]


Epoch 3/100 - Train Loss: 0.0938, Val Loss: 0.0904


Epoch 4/100: 100%|██████████| 370/370 [00:01<00:00, 203.02it/s, train_loss=0.0833]


Epoch 4/100 - Train Loss: 0.0926, Val Loss: 0.0884


Epoch 5/100: 100%|██████████| 370/370 [00:01<00:00, 187.11it/s, train_loss=0.0892]


Epoch 5/100 - Train Loss: 0.0913, Val Loss: 0.0880


Epoch 6/100: 100%|██████████| 370/370 [00:01<00:00, 196.30it/s, train_loss=0.0730]


Epoch 6/100 - Train Loss: 0.0907, Val Loss: 0.0872


Epoch 7/100: 100%|██████████| 370/370 [00:01<00:00, 197.72it/s, train_loss=0.0935]


Epoch 7/100 - Train Loss: 0.0894, Val Loss: 0.0866


Epoch 8/100: 100%|██████████| 370/370 [00:02<00:00, 177.53it/s, train_loss=0.1094]


Epoch 8/100 - Train Loss: 0.0889, Val Loss: 0.0851


Epoch 9/100: 100%|██████████| 370/370 [00:01<00:00, 202.47it/s, train_loss=0.0887]


Epoch 9/100 - Train Loss: 0.0883, Val Loss: 0.0848


Epoch 10/100: 100%|██████████| 370/370 [00:01<00:00, 196.32it/s, train_loss=0.0971]


Epoch 10/100 - Train Loss: 0.0879, Val Loss: 0.0844


Epoch 11/100: 100%|██████████| 370/370 [00:01<00:00, 204.44it/s, train_loss=0.0980]


Epoch 11/100 - Train Loss: 0.0873, Val Loss: 0.0852


Epoch 12/100: 100%|██████████| 370/370 [00:02<00:00, 171.15it/s, train_loss=0.0747]


Epoch 12/100 - Train Loss: 0.0866, Val Loss: 0.0832


Epoch 13/100: 100%|██████████| 370/370 [00:01<00:00, 195.88it/s, train_loss=0.0808]


Epoch 13/100 - Train Loss: 0.0863, Val Loss: 0.0834


Epoch 14/100: 100%|██████████| 370/370 [00:01<00:00, 201.95it/s, train_loss=0.0909]


Epoch 14/100 - Train Loss: 0.0861, Val Loss: 0.0832


Epoch 15/100: 100%|██████████| 370/370 [00:01<00:00, 197.25it/s, train_loss=0.0793]


Epoch 15/100 - Train Loss: 0.0856, Val Loss: 0.0826


Epoch 16/100: 100%|██████████| 370/370 [00:01<00:00, 207.63it/s, train_loss=0.0889]


Epoch 16/100 - Train Loss: 0.0852, Val Loss: 0.0822


Epoch 17/100: 100%|██████████| 370/370 [00:02<00:00, 180.45it/s, train_loss=0.0823]


Epoch 17/100 - Train Loss: 0.0853, Val Loss: 0.0821


Epoch 18/100: 100%|██████████| 370/370 [00:01<00:00, 200.38it/s, train_loss=0.0768]


Epoch 18/100 - Train Loss: 0.0847, Val Loss: 0.0820


Epoch 19/100: 100%|██████████| 370/370 [00:01<00:00, 192.56it/s, train_loss=0.0974]


Epoch 19/100 - Train Loss: 0.0846, Val Loss: 0.0819


Epoch 20/100: 100%|██████████| 370/370 [00:01<00:00, 202.18it/s, train_loss=0.0972]


Epoch 20/100 - Train Loss: 0.0846, Val Loss: 0.0818


Epoch 21/100: 100%|██████████| 370/370 [00:01<00:00, 207.24it/s, train_loss=0.0912]


Epoch 21/100 - Train Loss: 0.0841, Val Loss: 0.0806


Epoch 22/100: 100%|██████████| 370/370 [00:01<00:00, 188.28it/s, train_loss=0.1043]


Epoch 22/100 - Train Loss: 0.0840, Val Loss: 0.0807


Epoch 23/100: 100%|██████████| 370/370 [00:01<00:00, 193.92it/s, train_loss=0.1066]


Epoch 23/100 - Train Loss: 0.0840, Val Loss: 0.0803


Epoch 24/100: 100%|██████████| 370/370 [00:01<00:00, 196.08it/s, train_loss=0.0921]


Epoch 24/100 - Train Loss: 0.0838, Val Loss: 0.0811


Epoch 25/100: 100%|██████████| 370/370 [00:01<00:00, 208.83it/s, train_loss=0.0964]


Epoch 25/100 - Train Loss: 0.0836, Val Loss: 0.0800


Epoch 26/100: 100%|██████████| 370/370 [00:01<00:00, 207.55it/s, train_loss=0.0863]


Epoch 26/100 - Train Loss: 0.0833, Val Loss: 0.0802


Epoch 27/100: 100%|██████████| 370/370 [00:01<00:00, 194.13it/s, train_loss=0.0866]


Epoch 27/100 - Train Loss: 0.0831, Val Loss: 0.0800


Epoch 28/100: 100%|██████████| 370/370 [00:01<00:00, 209.93it/s, train_loss=0.0948]


Epoch 28/100 - Train Loss: 0.0832, Val Loss: 0.0795


Epoch 29/100: 100%|██████████| 370/370 [00:01<00:00, 199.61it/s, train_loss=0.0780]


Epoch 29/100 - Train Loss: 0.0832, Val Loss: 0.0808


Epoch 30/100: 100%|██████████| 370/370 [00:01<00:00, 210.65it/s, train_loss=0.0885]


Epoch 30/100 - Train Loss: 0.0828, Val Loss: 0.0796


Epoch 31/100: 100%|██████████| 370/370 [00:01<00:00, 204.00it/s, train_loss=0.0818]


Epoch 31/100 - Train Loss: 0.0828, Val Loss: 0.0807


Epoch 32/100: 100%|██████████| 370/370 [00:01<00:00, 188.60it/s, train_loss=0.0947]


Epoch 32/100 - Train Loss: 0.0828, Val Loss: 0.0801


Epoch 33/100: 100%|██████████| 370/370 [00:01<00:00, 204.54it/s, train_loss=0.0899]


Epoch 33/100 - Train Loss: 0.0827, Val Loss: 0.0810


Epoch 34/100: 100%|██████████| 370/370 [00:01<00:00, 190.25it/s, train_loss=0.0784]


Epoch 34/100 - Train Loss: 0.0824, Val Loss: 0.0792


Epoch 35/100: 100%|██████████| 370/370 [00:01<00:00, 194.87it/s, train_loss=0.0804]


Epoch 35/100 - Train Loss: 0.0823, Val Loss: 0.0794


Epoch 36/100: 100%|██████████| 370/370 [00:01<00:00, 210.19it/s, train_loss=0.0974]


Epoch 36/100 - Train Loss: 0.0824, Val Loss: 0.0794


Epoch 37/100: 100%|██████████| 370/370 [00:02<00:00, 146.59it/s, train_loss=0.1039]


Epoch 37/100 - Train Loss: 0.0822, Val Loss: 0.0794


Epoch 38/100: 100%|██████████| 370/370 [00:01<00:00, 193.75it/s, train_loss=0.0811]


Epoch 38/100 - Train Loss: 0.0823, Val Loss: 0.0788


Epoch 39/100: 100%|██████████| 370/370 [00:01<00:00, 208.72it/s, train_loss=0.0876]


Epoch 39/100 - Train Loss: 0.0821, Val Loss: 0.0793


Epoch 40/100: 100%|██████████| 370/370 [00:01<00:00, 187.94it/s, train_loss=0.0702]


Epoch 40/100 - Train Loss: 0.0821, Val Loss: 0.0786


Epoch 41/100: 100%|██████████| 370/370 [00:01<00:00, 203.57it/s, train_loss=0.0781]


Epoch 41/100 - Train Loss: 0.0817, Val Loss: 0.0793


Epoch 42/100: 100%|██████████| 370/370 [00:01<00:00, 216.91it/s, train_loss=0.1006]


Epoch 42/100 - Train Loss: 0.0819, Val Loss: 0.0787


Epoch 43/100: 100%|██████████| 370/370 [00:01<00:00, 209.95it/s, train_loss=0.0942]


Epoch 43/100 - Train Loss: 0.0817, Val Loss: 0.0779


Epoch 44/100: 100%|██████████| 370/370 [00:01<00:00, 195.17it/s, train_loss=0.1016]


Epoch 44/100 - Train Loss: 0.0818, Val Loss: 0.0784


Epoch 45/100: 100%|██████████| 370/370 [00:01<00:00, 213.43it/s, train_loss=0.0667]


Epoch 45/100 - Train Loss: 0.0816, Val Loss: 0.0784


Epoch 46/100: 100%|██████████| 370/370 [00:01<00:00, 193.30it/s, train_loss=0.0767]


Epoch 46/100 - Train Loss: 0.0816, Val Loss: 0.0782


Epoch 47/100: 100%|██████████| 370/370 [00:01<00:00, 193.49it/s, train_loss=0.0932]


Epoch 47/100 - Train Loss: 0.0817, Val Loss: 0.0785


Epoch 48/100: 100%|██████████| 370/370 [00:01<00:00, 197.78it/s, train_loss=0.0947]


Epoch 48/100 - Train Loss: 0.0818, Val Loss: 0.0779


Epoch 49/100: 100%|██████████| 370/370 [00:01<00:00, 201.54it/s, train_loss=0.0910]


Epoch 49/100 - Train Loss: 0.0816, Val Loss: 0.0785


Epoch 50/100: 100%|██████████| 370/370 [00:01<00:00, 202.88it/s, train_loss=0.0973]


Epoch 50/100 - Train Loss: 0.0813, Val Loss: 0.0781


Epoch 51/100: 100%|██████████| 370/370 [00:01<00:00, 202.58it/s, train_loss=0.0832]


Epoch 51/100 - Train Loss: 0.0813, Val Loss: 0.0779


Epoch 52/100: 100%|██████████| 370/370 [00:01<00:00, 206.52it/s, train_loss=0.0879]


Epoch 52/100 - Train Loss: 0.0813, Val Loss: 0.0788


Epoch 53/100: 100%|██████████| 370/370 [00:02<00:00, 184.82it/s, train_loss=0.0799]


Epoch 53/100 - Train Loss: 0.0812, Val Loss: 0.0782


Epoch 54/100: 100%|██████████| 370/370 [00:01<00:00, 195.81it/s, train_loss=0.0809]


Epoch 54/100 - Train Loss: 0.0813, Val Loss: 0.0788


Epoch 55/100: 100%|██████████| 370/370 [00:01<00:00, 191.11it/s, train_loss=0.0748]


Epoch 55/100 - Train Loss: 0.0805, Val Loss: 0.0778


Epoch 56/100: 100%|██████████| 370/370 [00:01<00:00, 197.88it/s, train_loss=0.1045]


Epoch 56/100 - Train Loss: 0.0803, Val Loss: 0.0773


Epoch 57/100: 100%|██████████| 370/370 [00:01<00:00, 198.70it/s, train_loss=0.0768]


Epoch 57/100 - Train Loss: 0.0802, Val Loss: 0.0769


Epoch 58/100: 100%|██████████| 370/370 [00:01<00:00, 205.97it/s, train_loss=0.0713]


Epoch 58/100 - Train Loss: 0.0802, Val Loss: 0.0770


Epoch 59/100: 100%|██████████| 370/370 [00:01<00:00, 198.72it/s, train_loss=0.0785]


Epoch 59/100 - Train Loss: 0.0802, Val Loss: 0.0773


Epoch 60/100: 100%|██████████| 370/370 [00:01<00:00, 202.53it/s, train_loss=0.0803]


Epoch 60/100 - Train Loss: 0.0801, Val Loss: 0.0776


Epoch 61/100:  63%|██████▎   | 233/370 [00:01<00:00, 185.34it/s, train_loss=0.0939]


KeyboardInterrupt: 