In [1]:
import os
import glob
import math
from transformers import get_cosine_schedule_with_warmup
import torch
from collections import Counter
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
)
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import gc

In [2]:
def clear_memory():
    """Release unreferenced Python objects and cached accelerator memory.

    Garbage collection runs first: emptying a backend cache cannot hand back
    memory that is still owned by (cyclically referenced) Python objects, so
    collecting afterwards would leave that memory cached until the next call.
    Each backend cache is only touched when that backend is available.
    """
    gc.collect()
    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [3]:
# --- Configuration ---
# Every tunable value of the experiment lives in this one dictionary.
config = dict(
    data_dir="data",
    model_name="dbmdz/bert-base-turkish-cased",
    batch_size=16,
    num_epochs=15,
    max_length=256,
    num_folds=10,
    early_stopping_patience=3,
    output_dir="dbmdz/bert-base-turkish-cased-finetuned",
    # Learning rate parameters
    initial_learning_rate=5e-5,
    # NOTE(review): min_learning_rate is not referenced by the code visible in this notebook.
    min_learning_rate=1e-7,
    # Both keys below hold the same warmup *fraction* (10% of training steps):
    # get_scheduler reads "warmup_steps", train_and_evaluate reads "warmup_ratio".
    warmup_steps=0.1,
    warmup_ratio=0.1,
    # Weight decay parameters (interpolated per epoch by get_weight_decay)
    initial_weight_decay=0.01,
    final_weight_decay=0.1,
    weight_decay_schedule="linear",  # Use linear decay
)

# Create the per-model output folders; "/" in the hub id is replaced so it is a single path component.
model_name = config["model_name"].replace("/", "_")
for parent_dir in ("fold_metrics", "plots"):
    os.makedirs(os.path.join(parent_dir, model_name), exist_ok=True)

# --- Device Setup ---
# Preference order: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Initialize best metrics
best_params = None
best_f1 = 0.0

Using device: mps


# --- Load and Preprocess Dataset ---

In [4]:
def load_dataset(data_dir, encoding='ISO-8859-1'):
    """Read one text sample per ``.txt`` file from per-author sub-directories.

    Args:
        data_dir: Directory whose sub-directories are named after authors.
        encoding: Text encoding used to decode the files. Defaults to the
            historical 'ISO-8859-1'.
            NOTE(review): ISO-8859-1 has no code points for the Turkish letters
            ğ, ş, ı and İ; if the corpus is stored as UTF-8 or ISO-8859-9,
            pass that encoding instead — confirm against the source files.

    Returns:
        (texts, labels, author_to_label): file contents, the integer label of
        each text, and the author-name -> label mapping. Labels are contiguous
        (0..n_authors-1) in sorted author-name order.
    """
    texts, labels = [], []
    # Only real directories become classes; a stray file in data_dir must not
    # consume a label id and leave a gap in the label range.
    authors = sorted(
        entry for entry in os.listdir(data_dir)
        if not entry.startswith('.') and os.path.isdir(os.path.join(data_dir, entry))
    )
    author_to_label = {author: idx for idx, author in enumerate(authors)}

    for author, label in author_to_label.items():
        author_dir = os.path.join(data_dir, author)
        # Sorted so the sample order (and therefore any seeded split) does not
        # depend on the filesystem's listing order.
        for file_name in sorted(os.listdir(author_dir)):
            if not file_name.endswith('.txt'):
                continue
            file_path = os.path.join(author_dir, file_name)
            try:
                with open(file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except (OSError, UnicodeError) as e:
                # Best effort: report the unreadable file and keep loading.
                print(f"Error reading {file_path}: {e}")
                continue
            texts.append(content)
            labels.append(label)
    return texts, labels, author_to_label

# Read the corpus once; `texts` and `labels` are later handed to cross_validate.
texts, labels, author_to_label = load_dataset(config["data_dir"])
class_counts = Counter(labels)  # number of samples per integer label

print(f"Loaded {len(texts)} samples from {config['data_dir']}.")
print(f"Authors: {author_to_label}")
print(class_counts)


Loaded 1500 samples from data.
Authors: {'AHMET ÇAKAR': 0, 'ALİ SİRMEN': 1, 'ATAOL BEHRAMOĞLU': 2, 'ATİLLA DORSAY': 3, 'AYKAN SEVER': 4, 'AZİZ ÜSTEL': 5, 'CAN ATAKLI': 6, 'DENİZ GÖKÇE': 7, 'EMRE KONGAR': 8, 'GÖZDE BEDELOĞLU': 9, 'HASAN PULUR': 10, 'HİKMET ÇETİNKAYA': 11, 'MEHMET ALİ BİRAND': 12, 'MEHMET DEMİRKOL': 13, 'MELTEM GÜRLE': 14, 'MERYEM KORAY': 15, 'MÜMTAZ SOYSAL': 16, 'NAZAN BEKİROĞLU': 17, 'NAZIM ALPMAN': 18, 'NEDİM HAZAR': 19, 'NEŞE YAŞIN': 20, 'OKAY KARACAN': 21, 'ÖZGE BAŞAK TANELİ': 22, 'REHA MUHTAR': 23, 'RIDVAN DİLMEN': 24, 'RUHAT MENGİ': 25, 'SELİM İLERİ': 26, 'TARHAN ERDEM': 27, 'UFUK BOZKIR': 28, 'YAŞAR SEYMAN': 29}
Counter({0: 50, 1: 50, 2: 50, 3: 50, 4: 50, 5: 50, 6: 50, 7: 50, 8: 50, 9: 50, 10: 50, 11: 50, 12: 50, 13: 50, 14: 50, 15: 50, 16: 50, 17: 50, 18: 50, 19: 50, 20: 50, 21: 50, 22: 50, 23: 50, 24: 50, 25: 50, 26: 50, 27: 50, 28: 50, 29: 50})


# --- Tokenization ---

In [5]:
# Tokenizer shared by every fold (train_and_evaluate reads this global).
tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
# One output unit per author found on disk instead of a hard-coded 30, so the
# classification head always matches the loaded label set.
# NOTE(review): cross_validate loads its own fresh model for each fold, so this
# instance is not the one trained there.
model = AutoModelForSequenceClassification.from_pretrained(
    config["model_name"],
    num_labels=len(author_to_label),  # Number of authors
    problem_type="single_label_classification"
)

class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per access.

    Items are returned as CPU tensors. The training and evaluation loops move
    each collated batch to the target device themselves, so doing it here would
    only add one small transfer per sample and tie the class to a module-level
    ``device`` global.

    Args:
        texts: Sequence of raw strings.
        labels: Sequence of integer class ids, aligned with ``texts``.
        tokenizer: Callable accepting ``(text, max_length=, truncation=,
            padding=, return_tensors=)`` and returning a mapping with
            ``"input_ids"`` and ``"attention_mask"`` tensors of shape
            ``(1, max_length)``.
        max_length: Fixed sequence length every sample is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx],
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        # squeeze(0) removes only the batch axis added by return_tensors="pt";
        # a bare squeeze() would also drop the sequence axis when max_length == 1.
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long),
        }

  torch.utils._pytree._register_pytree_node(
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def save_fold_metrics(true_labels, predictions, num_classes, fold):
    """Compute per-class precision/recall/F1 for one fold and write them to CSV.

    Args:
        true_labels: Ground-truth integer class ids.
        predictions: Predicted integer class ids, aligned with ``true_labels``.
        num_classes: Total number of classes; one CSV row is written per class.
        fold: Fold identifier used in the output file name.

    Returns:
        (precision_avg, recall_avg, f1_avg): unweighted means over all
        ``num_classes`` classes.
    """
    # Model-specific directory ("/" in the hub id is not valid inside one path component).
    model_name = config["model_name"].replace("/", "_")
    metrics_dir = os.path.join("fold_metrics", model_name)
    os.makedirs(metrics_dir, exist_ok=True)

    # Passing the full label range pins each score array to length num_classes.
    # Without it, a class missing from both inputs is silently dropped and the
    # DataFrame below fails with mismatched column lengths.
    # zero_division=0 scores undefined cases as 0 without emitting warnings.
    score_kwargs = dict(labels=np.arange(num_classes), average=None, zero_division=0)
    precision = precision_score(true_labels, predictions, **score_kwargs)
    recall = recall_score(true_labels, predictions, **score_kwargs)
    f1 = f1_score(true_labels, predictions, **score_kwargs)

    # Macro (unweighted) averages
    precision_avg = precision.mean()
    recall_avg = recall.mean()
    f1_avg = f1.mean()

    # One row per class (1-based display names) plus a trailing "Average" row
    metrics_df = pd.DataFrame({
        'Class': [f'Class {i+1}' for i in range(num_classes)] + ['Average'],
        'Precision': list(precision) + [precision_avg],
        'Recall': list(recall) + [recall_avg],
        'F1-Score': list(f1) + [f1_avg]
    })

    output_path = os.path.join(metrics_dir, f"performance_metrics_fold_{fold}.csv")
    metrics_df.to_csv(output_path, index=False)
    print(f"Metrics for fold {fold} saved to {output_path}")

    return precision_avg, recall_avg, f1_avg

In [7]:
def generate_combined_plots(train_metrics, val_metrics, all_labels, all_preds, num_classes, fold):
    """Render a four-panel summary figure for one fold and save it as a PNG.

    Panels: loss curves, precision/recall/F1 curves, per-class score bars and
    the confusion matrix.

    Args:
        train_metrics: Per-epoch dicts with 'loss', 'precision', 'recall', 'f1'.
        val_metrics: Per-epoch dicts with the same keys, one per training epoch.
        all_labels: True class ids used for the per-class panel and the matrix.
        all_preds: Predicted class ids aligned with ``all_labels``.
        num_classes: Number of classes shown on the class axes.
        fold: Fold identifier used in the output file name.
    """
    tracked_keys = ('loss', 'precision', 'recall', 'f1')
    # Pull every curve out of the histories before any figure is created.
    train_hist = {key: [entry[key] for entry in train_metrics] for key in tracked_keys}
    val_hist = {key: [entry[key] for entry in val_metrics] for key in tracked_keys}

    target_path = os.path.join(
        "plots",
        config["model_name"].replace("/", "_"),
        f'combined_metrics_fold_{fold}.png',
    )
    epochs = range(1, len(train_metrics) + 1)
    class_ids = np.arange(num_classes)
    class_names = [f'Class {i+1}' for i in range(num_classes)]

    fig = plt.figure(figsize=(20, 15))
    grid = fig.add_gridspec(3, 2)

    # Panel 1: training vs. validation loss
    loss_ax = fig.add_subplot(grid[0, 0])
    loss_ax.plot(epochs, train_hist['loss'], 'b-', label='Training Loss', marker='o')
    loss_ax.plot(epochs, val_hist['loss'], 'r-', label='Validation Loss', marker='o')
    loss_ax.set_title('Loss Progress')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.grid(True)

    # Panel 2: score curves (solid = train, dashed = validation)
    score_ax = fig.add_subplot(grid[0, 1])
    curves = (
        (train_hist['precision'], 'b-', 'Train Precision', 'o'),
        (train_hist['recall'], 'g-', 'Train Recall', 'o'),
        (train_hist['f1'], 'r-', 'Train F1', 'o'),
        (val_hist['precision'], 'b--', 'Val Precision', 's'),
        (val_hist['recall'], 'g--', 'Val Recall', 's'),
        (val_hist['f1'], 'r--', 'Val F1', 's'),
    )
    for values, line_format, legend_label, marker in curves:
        score_ax.plot(epochs, values, line_format, label=legend_label, marker=marker)
    score_ax.set_title('Metrics Progress')
    score_ax.set_xlabel('Epochs')
    score_ax.set_ylabel('Score')
    score_ax.legend()
    score_ax.grid(True)

    # Panel 3: per-class scores, computed over the full label range
    class_precision = precision_score(all_labels, all_preds, labels=class_ids, average=None, zero_division=0)
    class_recall = recall_score(all_labels, all_preds, labels=class_ids, average=None, zero_division=0)
    class_f1 = f1_score(all_labels, all_preds, labels=class_ids, average=None, zero_division=0)

    bar_width = 0.25
    bars_ax = fig.add_subplot(grid[1, :])
    bars_ax.bar(class_ids - bar_width, class_precision, bar_width, label='Precision', color='blue', alpha=0.7)
    bars_ax.bar(class_ids, class_recall, bar_width, label='Recall', color='green', alpha=0.7)
    bars_ax.bar(class_ids + bar_width, class_f1, bar_width, label='F1-score', color='red', alpha=0.7)
    bars_ax.set_ylabel('Scores')
    bars_ax.set_title('Per-Class Performance')
    bars_ax.set_xticks(class_ids)
    bars_ax.set_xticklabels(class_names, rotation=45)
    bars_ax.legend()
    bars_ax.grid(True)

    # Panel 4: confusion matrix
    matrix_ax = fig.add_subplot(grid[2, :])
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    print(f"Confusion Matrix Shape: {cm.shape}")
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot(
        ax=matrix_ax, cmap='Blues', xticks_rotation=45
    )
    matrix_ax.set_title('Confusion Matrix')

    # Write the figure to disk and release it
    plt.tight_layout()
    plt.savefig(target_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Plots saved to {target_path}")


In [8]:
def focal_loss(logits, labels, alpha=1, gamma=2):
    """Mean multi-class focal loss: ``alpha * (1 - p_t) ** gamma * -log(p_t)``.

    The log-probability of the true class comes from ``log_softmax`` rather
    than ``log(softmax(x) + eps)``. The epsilon variant caps the per-sample
    loss near ``-log(1e-9)`` (about 20.7) and loses precision once the softmax
    underflows, which under-penalises confidently wrong predictions.

    Args:
        logits: Float tensor of shape (batch, num_classes).
        labels: Integer tensor with ``batch`` class ids.
        alpha: Scalar weight applied to every sample.
        gamma: Focusing exponent; 0 reduces the loss to ``alpha`` times
            cross-entropy.

    Returns:
        Scalar tensor: the loss averaged over the batch.
    """
    log_probs = torch.log_softmax(logits, dim=1)
    # squeeze(1) keeps the batch axis even when the batch holds a single sample.
    log_pt = log_probs.gather(1, labels.view(-1, 1)).squeeze(1)
    pt = log_pt.exp()
    per_sample = alpha * (1 - pt) ** gamma * (-log_pt)
    return per_sample.mean()

In [9]:
def get_scheduler(optimizer, num_training_steps):
    """Build a cosine learning-rate schedule preceded by a warmup phase.

    The warmup length is ``config["warmup_steps"]`` interpreted as a fraction
    of ``num_training_steps``, truncated to an integer step count.
    """
    warmup_fraction = config["warmup_steps"]
    return get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(num_training_steps * warmup_fraction),
        num_training_steps=num_training_steps,
        num_cycles=0.5,
    )

def get_weight_decay(epoch, num_epochs):
    """Interpolate the weight decay between its configured initial and final value.

    Both schedules start at ``config["initial_weight_decay"]`` (progress 0) and
    reach ``config["final_weight_decay"]`` at progress 1. The cosine branch
    previously used ``(1 + cos(pi * p)) / 2``, which runs the opposite way
    (final -> initial) and contradicted the linear branch.

    Args:
        epoch: Index of the current epoch. The training loop passes a 0-based
            index, so progress stops at ``(num_epochs - 1) / num_epochs`` and
            the final value itself is not reached during training.
        num_epochs: Total number of epochs (must be non-zero).

    Returns:
        The weight decay to use for ``epoch``.
    """
    progress = epoch / num_epochs
    start = config["initial_weight_decay"]
    span = config["final_weight_decay"] - start
    if config["weight_decay_schedule"] == "linear":
        blend = progress
    else:  # cosine (also the fallback for any other schedule name)
        # Half-cosine ramp: 0 at progress 0, 1 at progress 1.
        blend = (1 - math.cos(math.pi * progress)) / 2
    return start + span * blend

In [10]:
# NOTE(review): a function with this same name is also defined in an earlier
# cell; this later definition is the one in effect when the notebook runs top
# to bottom. Consider keeping a single copy.
def save_fold_metrics(true_labels, predictions, num_classes, fold):
    """Compute per-class precision/recall/F1 for one fold and write them to CSV.

    Args:
        true_labels: Ground-truth integer class ids.
        predictions: Predicted integer class ids, aligned with ``true_labels``.
        num_classes: Total number of classes; one CSV row is written per class.
        fold: Fold identifier used in the output file name.

    Returns:
        (precision_avg, recall_avg, f1_avg): unweighted means over all
        ``num_classes`` classes.
    """
    # Model-specific directory ("/" in the hub id is not valid inside one path component).
    model_name = config["model_name"].replace("/", "_")
    metrics_dir = os.path.join("fold_metrics", model_name)
    os.makedirs(metrics_dir, exist_ok=True)

    # Passing the full label range pins each score array to length num_classes.
    # Without it, a class missing from both inputs is silently dropped and the
    # DataFrame below fails with mismatched column lengths.
    # zero_division=0 scores undefined cases as 0 without emitting warnings.
    score_kwargs = dict(labels=np.arange(num_classes), average=None, zero_division=0)
    precision = precision_score(true_labels, predictions, **score_kwargs)
    recall = recall_score(true_labels, predictions, **score_kwargs)
    f1 = f1_score(true_labels, predictions, **score_kwargs)

    # Macro (unweighted) averages
    precision_avg = precision.mean()
    recall_avg = recall.mean()
    f1_avg = f1.mean()

    # One row per class (1-based display names) plus a trailing "Average" row
    metrics_df = pd.DataFrame({
        'Class': [f'Class {i+1}' for i in range(num_classes)] + ['Average'],
        'Precision': list(precision) + [precision_avg],
        'Recall': list(recall) + [recall_avg],
        'F1-Score': list(f1) + [f1_avg]
    })

    output_path = os.path.join(metrics_dir, f"performance_metrics_fold_{fold}.csv")
    metrics_df.to_csv(output_path, index=False)
    print(f"Metrics for fold {fold} saved to {output_path}")

    return precision_avg, recall_avg, f1_avg

# --- Training Loop ---

In [11]:
def train_epoch(model, dataloader, optimizer, scheduler, device, epoch_num, num_epochs):
    """Run one training epoch with focal loss and return aggregate metrics.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        dataloader: Yields dicts with "input_ids", "attention_mask", "labels".
        optimizer: Optimizer; the ``weight_decay`` of every param group is
            overwritten with this epoch's scheduled value.
        scheduler: Learning-rate scheduler, stepped once per batch.
        device: Device the batch tensors are moved to.
        epoch_num: Index of the current epoch, forwarded to get_weight_decay.
        num_epochs: Total number of epochs, forwarded to get_weight_decay.

    Returns:
        dict with 'loss' (mean batch loss), weighted 'precision', 'recall',
        'f1', and the last 'learning_rate' and 'weight_decay' used.
    """
    model.train()
    total_loss = 0
    all_preds, all_labels = [], []
    progress_bar = tqdm(dataloader, desc="Training", unit="batch")

    # Update weight decay for this epoch
    current_weight_decay = get_weight_decay(epoch_num, num_epochs)
    for param_group in optimizer.param_groups:
        param_group['weight_decay'] = current_weight_decay

    for batch in progress_bar:
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        # Labels are not passed to the model: its built-in loss would be
        # computed and thrown away, because the focal loss below is optimised.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        loss = focal_loss(logits, labels, alpha=1, gamma=2)
        loss.backward()

        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()  # Update learning rate

        batch_loss = loss.item()
        total_loss += batch_loss

        # Collect predictions for metrics
        preds = torch.argmax(logits, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

        # Update progress bar with current learning rate and weight decay
        current_lr = scheduler.get_last_lr()[0]
        progress_bar.set_postfix({
            'loss': f'{batch_loss:.4f}',
            'lr': f'{current_lr:.2e}',
            'wd': f'{current_weight_decay:.2e}'
        })

    # zero_division=0 gives the same scores as the default for classes that
    # were never predicted, without the undefined-metric warning per call.
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    return {
        'loss': total_loss / len(dataloader),
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'learning_rate': scheduler.get_last_lr()[0],
        'weight_decay': current_weight_decay
    }

def evaluate_epoch(model, dataloader, device):
    """Evaluate the model on a dataloader without updating any weights.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
        dataloader: Yields dicts with "input_ids", "attention_mask", "labels".
        device: Device the batch tensors are moved to.

    Returns:
        dict with 'loss' (mean focal loss per batch), 'accuracy', weighted
        'precision', 'recall', 'f1', plus the raw 'predictions' and
        'true_labels' lists.
    """
    model.eval()
    total_loss = 0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            # Labels are not passed to the model: its built-in loss would be
            # unused, since the reported loss is the focal loss below.
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            loss = focal_loss(logits, labels, alpha=1, gamma=2)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # zero_division=0 gives the same scores as the default for classes that
    # were never predicted, without the undefined-metric warning per call.
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

    return {
        'loss': total_loss / len(dataloader),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'predictions': all_preds,
        'true_labels': all_labels
    }

def train_and_evaluate(model, train_data, train_labels, val_data, val_labels, fold):
    """Fine-tune ``model`` on one fold with early stopping on validation F1.

    The checkpoint of the best epoch is written to ``config['output_dir']``;
    after training, the fold's plots and per-class metrics CSV are produced
    from the best epoch's validation predictions.

    Args:
        model: Sequence-classification model to train (moved to ``device``).
        train_data, train_labels: Training texts and their integer labels.
        val_data, val_labels: Validation texts and their integer labels.
        fold: Fold identifier used in log messages and output file names.

    Returns:
        The validation metrics dict (as returned by evaluate_epoch) of the
        epoch with the highest validation F1.

    Raises:
        RuntimeError: If no epoch was run (``config['num_epochs']`` < 1).
    """
    # Create datasets and dataloaders
    model = model.to(device)
    clear_memory()
    train_dataset = TextDataset(train_data, train_labels, tokenizer, config["max_length"])
    val_dataset = TextDataset(val_data, val_labels, tokenizer, config["max_length"])

    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config['batch_size'])

    # The scheduler is stepped once per batch, so steps = batches * epochs.
    num_training_steps = len(train_loader) * config['num_epochs']
    num_warmup_steps = int(num_training_steps * config['warmup_ratio'])

    # Class count comes from the model head instead of a hard-coded 30, so the
    # plots and CSV always match the model being trained.
    num_classes = model.config.num_labels

    # Initialize optimizer with initial learning rate and weight decay
    # (train_epoch overwrites the weight decay at the start of every epoch).
    optimizer = optim.AdamW(
        model.parameters(),
        lr=config['initial_learning_rate'],
        weight_decay=config['initial_weight_decay'],
        betas=(0.9, 0.999),
        eps=1e-8
    )

    # Initialize learning rate scheduler with warmup and cosine decay
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )

    best_val_f1 = 0
    # None until the first epoch finishes. The first epoch is always recorded,
    # so a run whose validation F1 stays at 0 no longer ends with an unbound
    # `best_metrics` after the loop.
    best_metrics = None
    early_stopping_counter = 0
    train_metrics_history = []
    val_metrics_history = []

    print(f"\n{'='*50}")
    print(f"Training Fold {fold}")
    print(f"{'='*50}")
    print(f"Total steps: {num_training_steps}, Warmup steps: {num_warmup_steps}")

    for epoch in range(config['num_epochs']):
        print(f"\nEpoch {epoch + 1}/{config['num_epochs']}")
        print("-" * 30)

        # Training
        train_metrics = train_epoch(
            model, train_loader, optimizer, scheduler, device,
            epoch, config['num_epochs']
        )
        train_metrics_history.append(train_metrics)
        clear_memory()

        # Validation
        val_metrics = evaluate_epoch(model, val_loader, device)
        val_metrics_history.append(val_metrics)
        clear_memory()

        # Print metrics
        print(f"\nTraining Metrics:")
        print(f"Loss: {train_metrics['loss']:.4f}")
        print(f"Precision: {train_metrics['precision']:.4f}")
        print(f"Recall: {train_metrics['recall']:.4f}")
        print(f"F1-Score: {train_metrics['f1']:.4f}")
        print(f"Learning Rate: {train_metrics['learning_rate']:.2e}")
        print(f"Weight Decay: {train_metrics['weight_decay']:.2e}")

        print(f"\nValidation Metrics:")
        print(f"Loss: {val_metrics['loss']:.4f}")
        print(f"Accuracy: {val_metrics['accuracy']:.4f}")
        print(f"Precision: {val_metrics['precision']:.4f}")
        print(f"Recall: {val_metrics['recall']:.4f}")
        print(f"F1-Score: {val_metrics['f1']:.4f}")

        # Early stopping check
        if best_metrics is None or val_metrics['f1'] > best_val_f1:
            best_val_f1 = val_metrics['f1']
            early_stopping_counter = 0
            best_metrics = val_metrics

            # Save the best model for this fold
            os.makedirs(config['output_dir'], exist_ok=True)
            model_save_path = os.path.join(config['output_dir'], f'best_model_fold_{fold}.pt')
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_f1': best_val_f1,
                'config': config
            }, model_save_path)
            print(f"\nSaved best model for fold {fold} with F1: {best_val_f1:.4f}")
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= config['early_stopping_patience']:
            print("\nEarly stopping triggered")
            break

    if best_metrics is None:
        raise RuntimeError("No training epoch was run; check config['num_epochs'].")

    # Generate visualizations from the best epoch's validation predictions
    generate_combined_plots(
        train_metrics_history,
        val_metrics_history,
        best_metrics['true_labels'],
        best_metrics['predictions'],
        num_classes,
        fold
    )

    # Save fold metrics (the CSV is the output; the returned averages are not needed here)
    save_fold_metrics(
        best_metrics['true_labels'],
        best_metrics['predictions'],
        num_classes,
        fold
    )

    return best_metrics

def cross_validate(texts, labels):
    """Run stratified k-fold cross-validation and report per-fold and mean metrics.

    A fresh pretrained model is loaded for every fold. Per-fold results are
    written to 'fold_results.csv' and the module-level ``best_f1`` is set to
    the mean F1 across folds.

    Args:
        texts: Sequence of raw text samples.
        labels: Integer class ids aligned with ``texts`` (ids start at 0).

    Returns:
        dict with the mean 'loss', 'accuracy', 'precision', 'recall' and 'f1'
        across all folds.
    """
    global best_f1

    # (key in the metrics dict, label used when printing)
    metric_titles = (
        ('loss', 'Loss'),
        ('accuracy', 'Accuracy'),
        ('precision', 'Precision'),
        ('recall', 'Recall'),
        ('f1', 'F1-Score'),
    )

    # Size the classification head from the data instead of a hard-coded 30.
    # max + 1 (rather than the number of distinct ids) keeps every label id a
    # valid output index even if the id range has gaps.
    num_labels = int(max(labels)) + 1

    skf = StratifiedKFold(n_splits=config['num_folds'], shuffle=True, random_state=42)
    fold_metrics = []

    for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
        print(f"\nFold {fold + 1}/{config['num_folds']}")
        print(f"Train size: {len(train_idx)}, Validation size: {len(val_idx)}")
        clear_memory()

        # Reset model for each fold so no weights leak between folds
        model = AutoModelForSequenceClassification.from_pretrained(
            config["model_name"],
            num_labels=num_labels,
            problem_type="single_label_classification"
        ).to(device)

        # Split data
        X_train = [texts[i] for i in train_idx]
        y_train = [labels[i] for i in train_idx]
        X_val = [texts[i] for i in val_idx]
        y_val = [labels[i] for i in val_idx]

        # Train and evaluate (folds are reported 1-based)
        metrics = train_and_evaluate(
            model, X_train, y_train, X_val, y_val, fold + 1
        )

        fold_metrics.append({
            'fold': fold + 1,
            **{key: metrics[key] for key, _ in metric_titles}
        })

        # Print fold results
        print(f"\nFold {fold + 1} Results:")
        for key, title in metric_titles:
            print(f"{title}: {metrics[key]:.4f}")

        del model
        clear_memory()

    # Calculate average metrics across folds
    avg_metrics = {
        key: np.mean([m[key] for m in fold_metrics])
        for key, _ in metric_titles
    }

    print('\nAverage metrics across folds:')
    for key, title in metric_titles:
        print(f"{title}: {avg_metrics[key]:.4f}")

    # Save overall results
    results_df = pd.DataFrame(fold_metrics)
    results_df.to_csv('fold_results.csv', index=False)
    print("\nSaved detailed fold results to 'fold_results.csv'")

    best_f1 = avg_metrics['f1']

    return avg_metrics

# --- Generate Overall Results ---

In [12]:
def generate_overall_results(fold_metrics_dir, output_filename="overall_performance_metrics.csv"):
    """Average the per-fold metric CSVs into one per-class summary table.

    Args:
        fold_metrics_dir: Directory holding 'performance_metrics_fold_*.csv'
            files with columns Class, Precision, Recall, F1-Score.
        output_filename: Path of the CSV the summary is written to.

    Returns:
        DataFrame with one row per class (``Class`` holds the class number,
        sorted ascending) followed by an "Average" row of the column means.

    Raises:
        FileNotFoundError: If the directory contains no fold CSV.
    """
    pattern = os.path.join(fold_metrics_dir, "performance_metrics_fold_*.csv")
    fold_files = glob.glob(pattern)
    if len(fold_files) == 0:
        raise FileNotFoundError("No fold-level performance metrics files found in the directory.")

    # Stack every fold's table on top of each other
    combined_df = pd.concat([pd.read_csv(path) for path in fold_files])

    # Each fold file carries its own "Average" row; leave those out so only
    # real classes are averaged across folds.
    is_average_row = combined_df["Class"].str.contains("Average")
    per_class = combined_df[~is_average_row].groupby("Class").mean().reset_index()

    # Replace "Class N" by the integer N so the rows sort numerically
    per_class["Class"] = per_class["Class"].str.extract(r'(\d+)').astype(int)
    per_class = per_class.sort_values(by="Class").reset_index(drop=True)

    # Overall means across classes become the trailing "Average" row
    summary_row = pd.DataFrame(
        {
            "Class": ["Average"],
            "Precision": [per_class["Precision"].mean()],
            "Recall": [per_class["Recall"].mean()],
            "F1-Score": [per_class["F1-Score"].mean()],
        }
    )
    aggregated_metrics = pd.concat([per_class, summary_row], ignore_index=True)

    aggregated_metrics.to_csv(output_filename, index=False)
    print(f"Overall performance metrics saved to '{output_filename}'.")

    return aggregated_metrics


In [13]:

def display_as_dataframe(aggregated_metrics):
    """
    Render the overall performance metrics as a centred, captioned table.

    The input frame is not modified; a renamed copy is styled and shown.

    Args:
        aggregated_metrics (pd.DataFrame): DataFrame containing overall performance metrics.
    """
    # Column headers expected by the report format: blank class header, "F-Score" for F1
    column_titles = {"Class": " ", "Precision": "Precision", "Recall": "Recall", "F1-Score": "F-Score"}
    header_style = {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold')]}
    cell_style = {'selector': 'td', 'props': [('text-align', 'center')]}

    renamed = aggregated_metrics.rename(columns=column_titles)
    styled = renamed.style.set_table_styles([header_style, cell_style])
    styled = styled.set_caption("Overall Performance Metrics")

    display(styled)


# --- Execute Training ---

In [14]:
# --- Execute Cross-Validation ---
# No hyperparameter search happens here: cross_validate trains one model per
# fold with the fixed `config` and returns the metrics averaged across folds,
# so despite its name `best_metrics` holds those fold averages.
best_metrics = cross_validate(texts, labels)


Fold 1/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 1
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.08s/batch, loss=2.6901, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1149
Precision: 0.1046
Recall: 0.0578
F1-Score: 0.0459
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.6139
Accuracy: 0.1867
Precision: 0.1116
Recall: 0.1867
F1-Score: 0.1037

Saved best model for fold 1 with F1: 0.1037

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.9838, lr=4.98e-05, wd=1.60e-02]



Training Metrics:
Loss: 1.7646
Precision: 0.4872
Recall: 0.4281
F1-Score: 0.4225
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 0.9013
Accuracy: 0.7000
Precision: 0.7790
Recall: 0.7000
F1-Score: 0.7054

Saved best model for fold 1 with F1: 0.7054

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.3028, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.5490
Precision: 0.8133
Recall: 0.8119
F1-Score: 0.8089
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.4439
Accuracy: 0.8267
Precision: 0.8577
Recall: 0.8267
F1-Score: 0.8163

Saved best model for fold 1 with F1: 0.8163

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.0189, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1878
Precision: 0.9076
Recall: 0.9059
F1-Score: 0.9051
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.3096
Accuracy: 0.8733
Precision: 0.8919
Recall: 0.8733
F1-Score: 0.8645

Saved best model for fold 1 with F1: 0.8645

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.0137, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0510
Precision: 0.9747
Recall: 0.9741
F1-Score: 0.9741
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.3179
Accuracy: 0.8400
Precision: 0.8559
Recall: 0.8400
F1-Score: 0.8361

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.08s/batch, loss=0.0276, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0202
Precision: 0.9912
Recall: 0.9911
F1-Score: 0.9911
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.2743
Accuracy: 0.9000
Precision: 0.9121
Recall: 0.9000
F1-Score: 0.8976

Saved best model for fold 1 with F1: 0.8976

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.0013, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0074
Precision: 0.9971
Recall: 0.9970
F1-Score: 0.9970
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.2648
Accuracy: 0.8867
Precision: 0.9119
Recall: 0.8867
F1-Score: 0.8848

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.0003, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0023
Precision: 0.9986
Recall: 0.9985
F1-Score: 0.9985
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.2528
Accuracy: 0.9133
Precision: 0.9275
Recall: 0.9133
F1-Score: 0.9116

Saved best model for fold 1 with F1: 0.9116

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:33<00:00,  1.10s/batch, loss=0.0003, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0008
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.2656
Accuracy: 0.9133
Precision: 0.9259
Recall: 0.9133
F1-Score: 0.9103

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0002, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0005
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.2629
Accuracy: 0.9000
Precision: 0.9120
Recall: 0.9000
F1-Score: 0.8974

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0003, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.2591
Accuracy: 0.9133
Precision: 0.9259
Recall: 0.9133
F1-Score: 0.9103

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_1.png
Metrics for fold 1 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_1.csv

Fold 1 Results:
Loss: 0.2528
Accuracy: 0.9133
Precision: 0.9275
Recall: 0.9133
F1-Score: 0.9116

Fold 2/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 2
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=2.7274, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1591
Precision: 0.0575
Recall: 0.0489
F1-Score: 0.0380
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.5925
Accuracy: 0.1933
Precision: 0.0992
Recall: 0.1933
F1-Score: 0.1029

Saved best model for fold 2 with F1: 0.1029

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=1.1857, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.7969
Precision: 0.4291
Recall: 0.4015
F1-Score: 0.3939
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 1.1528
Accuracy: 0.5800
Precision: 0.6200
Recall: 0.5800
F1-Score: 0.5459

Saved best model for fold 2 with F1: 0.5459

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.9068, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.5989
Precision: 0.7729
Recall: 0.7719
F1-Score: 0.7685
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.5385
Accuracy: 0.7267
Precision: 0.8029
Recall: 0.7267
F1-Score: 0.7233

Saved best model for fold 2 with F1: 0.7233

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.1277, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1881
Precision: 0.9166
Recall: 0.9170
F1-Score: 0.9162
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.3505
Accuracy: 0.8267
Precision: 0.8699
Recall: 0.8267
F1-Score: 0.8222

Saved best model for fold 2 with F1: 0.8222

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0027, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0603
Precision: 0.9686
Recall: 0.9681
F1-Score: 0.9679
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.2304
Accuracy: 0.8667
Precision: 0.8915
Recall: 0.8667
F1-Score: 0.8636

Saved best model for fold 2 with F1: 0.8636

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0044, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0184
Precision: 0.9905
Recall: 0.9904
F1-Score: 0.9903
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.2515
Accuracy: 0.8533
Precision: 0.8768
Recall: 0.8533
F1-Score: 0.8518

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.16s/batch, loss=0.0017, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0079
Precision: 0.9935
Recall: 0.9933
F1-Score: 0.9933
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.2028
Accuracy: 0.9000
Precision: 0.9148
Recall: 0.9000
F1-Score: 0.8976

Saved best model for fold 2 with F1: 0.8976

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0003, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0013
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.1703
Accuracy: 0.9200
Precision: 0.9326
Recall: 0.9200
F1-Score: 0.9177

Saved best model for fold 2 with F1: 0.9177

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0004, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0007
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.1687
Accuracy: 0.9133
Precision: 0.9269
Recall: 0.9133
F1-Score: 0.9110

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [04:23<00:00,  3.10s/batch, loss=0.0003, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.1637
Accuracy: 0.9267
Precision: 0.9393
Recall: 0.9267
F1-Score: 0.9253

Saved best model for fold 2 with F1: 0.9253

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=0.0004, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0005
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.1620
Accuracy: 0.9267
Precision: 0.9382
Recall: 0.9267
F1-Score: 0.9254

Saved best model for fold 2 with F1: 0.9254

Epoch 12/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.07s/batch, loss=0.0003, lr=5.84e-06, wd=7.60e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 5.84e-06
Weight Decay: 7.60e-02

Validation Metrics:
Loss: 0.1607
Accuracy: 0.9267
Precision: 0.9382
Recall: 0.9267
F1-Score: 0.9254

Epoch 13/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=0.0003, lr=2.66e-06, wd=8.20e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.66e-06
Weight Decay: 8.20e-02

Validation Metrics:
Loss: 0.1615
Accuracy: 0.9267
Precision: 0.9382
Recall: 0.9267
F1-Score: 0.9254

Epoch 14/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.07s/batch, loss=0.0003, lr=6.73e-07, wd=8.80e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 6.73e-07
Weight Decay: 8.80e-02

Validation Metrics:
Loss: 0.1621
Accuracy: 0.9267
Precision: 0.9382
Recall: 0.9267
F1-Score: 0.9254

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_2.png
Metrics for fold 2 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_2.csv

Fold 2 Results:
Loss: 0.1620
Accuracy: 0.9267
Precision: 0.9382
Recall: 0.9267
F1-Score: 0.9254

Fold 3/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 3
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=3.4065, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1970
Precision: 0.0599
Recall: 0.0430
F1-Score: 0.0358
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.8732
Accuracy: 0.1667
Precision: 0.1111
Recall: 0.1667
F1-Score: 0.1016

Saved best model for fold 3 with F1: 0.1016

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.07s/batch, loss=0.9373, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.8113
Precision: 0.4381
Recall: 0.4356
F1-Score: 0.4094
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 0.9874
Accuracy: 0.6600
Precision: 0.6809
Recall: 0.6600
F1-Score: 0.6349

Saved best model for fold 3 with F1: 0.6349

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.07s/batch, loss=0.4803, lr=4.85e-05, wd=2.20e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 0.5673
Precision: 0.7654
Recall: 0.7711
F1-Score: 0.7657
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.6606
Accuracy: 0.6933
Precision: 0.7589
Recall: 0.6933
F1-Score: 0.6881

Saved best model for fold 3 with F1: 0.6881

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.15s/batch, loss=0.0690, lr=4.59e-05, wd=2.80e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 0.2004
Precision: 0.9103
Recall: 0.9096
F1-Score: 0.9094
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.4735
Accuracy: 0.8067
Precision: 0.8239
Recall: 0.8067
F1-Score: 0.7883

Saved best model for fold 3 with F1: 0.7883

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0737, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0846
Precision: 0.9503
Recall: 0.9504
F1-Score: 0.9501
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.4283
Accuracy: 0.8200
Precision: 0.8397
Recall: 0.8200
F1-Score: 0.8096

Saved best model for fold 3 with F1: 0.8096

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0331, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0187
Precision: 0.9906
Recall: 0.9904
F1-Score: 0.9904
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.4351
Accuracy: 0.8267
Precision: 0.8402
Recall: 0.8267
F1-Score: 0.8138

Saved best model for fold 3 with F1: 0.8138

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0004, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0056
Precision: 0.9986
Recall: 0.9985
F1-Score: 0.9985
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.3468
Accuracy: 0.8467
Precision: 0.8760
Recall: 0.8467
F1-Score: 0.8365

Saved best model for fold 3 with F1: 0.8365

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0003, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0014
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.3399
Accuracy: 0.8667
Precision: 0.8768
Recall: 0.8667
F1-Score: 0.8595

Saved best model for fold 3 with F1: 0.8595

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0007, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0006
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.3527
Accuracy: 0.8733
Precision: 0.8879
Recall: 0.8733
F1-Score: 0.8706

Saved best model for fold 3 with F1: 0.8706

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0002, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0005
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.3472
Accuracy: 0.8667
Precision: 0.8790
Recall: 0.8667
F1-Score: 0.8633

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0003, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0011
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.3501
Accuracy: 0.8733
Precision: 0.8912
Recall: 0.8733
F1-Score: 0.8702

Epoch 12/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0002, lr=5.84e-06, wd=7.60e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 5.84e-06
Weight Decay: 7.60e-02

Validation Metrics:
Loss: 0.3509
Accuracy: 0.8667
Precision: 0.8813
Recall: 0.8667
F1-Score: 0.8638

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_3.png
Metrics for fold 3 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_3.csv

Fold 3 Results:
Loss: 0.3527
Accuracy: 0.8733
Precision: 0.8879
Recall: 0.8733
F1-Score: 0.8706

Fold 4/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 4
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=2.6691, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1434
Precision: 0.0958
Recall: 0.0607
F1-Score: 0.0497
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.7482
Accuracy: 0.1800
Precision: 0.1028
Recall: 0.1800
F1-Score: 0.1078

Saved best model for fold 4 with F1: 0.1078

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.7483, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.6820
Precision: 0.4682
Recall: 0.4489
F1-Score: 0.4316
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 0.8310
Accuracy: 0.7067
Precision: 0.7405
Recall: 0.7067
F1-Score: 0.6880

Saved best model for fold 4 with F1: 0.6880

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.5670, lr=4.85e-05, wd=2.20e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 0.5008
Precision: 0.8068
Recall: 0.8052
F1-Score: 0.8010
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.4268
Accuracy: 0.8133
Precision: 0.8415
Recall: 0.8133
F1-Score: 0.8055

Saved best model for fold 4 with F1: 0.8055

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.15s/batch, loss=0.1412, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1641
Precision: 0.9315
Recall: 0.9296
F1-Score: 0.9298
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.3877
Accuracy: 0.7800
Precision: 0.8413
Recall: 0.7800
F1-Score: 0.7739

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:39<00:00,  1.17s/batch, loss=0.0052, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0583
Precision: 0.9688
Recall: 0.9689
F1-Score: 0.9686
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.3343
Accuracy: 0.8467
Precision: 0.8900
Recall: 0.8467
F1-Score: 0.8436

Saved best model for fold 4 with F1: 0.8436

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0017, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0287
Precision: 0.9868
Recall: 0.9867
F1-Score: 0.9866
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.2606
Accuracy: 0.8867
Precision: 0.9088
Recall: 0.8867
F1-Score: 0.8842

Saved best model for fold 4 with F1: 0.8842

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0011, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0093
Precision: 0.9942
Recall: 0.9941
F1-Score: 0.9941
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.2536
Accuracy: 0.8933
Precision: 0.9102
Recall: 0.8933
F1-Score: 0.8917

Saved best model for fold 4 with F1: 0.8917

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0006, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0015
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.2168
Accuracy: 0.9200
Precision: 0.9353
Recall: 0.9200
F1-Score: 0.9179

Saved best model for fold 4 with F1: 0.9179

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0004, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0006
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.2810
Accuracy: 0.8800
Precision: 0.9039
Recall: 0.8800
F1-Score: 0.8796

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0003, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.2264
Accuracy: 0.9000
Precision: 0.9185
Recall: 0.9000
F1-Score: 0.9014

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0002, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.2187
Accuracy: 0.9067
Precision: 0.9219
Recall: 0.9067
F1-Score: 0.9078

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_4.png
Metrics for fold 4 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_4.csv

Fold 4 Results:
Loss: 0.2168
Accuracy: 0.9200
Precision: 0.9353
Recall: 0.9200
F1-Score: 0.9179

Fold 5/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 5
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=2.7632, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1719
Precision: 0.0982
Recall: 0.0593
F1-Score: 0.0551
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.8898
Accuracy: 0.2000
Precision: 0.1044
Recall: 0.2000
F1-Score: 0.1101

Saved best model for fold 5 with F1: 0.1101

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=1.1265, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.9065
Precision: 0.3833
Recall: 0.3719
F1-Score: 0.3631
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 1.0083
Accuracy: 0.6333
Precision: 0.6699
Recall: 0.6333
F1-Score: 0.6101

Saved best model for fold 5 with F1: 0.6101

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.3564, lr=4.85e-05, wd=2.20e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 0.6238
Precision: 0.7641
Recall: 0.7630
F1-Score: 0.7583
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.5506
Accuracy: 0.7333
Precision: 0.7454
Recall: 0.7333
F1-Score: 0.7170

Saved best model for fold 5 with F1: 0.7170

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0685, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1949
Precision: 0.9114
Recall: 0.9104
F1-Score: 0.9102
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.3468
Accuracy: 0.7867
Precision: 0.8080
Recall: 0.7867
F1-Score: 0.7838

Saved best model for fold 5 with F1: 0.7838

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0064, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0640
Precision: 0.9707
Recall: 0.9704
F1-Score: 0.9703
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.3595
Accuracy: 0.8133
Precision: 0.8556
Recall: 0.8133
F1-Score: 0.8069

Saved best model for fold 5 with F1: 0.8069

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0366, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0208
Precision: 0.9890
Recall: 0.9889
F1-Score: 0.9889
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.3302
Accuracy: 0.8267
Precision: 0.8616
Recall: 0.8267
F1-Score: 0.8178

Saved best model for fold 5 with F1: 0.8178

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0012, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0030
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.2456
Accuracy: 0.8867
Precision: 0.9012
Recall: 0.8867
F1-Score: 0.8824

Saved best model for fold 5 with F1: 0.8824

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0005, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0009
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.2319
Accuracy: 0.9000
Precision: 0.9038
Recall: 0.9000
F1-Score: 0.8956

Saved best model for fold 5 with F1: 0.8956

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0005, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0018
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.2554
Accuracy: 0.8867
Precision: 0.8910
Recall: 0.8867
F1-Score: 0.8821

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0002, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.2189
Accuracy: 0.9000
Precision: 0.9033
Recall: 0.9000
F1-Score: 0.8967

Saved best model for fold 5 with F1: 0.8967

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0003, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.2145
Accuracy: 0.9067
Precision: 0.9088
Recall: 0.9067
F1-Score: 0.9034

Saved best model for fold 5 with F1: 0.9034

Epoch 12/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0002, lr=5.84e-06, wd=7.60e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 5.84e-06
Weight Decay: 7.60e-02

Validation Metrics:
Loss: 0.2166
Accuracy: 0.9200
Precision: 0.9217
Recall: 0.9200
F1-Score: 0.9169

Saved best model for fold 5 with F1: 0.9169

Epoch 13/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0002, lr=2.66e-06, wd=8.20e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.66e-06
Weight Decay: 8.20e-02

Validation Metrics:
Loss: 0.2162
Accuracy: 0.9200
Precision: 0.9217
Recall: 0.9200
F1-Score: 0.9169

Epoch 14/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0001, lr=6.73e-07, wd=8.80e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 6.73e-07
Weight Decay: 8.80e-02

Validation Metrics:
Loss: 0.2161
Accuracy: 0.9200
Precision: 0.9217
Recall: 0.9200
F1-Score: 0.9169

Epoch 15/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.0010, lr=0.00e+00, wd=9.40e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 0.00e+00
Weight Decay: 9.40e-02

Validation Metrics:
Loss: 0.2161
Accuracy: 0.9200
Precision: 0.9217
Recall: 0.9200
F1-Score: 0.9169

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_5.png
Metrics for fold 5 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_5.csv

Fold 5 Results:
Loss: 0.2166
Accuracy: 0.9200
Precision: 0.9217
Recall: 0.9200
F1-Score: 0.9169

Fold 6/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 6
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=3.0005, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1945
Precision: 0.0528
Recall: 0.0496
F1-Score: 0.0351
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.9687
Accuracy: 0.1067
Precision: 0.0485
Recall: 0.1067
F1-Score: 0.0605

Saved best model for fold 6 with F1: 0.0605

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.9287, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.8368
Precision: 0.4256
Recall: 0.4089
F1-Score: 0.3900
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 1.0056
Accuracy: 0.6400
Precision: 0.6585
Recall: 0.6400
F1-Score: 0.5984

Saved best model for fold 6 with F1: 0.5984

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.2543, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.5856
Precision: 0.7469
Recall: 0.7496
F1-Score: 0.7443
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.5260
Accuracy: 0.7933
Precision: 0.8320
Recall: 0.7933
F1-Score: 0.7744

Saved best model for fold 6 with F1: 0.7744

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0324, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1939
Precision: 0.9063
Recall: 0.9059
F1-Score: 0.9054
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.3481
Accuracy: 0.8733
Precision: 0.9053
Recall: 0.8733
F1-Score: 0.8674

Saved best model for fold 6 with F1: 0.8674

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0286, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0503
Precision: 0.9713
Recall: 0.9711
F1-Score: 0.9711
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.3474
Accuracy: 0.8733
Precision: 0.8917
Recall: 0.8733
F1-Score: 0.8616

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0024, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0113
Precision: 0.9941
Recall: 0.9941
F1-Score: 0.9941
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.4208
Accuracy: 0.8200
Precision: 0.8592
Recall: 0.8200
F1-Score: 0.8143

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.15s/batch, loss=0.0020, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0123
Precision: 0.9964
Recall: 0.9963
F1-Score: 0.9963
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.3129
Accuracy: 0.8733
Precision: 0.8847
Recall: 0.8733
F1-Score: 0.8687

Saved best model for fold 6 with F1: 0.8687

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0004, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0016
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.2536
Accuracy: 0.8933
Precision: 0.9030
Recall: 0.8933
F1-Score: 0.8873

Saved best model for fold 6 with F1: 0.8873

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0004, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0007
Precision: 0.9993
Recall: 0.9993
F1-Score: 0.9993
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.2562
Accuracy: 0.9067
Precision: 0.9167
Recall: 0.9067
F1-Score: 0.9033

Saved best model for fold 6 with F1: 0.9033

Epoch 10/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.15s/batch, loss=0.0004, lr=1.51e-05, wd=6.40e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.51e-05
Weight Decay: 6.40e-02

Validation Metrics:
Loss: 0.2552
Accuracy: 0.9200
Precision: 0.9305
Recall: 0.9200
F1-Score: 0.9172

Saved best model for fold 6 with F1: 0.9172

Epoch 11/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0001, lr=1.01e-05, wd=7.00e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 1.01e-05
Weight Decay: 7.00e-02

Validation Metrics:
Loss: 0.2534
Accuracy: 0.9133
Precision: 0.9263
Recall: 0.9133
F1-Score: 0.9105

Epoch 12/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0006, lr=5.84e-06, wd=7.60e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 5.84e-06
Weight Decay: 7.60e-02

Validation Metrics:
Loss: 0.2524
Accuracy: 0.9200
Precision: 0.9310
Recall: 0.9200
F1-Score: 0.9171

Epoch 13/15
------------------------------


Training: 100%|██████████| 85/85 [01:38<00:00,  1.15s/batch, loss=0.0002, lr=2.66e-06, wd=8.20e-02]



Training Metrics:
Loss: 0.0003
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.66e-06
Weight Decay: 8.20e-02

Validation Metrics:
Loss: 0.2515
Accuracy: 0.9133
Precision: 0.9233
Recall: 0.9133
F1-Score: 0.9097

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_6.png
Metrics for fold 6 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_6.csv

Fold 6 Results:
Loss: 0.2552
Accuracy: 0.9200
Precision: 0.9305
Recall: 0.9200
F1-Score: 0.9172

Fold 7/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 7
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=2.4372, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1608
Precision: 0.1041
Recall: 0.0607
F1-Score: 0.0564
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.6567
Accuracy: 0.2800
Precision: 0.2146
Recall: 0.2800
F1-Score: 0.2089

Saved best model for fold 7 with F1: 0.2089

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.8865, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.6049
Precision: 0.5100
Recall: 0.4993
F1-Score: 0.4855
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 0.8863
Accuracy: 0.6533
Precision: 0.7437
Recall: 0.6533
F1-Score: 0.6443

Saved best model for fold 7 with F1: 0.6443

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.3704, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.4322
Precision: 0.8179
Recall: 0.8193
F1-Score: 0.8166
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.4142
Accuracy: 0.8467
Precision: 0.8905
Recall: 0.8467
F1-Score: 0.8469

Saved best model for fold 7 with F1: 0.8469

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0255, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1082
Precision: 0.9471
Recall: 0.9467
F1-Score: 0.9465
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.5170
Accuracy: 0.7933
Precision: 0.8638
Recall: 0.7933
F1-Score: 0.8015

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0075, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0395
Precision: 0.9773
Recall: 0.9763
F1-Score: 0.9764
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.3411
Accuracy: 0.8667
Precision: 0.8908
Recall: 0.8667
F1-Score: 0.8607

Saved best model for fold 7 with F1: 0.8607

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.2581, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0169
Precision: 0.9920
Recall: 0.9919
F1-Score: 0.9918
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.3507
Accuracy: 0.8667
Precision: 0.9015
Recall: 0.8667
F1-Score: 0.8736

Saved best model for fold 7 with F1: 0.8736

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0003, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0033
Precision: 0.9978
Recall: 0.9978
F1-Score: 0.9978
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.3346
Accuracy: 0.8600
Precision: 0.9047
Recall: 0.8600
F1-Score: 0.8624

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:34<00:00,  1.11s/batch, loss=0.0003, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0009
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.3314
Accuracy: 0.8667
Precision: 0.8902
Recall: 0.8667
F1-Score: 0.8653

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=0.0003, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0004
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.3421
Accuracy: 0.8667
Precision: 0.8929
Recall: 0.8667
F1-Score: 0.8650

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_7.png
Metrics for fold 7 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_7.csv

Fold 7 Results:
Loss: 0.3507
Accuracy: 0.8667
Precision: 0.9015
Recall: 0.8667
F1-Score: 0.8736

Fold 8/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 8
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:31<00:00,  1.08s/batch, loss=3.3912, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.2352
Precision: 0.0312
Recall: 0.0296
F1-Score: 0.0216
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 3.1986
Accuracy: 0.0467
Precision: 0.0105
Recall: 0.0467
F1-Score: 0.0144

Saved best model for fold 8 with F1: 0.0144

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:30<00:00,  1.07s/batch, loss=1.8294, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 2.5553
Precision: 0.2801
Recall: 0.2178
F1-Score: 0.2140
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 1.4645
Accuracy: 0.5267
Precision: 0.4823
Recall: 0.5267
F1-Score: 0.4565

Saved best model for fold 8 with F1: 0.4565

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.5596, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.8742
Precision: 0.6970
Recall: 0.6904
F1-Score: 0.6831
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.5561
Accuracy: 0.7867
Precision: 0.8241
Recall: 0.7867
F1-Score: 0.7762

Saved best model for fold 8 with F1: 0.7762

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0514, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.2149
Precision: 0.9094
Recall: 0.9096
F1-Score: 0.9089
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.2918
Accuracy: 0.8867
Precision: 0.9093
Recall: 0.8867
F1-Score: 0.8840

Saved best model for fold 8 with F1: 0.8840

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.13s/batch, loss=0.0085, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0629
Precision: 0.9652
Recall: 0.9652
F1-Score: 0.9650
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.2156
Accuracy: 0.8867
Precision: 0.9044
Recall: 0.8867
F1-Score: 0.8825

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:40<00:00,  1.18s/batch, loss=0.0022, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0147
Precision: 0.9920
Recall: 0.9919
F1-Score: 0.9919
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.2153
Accuracy: 0.8867
Precision: 0.9017
Recall: 0.8867
F1-Score: 0.8809

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:34<00:00,  1.11s/batch, loss=0.0008, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0014
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.2294
Accuracy: 0.8800
Precision: 0.9029
Recall: 0.8800
F1-Score: 0.8706

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_8.png
Metrics for fold 8 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_8.csv

Fold 8 Results:
Loss: 0.2918
Accuracy: 0.8867
Precision: 0.9093
Recall: 0.8867
F1-Score: 0.8840

Fold 9/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 9
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=2.8004, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.2255
Precision: 0.0490
Recall: 0.0430
F1-Score: 0.0365
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.9058
Accuracy: 0.1333
Precision: 0.0806
Recall: 0.1333
F1-Score: 0.0632

Saved best model for fold 9 with F1: 0.0632

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=1.0698, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 2.1347
Precision: 0.3675
Recall: 0.3467
F1-Score: 0.3281
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 1.1589
Accuracy: 0.6400
Precision: 0.6203
Recall: 0.6400
F1-Score: 0.5864

Saved best model for fold 9 with F1: 0.5864

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:33<00:00,  1.09s/batch, loss=0.6298, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.6673
Precision: 0.7591
Recall: 0.7570
F1-Score: 0.7527
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.4591
Accuracy: 0.8000
Precision: 0.8245
Recall: 0.8000
F1-Score: 0.7860

Saved best model for fold 9 with F1: 0.7860

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=0.0239, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1929
Precision: 0.9180
Recall: 0.9163
F1-Score: 0.9163
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.2660
Accuracy: 0.8667
Precision: 0.8850
Recall: 0.8667
F1-Score: 0.8554

Saved best model for fold 9 with F1: 0.8554

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:33<00:00,  1.10s/batch, loss=0.0124, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0443
Precision: 0.9743
Recall: 0.9733
F1-Score: 0.9734
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.2251
Accuracy: 0.9133
Precision: 0.9425
Recall: 0.9133
F1-Score: 0.9140

Saved best model for fold 9 with F1: 0.9140

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:35<00:00,  1.12s/batch, loss=0.0013, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0179
Precision: 0.9883
Recall: 0.9881
F1-Score: 0.9882
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.2036
Accuracy: 0.8867
Precision: 0.9029
Recall: 0.8867
F1-Score: 0.8831

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.14s/batch, loss=0.0014, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0052
Precision: 0.9963
Recall: 0.9963
F1-Score: 0.9963
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.1571
Accuracy: 0.9067
Precision: 0.9183
Recall: 0.9067
F1-Score: 0.9031

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:35<00:00,  1.13s/batch, loss=0.0003, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0006
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.1485
Accuracy: 0.9067
Precision: 0.9212
Recall: 0.9067
F1-Score: 0.9062

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_9.png
Metrics for fold 9 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_9.csv

Fold 9 Results:
Loss: 0.2251
Accuracy: 0.9133
Precision: 0.9425
Recall: 0.9133
F1-Score: 0.9140

Fold 10/10
Train size: 1350, Validation size: 150


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training Fold 10
Total steps: 1275, Warmup steps: 127

Epoch 1/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.13s/batch, loss=3.3214, lr=3.35e-05, wd=1.00e-02]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 3.1999
Precision: 0.0640
Recall: 0.0437
F1-Score: 0.0357
Learning Rate: 3.35e-05
Weight Decay: 1.00e-02

Validation Metrics:
Loss: 2.8175
Accuracy: 0.1600
Precision: 0.0651
Recall: 0.1600
F1-Score: 0.0834

Saved best model for fold 10 with F1: 0.0834

Epoch 2/15
------------------------------


Training: 100%|██████████| 85/85 [01:36<00:00,  1.14s/batch, loss=0.9409, lr=4.98e-05, wd=1.60e-02]
  _warn_prf(average, modifier, msg_start, len(result))



Training Metrics:
Loss: 1.8525
Precision: 0.4397
Recall: 0.4163
F1-Score: 0.3959
Learning Rate: 4.98e-05
Weight Decay: 1.60e-02

Validation Metrics:
Loss: 0.9091
Accuracy: 0.6933
Precision: 0.7013
Recall: 0.6933
F1-Score: 0.6628

Saved best model for fold 10 with F1: 0.6628

Epoch 3/15
------------------------------


Training: 100%|██████████| 85/85 [01:35<00:00,  1.13s/batch, loss=0.0890, lr=4.85e-05, wd=2.20e-02]



Training Metrics:
Loss: 0.5277
Precision: 0.7901
Recall: 0.7926
F1-Score: 0.7880
Learning Rate: 4.85e-05
Weight Decay: 2.20e-02

Validation Metrics:
Loss: 0.4878
Accuracy: 0.8067
Precision: 0.8434
Recall: 0.8067
F1-Score: 0.8002

Saved best model for fold 10 with F1: 0.8002

Epoch 4/15
------------------------------


Training: 100%|██████████| 85/85 [01:32<00:00,  1.09s/batch, loss=0.3764, lr=4.59e-05, wd=2.80e-02]



Training Metrics:
Loss: 0.1558
Precision: 0.9220
Recall: 0.9215
F1-Score: 0.9212
Learning Rate: 4.59e-05
Weight Decay: 2.80e-02

Validation Metrics:
Loss: 0.4619
Accuracy: 0.7867
Precision: 0.8007
Recall: 0.7867
F1-Score: 0.7734

Epoch 5/15
------------------------------


Training: 100%|██████████| 85/85 [01:33<00:00,  1.10s/batch, loss=0.0581, lr=4.21e-05, wd=3.40e-02]



Training Metrics:
Loss: 0.0578
Precision: 0.9673
Recall: 0.9667
F1-Score: 0.9667
Learning Rate: 4.21e-05
Weight Decay: 3.40e-02

Validation Metrics:
Loss: 0.4099
Accuracy: 0.8400
Precision: 0.8592
Recall: 0.8400
F1-Score: 0.8347

Saved best model for fold 10 with F1: 0.8347

Epoch 6/15
------------------------------


Training: 100%|██████████| 85/85 [01:37<00:00,  1.15s/batch, loss=0.0023, lr=3.75e-05, wd=4.00e-02]



Training Metrics:
Loss: 0.0174
Precision: 0.9934
Recall: 0.9933
F1-Score: 0.9933
Learning Rate: 3.75e-05
Weight Decay: 4.00e-02

Validation Metrics:
Loss: 0.3743
Accuracy: 0.8733
Precision: 0.8876
Recall: 0.8733
F1-Score: 0.8709

Saved best model for fold 10 with F1: 0.8709

Epoch 7/15
------------------------------


Training: 100%|██████████| 85/85 [01:35<00:00,  1.12s/batch, loss=0.0010, lr=3.22e-05, wd=4.60e-02]



Training Metrics:
Loss: 0.0045
Precision: 0.9986
Recall: 0.9985
F1-Score: 0.9985
Learning Rate: 3.22e-05
Weight Decay: 4.60e-02

Validation Metrics:
Loss: 0.3982
Accuracy: 0.8600
Precision: 0.8700
Recall: 0.8600
F1-Score: 0.8542

Epoch 8/15
------------------------------


Training: 100%|██████████| 85/85 [01:34<00:00,  1.11s/batch, loss=0.0004, lr=2.64e-05, wd=5.20e-02]



Training Metrics:
Loss: 0.0008
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.64e-05
Weight Decay: 5.20e-02

Validation Metrics:
Loss: 0.3717
Accuracy: 0.8533
Precision: 0.8613
Recall: 0.8533
F1-Score: 0.8486

Epoch 9/15
------------------------------


Training: 100%|██████████| 85/85 [01:34<00:00,  1.11s/batch, loss=0.0208, lr=2.06e-05, wd=5.80e-02]



Training Metrics:
Loss: 0.0006
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Learning Rate: 2.06e-05
Weight Decay: 5.80e-02

Validation Metrics:
Loss: 0.3685
Accuracy: 0.8533
Precision: 0.8613
Recall: 0.8533
F1-Score: 0.8486

Early stopping triggered
Confusion Matrix Shape: (30, 30)
Plots saved to plots/dbmdz_bert-base-turkish-cased/combined_metrics_fold_10.png
Metrics for fold 10 saved to fold_metrics/dbmdz_bert-base-turkish-cased/performance_metrics_fold_10.csv

Fold 10 Results:
Loss: 0.3743
Accuracy: 0.8733
Precision: 0.8876
Recall: 0.8733
F1-Score: 0.8709

Average metrics across folds:
Loss: 0.2698
Accuracy: 0.9013
Precision: 0.9182
Recall: 0.9013
F1-Score: 0.9002

Saved detailed fold results to 'fold_results.csv'
Results saved to 'hyperparameter_search_results.csv'


In [18]:
# --- Save Overall Results ---
# Write the values held in `best_metrics` to a single-row CSV. Each pair below
# maps a report column name to the key it is read from in `best_metrics`.
results_df = pd.DataFrame([{
    column: best_metrics[key]
    for column, key in (
        ('Loss', 'loss'),
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1-Score', 'f1'),
    )
}])
# index=False keeps the row index out of the file, leaving only the metric columns.
results_df.to_csv("best_metrics.csv", index=False)
print("Results saved to 'best_metrics.csv'")

Results saved to 'best_metrics.csv'


In [17]:
# Generate and display the final performance table as required.
# The metrics directory is derived from config the same way the notebook's
# setup cell creates it ("fold_metrics/<model name with '/' replaced by '_'>"),
# so changing config["model_name"] keeps this cell pointing at the folder the
# per-fold metric CSVs were written to instead of a stale hard-coded path.
overall_results = generate_overall_results(
    os.path.join("fold_metrics", config["model_name"].replace("/", "_")),
    output_filename="overall_performance_metrics.csv",
)
display_as_dataframe(overall_results)


Overall performance metrics saved to 'overall_performance_metrics.csv'.


Unnamed: 0,Unnamed: 1,Precision,Recall,F-Score
0,1,0.901429,0.96,0.924949
1,2,0.841429,0.84,0.833131
2,3,0.881429,0.76,0.791385
3,4,0.911905,0.92,0.907576
4,5,0.892262,0.92,0.89561
5,6,0.983333,1.0,0.990909
6,7,1.0,1.0,1.0
7,8,1.0,0.98,0.988889
8,9,0.911429,0.82,0.851111
9,10,0.86,0.8,0.818838
