## Use Case Focus

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import numpy as np
from datetime import datetime
import itertools

# Set device
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# Model name
MODEL_NAME = "microsoft/deberta-v3-base"
LABEL_COLUMN = "usecase_focus"
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
log_file = f"{MODEL_NAME.replace('/', '-')}_{LABEL_COLUMN}_{timestamp}.txt"

# Define hyperparameter grid
EPOCHS_LIST = [3, 4, 9, 12]
BATCH_SIZES = [8]
N_SPLITS_LIST = [5, 10]

# Load data
data = pd.read_csv('type_classification-validation.csv')
label_encoder = LabelEncoder()
data[LABEL_COLUMN] = label_encoder.fit_transform(data[LABEL_COLUMN])
texts = data['sentence'].tolist()
labels = data[LABEL_COLUMN].tolist()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def save_best_model(model, model_name, label_column, epochs, k_folds, batch_size):
    save_path = os.path.join(model_save_dir, f"{model_name}_{label_column}_epochs{epochs}_kfold{k_folds}_batch{batch_size}.bin")
    torch.save(model.state_dict(), save_path)
    print(f"Best model saved at {save_path}")

# Dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.encodings = tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt')
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

# Training loop
def train_model(model, train_loader, optimizer, criterion, epochs, fold):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Fold {fold+1} Epoch {epoch+1}")
        for batch in progress_bar:
            optimizer.zero_grad()

            # Move entire batch to device
            batch = {key: val.to(device) for key, val in batch.items()}

            # Forward pass with labels included
            outputs = model(**batch)

            # Automatic loss from model
            loss = outputs.loss

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # Accumulate loss
            total_loss += loss.item()
            progress_bar.set_postfix(loss=total_loss / (progress_bar.n + 1))

# Evaluation
def evaluate_model(model, val_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)
            preds = model(**inputs).logits.argmax(dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return all_labels, all_preds

# Logging helper
def log_result(log_path, text):
    with open(log_path, "a") as f:
        f.write(text + "\n")

# Start hyperparameter tuning
best_f1 = 0
best_combo = ""
best_result = ""

for epochs, batch_size, n_splits in itertools.product(EPOCHS_LIST, BATCH_SIZES, N_SPLITS_LIST):
    print(f"\nTuning Combination: EPOCHS={epochs}, BATCH_SIZE={batch_size}, K-FOLD={n_splits}")
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    all_accuracies, all_precisions, all_recalls, all_f1s = [], [], [], []

    for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
        train_texts = [texts[i] for i in train_idx]
        val_texts = [texts[i] for i in val_idx]
        train_labels = [labels[i] for i in train_idx]
        val_labels = [labels[i] for i in val_idx]

        train_dataset = TextDataset(train_texts, train_labels, tokenizer)
        val_dataset = TextDataset(val_texts, val_labels, tokenizer)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(set(labels)), ignore_mismatched_sizes=True)
        model.to(device)

        optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
        criterion = torch.nn.CrossEntropyLoss()

        train_model(model, train_loader, optimizer, criterion, epochs, fold)
        y_true, y_pred = evaluate_model(model, val_loader)

        acc = accuracy_score(y_true, y_pred)
        prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

        all_accuracies.append(acc)
        all_precisions.append(prec)
        all_recalls.append(rec)
        all_f1s.append(f1)

    avg_accuracy = np.mean(all_accuracies)
    avg_precision = np.mean(all_precisions)
    avg_recall = np.mean(all_recalls)
    avg_f1 = np.mean(all_f1s)

    combo_string = f"Combination: EPOCHS={epochs}, BATCH_SIZE={batch_size}, K-FOLD={n_splits}"
    result_string = f"{combo_string}\naccuracy: {avg_accuracy:.4f}, precision: {avg_precision:.4f}, recall: {avg_recall:.4f}, f1-score: {avg_f1:.4f}\n"
    print(result_string)
    log_result(log_file, result_string)

    if avg_f1 > best_f1:
        best_f1 = avg_f1
        best_combo = combo_string
        best_result = result_string
        best_model = model
        best_epoch = epochs
        best_batch = batch_size
        best_fold = n_splits

# Log best combination at the end
footer = f"\nBest Combination:\n{best_result}"
log_result(log_file, footer)
print(footer)





Tuning Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 15/15 [00:02<00:00,  6.69it/s, loss=0.619]
Fold 1 Epoch 2: 100%|██████████| 15/15 [00:01<00:00,  9.34it/s, loss=0.513]
Fold 1 Epoch 3: 100%|██████████| 15/15 [00:01<00:00,  9.33it/s, loss=0.477]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.8138, precision: 0.4069, recall: 0.5000, f1-score: 0.4486


Tuning Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=10


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 17/17 [00:01<00:00,  9.25it/s, loss=0.527]
Fold 1 Epoch 2: 100%|██████████| 17/17 [00:01<00:00,  9.32it/s, loss=0.462]
Fold 1 Epoch 3: 100%|██████████| 17/17 [00:01<00:00,  9.33it/s, loss=0.426]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=10
accuracy: 0.8143, precision: 0.4071, recall: 0.5000, f1-score: 0.4487


Tuning Combination: EPOCHS=4, BATCH_SIZE=8, K-FOLD=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 15/15 [00:01<00:00,  9.29it/s, loss=0.569]
Fold 1 Epoch 2: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.518]
Fold 1 Epoch 3: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.452]
Fold 1 Epoch 4: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.407]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this mod

Combination: EPOCHS=4, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.8138, precision: 0.4069, recall: 0.5000, f1-score: 0.4486


Tuning Combination: EPOCHS=4, BATCH_SIZE=8, K-FOLD=10


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.612]
Fold 1 Epoch 2: 100%|██████████| 17/17 [00:01<00:00,  9.32it/s, loss=0.485]
Fold 1 Epoch 3: 100%|██████████| 17/17 [00:01<00:00,  9.31it/s, loss=0.479]
Fold 1 Epoch 4: 100%|██████████| 17/17 [00:01<00:00,  9.32it/s, loss=0.355]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this mod

Combination: EPOCHS=4, BATCH_SIZE=8, K-FOLD=10
accuracy: 0.8143, precision: 0.4071, recall: 0.5000, f1-score: 0.4487


Tuning Combination: EPOCHS=9, BATCH_SIZE=8, K-FOLD=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 15/15 [00:01<00:00,  9.27it/s, loss=0.574]
Fold 1 Epoch 2: 100%|██████████| 15/15 [00:01<00:00,  9.29it/s, loss=0.491]
Fold 1 Epoch 3: 100%|██████████| 15/15 [00:01<00:00,  9.30it/s, loss=0.442]
Fold 1 Epoch 4: 100%|██████████| 15/15 [00:01<00:00,  9.30it/s, loss=0.359]
Fold 1 Epoch 5: 100%|██████████| 15/15 [00:01<00:00,  9.30it/s, loss=0.197]
Fold 1 Epoch 6: 100%|██████████| 15/15 [00:01<00:00,  9.29it/s, loss=0.107] 
Fold 1 Epoch 7: 100%|██████████| 15/15 [00:01<00:00,  9.29it/s, loss=0.529]
Fold 1 Epoch 8: 100%|██████████| 15/15 [00:01<00:00,  9.29it/s, loss=0.125]
Fold 1 Epoch 9: 100%|██████████| 15/15 [0

Combination: EPOCHS=9, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.8276, precision: 0.7537, recall: 0.7054, f1-score: 0.7114


Tuning Combination: EPOCHS=9, BATCH_SIZE=8, K-FOLD=10


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 17/17 [00:01<00:00,  9.28it/s, loss=0.624]
Fold 1 Epoch 2: 100%|██████████| 17/17 [00:01<00:00,  9.31it/s, loss=0.464]
Fold 1 Epoch 3: 100%|██████████| 17/17 [00:01<00:00,  9.31it/s, loss=0.432]
Fold 1 Epoch 4: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.35] 
Fold 1 Epoch 5: 100%|██████████| 17/17 [00:01<00:00,  9.28it/s, loss=0.199]
Fold 1 Epoch 6: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.0922]
Fold 1 Epoch 7: 100%|██████████| 17/17 [00:01<00:00,  9.30it/s, loss=0.033] 
Fold 1 Epoch 8: 100%|██████████| 17/17 [00:01<00:00,  9.28it/s, loss=0.0112]
Fold 1 Epoch 9: 100%|██████████| 17/17 

Combination: EPOCHS=9, BATCH_SIZE=8, K-FOLD=10
accuracy: 0.8067, precision: 0.6675, recall: 0.6042, f1-score: 0.6178


Tuning Combination: EPOCHS=12, BATCH_SIZE=8, K-FOLD=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 15/15 [00:01<00:00,  9.26it/s, loss=0.584]
Fold 1 Epoch 2: 100%|██████████| 15/15 [00:01<00:00,  9.31it/s, loss=0.478]
Fold 1 Epoch 3: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.442]
Fold 1 Epoch 4: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.325]
Fold 1 Epoch 5: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.227]
Fold 1 Epoch 6: 100%|██████████| 15/15 [00:01<00:00,  9.32it/s, loss=0.149]
Fold 1 Epoch 7: 100%|██████████| 15/15 [00:01<00:00,  9.30it/s, loss=0.0812]
Fold 1 Epoch 8: 100%|██████████| 15/15 [00:01<00:00,  9.31it/s, loss=0.0195]
Fold 1 Epoch 9: 100%|██████████| 15/15 [

Combination: EPOCHS=12, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.8276, precision: 0.7345, recall: 0.6493, f1-score: 0.6649


Tuning Combination: EPOCHS=12, BATCH_SIZE=8, K-FOLD=10


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 17/17 [00:01<00:00,  9.26it/s, loss=0.581]
Fold 1 Epoch 2: 100%|██████████| 17/17 [00:01<00:00,  9.30it/s, loss=0.455]
Fold 1 Epoch 3: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.441]
Fold 1 Epoch 4: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.364]
Fold 1 Epoch 5: 100%|██████████| 17/17 [00:01<00:00,  9.30it/s, loss=0.206]
Fold 1 Epoch 6: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.172]
Fold 1 Epoch 7: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.173]
Fold 1 Epoch 8: 100%|██████████| 17/17 [00:01<00:00,  9.29it/s, loss=0.0787]
Fold 1 Epoch 9: 100%|██████████| 17/17 [0

Combination: EPOCHS=12, BATCH_SIZE=8, K-FOLD=10
accuracy: 0.8205, precision: 0.7587, recall: 0.6542, f1-score: 0.6676


Best Combination:
Combination: EPOCHS=9, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.8276, precision: 0.7537, recall: 0.7054, f1-score: 0.7114






In [2]:
import os

# Directory to save the best models
model_save_dir = "all_model"
os.makedirs(model_save_dir, exist_ok=True)

if best_model is not None:
    save_best_model(best_model, MODEL_NAME.replace('/', '-'), LABEL_COLUMN, best_epoch, best_fold, best_batch)

Best model saved at all_model/microsoft-deberta-v3-base_usecase_focus_epochs9_kfold5_batch8.bin


## Structure Focus

In [3]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import numpy as np
from datetime import datetime
import itertools

# Set device
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# Model name
MODEL_NAME = "microsoft/deberta-v3-large"
LABEL_COLUMN = "structure_focus"
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
log_file = f"{MODEL_NAME.replace('/', '-')}_{LABEL_COLUMN}_{timestamp}.txt"

# Define hyperparameter grid
EPOCHS_LIST = [3, 4, 9, 12]
BATCH_SIZES = [8]
N_SPLITS_LIST = [5, 10]

# Load data
data = pd.read_csv('type_classification-validation.csv')
label_encoder = LabelEncoder()
data[LABEL_COLUMN] = label_encoder.fit_transform(data[LABEL_COLUMN])
texts = data['sentence'].tolist()
labels = data[LABEL_COLUMN].tolist()
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.encodings = tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt')
        self.labels = torch.tensor(labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

# Training loop
def train_model(model, train_loader, optimizer, criterion, epochs, fold):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Fold {fold+1} Epoch {epoch+1}")
        for batch in progress_bar:
            optimizer.zero_grad()

            # Move entire batch to device
            batch = {key: val.to(device) for key, val in batch.items()}

            # Forward pass with labels included
            outputs = model(**batch)

            # Automatic loss from model
            loss = outputs.loss

            # Backward and optimize
            loss.backward()
            optimizer.step()

            # Accumulate loss
            total_loss += loss.item()
            progress_bar.set_postfix(loss=total_loss / (progress_bar.n + 1))

# Evaluation
def evaluate_model(model, val_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)
            preds = model(**inputs).logits.argmax(dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return all_labels, all_preds

# Logging helper
def log_result(log_path, text):
    with open(log_path, "a") as f:
        f.write(text + "\n")

# Start hyperparameter tuning
best_f1 = 0
best_combo = ""
best_result = ""

for epochs, batch_size, n_splits in itertools.product(EPOCHS_LIST, BATCH_SIZES, N_SPLITS_LIST):
    print(f"\nTuning Combination: EPOCHS={epochs}, BATCH_SIZE={batch_size}, K-FOLD={n_splits}")
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    all_accuracies, all_precisions, all_recalls, all_f1s = [], [], [], []

    for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
        train_texts = [texts[i] for i in train_idx]
        val_texts = [texts[i] for i in val_idx]
        train_labels = [labels[i] for i in train_idx]
        val_labels = [labels[i] for i in val_idx]

        train_dataset = TextDataset(train_texts, train_labels, tokenizer)
        val_dataset = TextDataset(val_texts, val_labels, tokenizer)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=len(set(labels)), ignore_mismatched_sizes=True)
        model.to(device)

        optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
        criterion = torch.nn.CrossEntropyLoss()

        train_model(model, train_loader, optimizer, criterion, epochs, fold)
        y_true, y_pred = evaluate_model(model, val_loader)

        acc = accuracy_score(y_true, y_pred)
        prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')

        all_accuracies.append(acc)
        all_precisions.append(prec)
        all_recalls.append(rec)
        all_f1s.append(f1)

    avg_accuracy = np.mean(all_accuracies)
    avg_precision = np.mean(all_precisions)
    avg_recall = np.mean(all_recalls)
    avg_f1 = np.mean(all_f1s)

    combo_string = f"Combination: EPOCHS={epochs}, BATCH_SIZE={batch_size}, K-FOLD={n_splits}"
    result_string = f"{combo_string}\naccuracy: {avg_accuracy:.4f}, precision: {avg_precision:.4f}, recall: {avg_recall:.4f}, f1-score: {avg_f1:.4f}\n"
    print(result_string)
    log_result(log_file, result_string)

    if avg_f1 > best_f1:
        best_f1 = avg_f1
        best_combo = combo_string
        best_result = result_string
        best_model = model
        best_epoch = epochs
        best_batch = batch_size
        best_fold = n_splits

# Log best combination at the end
footer = f"\nBest Combination:\n{best_result}"
log_result(log_file, footer)
print(footer)





Tuning Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1: 100%|██████████| 15/15 [00:04<00:00,  3.56it/s, loss=0.633]
Fold 1 Epoch 2: 100%|██████████| 15/15 [00:04<00:00,  3.57it/s, loss=0.525]
Fold 1 Epoch 3: 100%|██████████| 15/15 [00:04<00:00,  3.57it/s, loss=0.442]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inferenc

Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=5
accuracy: 0.7586, precision: 0.4903, recall: 0.5584, f1-score: 0.5183


Tuning Combination: EPOCHS=3, BATCH_SIZE=8, K-FOLD=10


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Fold 1 Epoch 1:   6%|▌         | 1/17 [00:00<00:08,  2.00it/s, loss=0.716]


OutOfMemoryError: CUDA out of memory. Tried to allocate 502.00 MiB. GPU 1 has a total capacity of 10.90 GiB of which 386.25 MiB is free. Process 452606 has 10.52 GiB memory in use. Of the allocated memory 9.27 GiB is allocated by PyTorch, and 1.09 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import os

# Directory to save the best models
model_save_dir = "all_model"
os.makedirs(model_save_dir, exist_ok=True)

if best_model is not None:
    save_best_model(best_model, MODEL_NAME.replace('/', '-'), LABEL_COLUMN, best_epoch, best_fold, best_batch)