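# [REDACTED] A Hugging Face access token was pasted here in plain text. Revoke that token and supply credentials through an environment variable instead of storing them in the notebook.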

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, Trainer, TrainingArguments, AutoConfig, LlamaForSequenceClassification
import torch
from torch.utils.data import Dataset
import numpy as np
import torch.nn as nn
from google.colab import drive
import os
import random

# Set a seed for reproducibility
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    When ``torch.cuda.is_available()`` is true, the seed is also applied to
    all CUDA devices via ``torch.cuda.manual_seed_all``.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Mount Google Drive (Colab) at /content/drive so the CSV below is readable.
drive.mount('/content/drive')

# Load the thyroid dataset from the mounted Drive into a DataFrame.
df = pd.read_csv('/content/drive/MyDrive/Thyroid_Diff.csv')

def row_to_text(row):
    """Render one patient record as a single descriptive sentence.

    Every field is read with ``row.get`` and falls back to ``'N/A'`` when the
    key is missing from ``row``.
    """
    def field(key):
        return row.get(key, 'N/A')

    clauses = [
        f"Patient of age {field('Age')}",
        f"gender {field('Gender')}",
        f"smoking history: {field('Smoking')}",
        f"radiotherapy: {field('Hx Radiotherapy')}",
        f"thyroid function: {field('Thyroid Function')}",
        f"physical examination: {field('Physical Examination')}",
        f"adenopathy: {field('Adenopathy')}",
        f"pathology: {field('Pathology')}",
        f"focality: {field('Focality')}",
    ]
    return ", ".join(clauses) + "."

# Turn every row into the sentence the language model will read.
df['text'] = df.apply(row_to_text, axis=1)

# Set seed before any random operations
set_seed(42)

# Encode target columns as integer class ids (values are cast to str first).
label_encoders = {}
for column in ['Recurred', 'Stage', 'Risk']:
    le = LabelEncoder()
    values = df[column].astype(str)
    encoded_values = le.fit_transform(values)
    df[column] = encoded_values
    label_encoders[column] = le

# Number of classes per task, taken from the fitted encoders so the head
# sizes always agree with the ids actually produced above.
num_labels_per_task = [
    len(label_encoders['Recurred'].classes_),
    len(label_encoders['Stage'].classes_),
    len(label_encoders['Risk'].classes_)
]

# Prepare multi-label target data: one column per task.
labels = df[['Recurred', 'Stage', 'Risk']].values

# Split data (80/20, fixed random_state for a repeatable split)
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'].tolist(),
    labels,
    test_size=0.2,
    random_state=42
)

# Initialize tokenizer for LLaMA
model_name = "LLaMA_model_path_or_hf_repo_name"  # replace with the actual model path or Hugging Face repository name
# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret never has to be written into the notebook; the placeholder is kept
# as the fallback.
access_token = os.environ.get("HF_TOKEN") or "YOUR_HF_ACCESS_TOKEN"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=access_token)

# The dataset pads every example (padding='max_length'); tokenizers that ship
# without a pad token cannot do that, so reuse EOS as the pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define the MultiLabelClassificationModel class using LLaMA
class MultiLabelClassificationModel(nn.Module):
    """Multi-task classifier: one shared LLaMA encoder, one linear head per task.

    Args:
        model_name: path or Hub id handed to ``from_pretrained``.
        num_labels_per_task: number of classes of each task, in the same
            order as the label columns.
    """

    def __init__(self, model_name, num_labels_per_task):
        super(MultiLabelClassificationModel, self).__init__()
        self.num_labels_per_task = num_labels_per_task

        # Keep only the bare transformer as the backbone. The
        # sequence-classification wrapper returns logits, not hidden states,
        # so its output has no ``last_hidden_state`` for the heads to read.
        classifier = LlamaForSequenceClassification.from_pretrained(
            model_name, num_labels=sum(num_labels_per_task)
        )
        self.backbone = classifier.base_model.to(device)

        # Add task-specific classification heads
        hidden_size = self.backbone.config.hidden_size
        self.task_heads = nn.ModuleList([
            nn.Linear(hidden_size, num_labels).to(device)
            for num_labels in num_labels_per_task
        ])

        # Add dropout for regularization
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Run the encoder and every task head.

        Args:
            input_ids: token ids, indexed as (batch, sequence).
            attention_mask: optional mask with 1 on real tokens and 0 on padding.
            labels: optional integer tensor whose column ``i`` holds the
                class ids of task ``i``.

        Returns:
            ``{'logits': [per-task logits]}`` and, when ``labels`` is given,
            also ``'loss'``: the mean of the per-task cross-entropy losses.
        """
        # Follow the module's own parameters so inputs land on whatever
        # device the model currently lives on.
        head_weight = self.task_heads[0].weight
        input_ids = input_ids.to(head_weight.device)
        if attention_mask is not None:
            attention_mask = attention_mask.to(head_weight.device)

        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        hidden = outputs.last_hidden_state

        # A causal LM only sees the whole sentence at the final real token, so
        # pool there rather than at position 0. Taking the argmax of
        # mask * position finds that token for left- and right-padded batches.
        if attention_mask is None:
            pooled_output = hidden[:, -1, :]
        else:
            positions = torch.arange(hidden.size(1), device=hidden.device)
            last_token = (attention_mask.to(torch.long) * positions).argmax(dim=1)
            batch_index = torch.arange(hidden.size(0), device=hidden.device)
            pooled_output = hidden[batch_index, last_token]

        # The encoder may run in reduced precision; the heads may not.
        pooled_output = self.dropout(pooled_output.to(head_weight.dtype))

        # Get logits for each task
        task_logits = [head(pooled_output) for head in self.task_heads]

        if labels is not None:
            labels = labels.to(head_weight.device)
            loss_fn = nn.CrossEntropyLoss()
            losses = [
                loss_fn(task_logit, labels[:, i])
                for i, task_logit in enumerate(task_logits)
            ]
            loss = sum(losses) / len(losses)
            return {'loss': loss, 'logits': task_logits}

        return {'logits': task_logits}

class TabularDataset(Dataset):
    """Dataset that tokenizes all texts once and pairs them with their labels."""

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Every text is truncated/padded to exactly ``max_length`` tokens and
        # returned as PyTorch tensors.
        tokenizer_kwargs = dict(
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt',
        )
        self.encodings = tokenizer(texts, **tokenizer_kwargs)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {
            name: self.encodings[name][idx]
            for name in ('input_ids', 'attention_mask')
        }
        sample['labels'] = self.labels[idx]
        return sample

def compute_metrics(eval_pred):
    """Compute per-task accuracy from an ``(logits, labels)`` pair.

    ``logits`` may be a tuple/list holding one score array per task, or a
    single array (then treated as a one-element list). Column ``i`` of
    ``labels`` is compared with the argmax of the ``i``-th logits entry.

    Returns:
        dict with the keys ``recurred_accuracy``, ``stage_accuracy`` and
        ``risk_accuracy``.
    """
    logits, labels = eval_pred
    per_task = logits if isinstance(logits, (tuple, list)) else [logits]

    def to_classes(scores):
        # Tensors are moved to NumPy first; arrays are used as they are.
        if isinstance(scores, torch.Tensor):
            scores = scores.cpu().numpy()
        return np.argmax(scores, axis=-1)

    predicted = [to_classes(scores) for scores in per_task]

    return {
        f'{name}_accuracy': (predicted[col] == labels[:, col]).mean()
        for col, name in enumerate(('recurred', 'stage', 'risk'))
    }

# Build the multi-task model with one head per target column.
model = MultiLabelClassificationModel(model_name, num_labels_per_task)

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Warm up for a fraction of the run instead of a fixed 500 steps. The
    # recorded evaluation covered roughly 77 rows, so the whole run appears to
    # be only about a hundred optimizer steps; a 500-step warmup would never
    # let the learning rate reach its configured value.
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    report_to=["none"],
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Tokenize both splits once, up front.
train_dataset = TabularDataset(train_texts, train_labels, tokenizer)
test_dataset = TabularDataset(test_texts, test_labels, tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

print("Starting training...")
trainer.train()
print("Training completed. Starting evaluation...")
results = trainer.evaluate()
print("\nEvaluation Results:")
for key, value in results.items():
    print(f"{key}: {value:.4f}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Epoch,Training Loss,Validation Loss,Recurred Accuracy,Stage Accuracy,Risk Accuracy
1,No log,0.917308,0.753247,0.909091,0.714286
2,No log,0.570608,0.753247,0.909091,0.714286
3,No log,0.511389,0.766234,0.909091,0.727273


Training completed. Starting evaluation...



Evaluation Results:
eval_loss: 0.5114
eval_recurred_accuracy: 0.7662
eval_stage_accuracy: 0.9091
eval_risk_accuracy: 0.7273
eval_runtime: 2.2249
eval_samples_per_second: 34.6090
eval_steps_per_second: 4.4950
epoch: 3.0000


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset
import numpy as np
import torch.nn as nn
import os
import warnings
import random
from google.colab import drive

# Disable CUDA: hide every GPU from CUDA-aware libraries so the cell runs on
# the CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Request synchronous CUDA kernel launches (a debugging aid; presumably has no
# effect once the GPUs are hidden by the line above).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# Limit PyTorch's CPU parallelism to a single thread.
torch.set_num_threads(1)

def set_seed_cpu_only(seed):
    """Seed the Python, NumPy and PyTorch RNGs without calling ``torch.cuda``.

    If the cuDNN backend reports itself enabled, it is additionally switched
    to deterministic mode with benchmarking turned off.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.backends.cudnn.enabled:
        return
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Patch torch.cuda functions so later calls report "no GPU" and the CUDA
# seeding/synchronisation helpers become no-ops.
torch.cuda.is_available = lambda: False
torch.cuda.manual_seed = lambda *args, **kwargs: None
torch.cuda.manual_seed_all = lambda *args, **kwargs: None
torch.cuda.synchronize = lambda *args, **kwargs: None

# Set CPU device
device = torch.device("cpu")
# Make float32 the default dtype through the public API; the private
# torch._C._set_default_tensor_type call it replaces is deprecated and emits
# a warning on recent PyTorch releases.
torch.set_default_dtype(torch.float32)
# Silence all Python warnings for the remainder of the session.
warnings.filterwarnings('ignore')

def row_to_text(row):
    """Render one patient record as a single descriptive sentence.

    All fields are read with ``row[...]`` (a missing key raises), except
    ``'Hx Radiotherapy'``, which falls back to ``'N/A'`` when absent.
    """
    clauses = (
        f"Patient of age {row['Age']}",
        f"gender {row['Gender']}",
        f"smoking history: {row['Smoking']}",
        f"radiotherapy: {row.get('Hx Radiotherapy', 'N/A')}",
        f"thyroid function: {row['Thyroid Function']}",
        f"physical examination: {row['Physical Examination']}",
        f"adenopathy: {row['Adenopathy']}",
        f"pathology: {row['Pathology']}",
        f"focality: {row['Focality']}",
    )
    return ", ".join(clauses) + "."

class TabularDataset(Dataset):
    """Dataset that tokenizes all texts once and serves CPU tensors per item."""

    def __init__(self, texts, labels, tokenizer, max_length):
        # Truncate to ``max_length`` and let the tokenizer pad the batch.
        tokenizer_kwargs = dict(truncation=True, padding=True, max_length=max_length)
        self.encodings = tokenizer(texts, **tokenizer_kwargs)
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = torch.tensor(column[idx], device='cpu')
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long, device='cpu')
        return sample

def compute_metrics(eval_pred):
    """Compute per-task accuracy from an ``(logits, labels)`` pair.

    ``logits`` is iterated as one score array per task; column ``i`` of
    ``labels`` is compared with the argmax of the ``i``-th array.

    Returns:
        dict with the keys ``recurred_accuracy``, ``stage_accuracy`` and
        ``risk_accuracy``.
    """
    logits, labels = eval_pred
    predicted = [np.argmax(scores, axis=-1) for scores in logits]

    return {
        f'{name}_accuracy': (predicted[col] == labels[:, col]).mean()
        for col, name in enumerate(('recurred', 'stage', 'risk'))
    }

def prepare_data():
    """Mount Drive, load the thyroid CSV and label-encode the three targets.

    Returns:
        ``(df, num_labels_per_task)``: the DataFrame with an added ``text``
        column and integer-encoded ``Recurred``/``Stage``/``Risk`` columns,
        plus the number of classes found for each of those targets, in that
        order.
    """
    drive.mount('/content/drive')
    df = pd.read_csv('/content/drive/MyDrive/Thyroid_Diff.csv')
    df['text'] = df.apply(row_to_text, axis=1)

    targets = ['Recurred', 'Stage', 'Risk']
    encoders = {}
    for target in targets:
        encoder = LabelEncoder()
        # Values are cast to str before fitting the encoder.
        df[target] = encoder.fit_transform(df[target].astype(str))
        encoders[target] = encoder

    class_counts = [len(encoders[target].classes_) for target in targets]
    return df, class_counts

  torch._C._set_default_tensor_type(torch.FloatTensor)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoConfig
import torch
from torch.utils.data import Dataset
import numpy as np
import torch.nn as nn
from google.colab import drive
import os
import warnings
import random

# Disable CUDA completely before any other torch operations: hide every GPU
# from CUDA-aware libraries so the cell runs on the CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Request synchronous CUDA kernel launches (a debugging aid; presumably has no
# effect once the GPUs are hidden by the line above).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# Limit PyTorch's CPU parallelism to a single thread.
torch.set_num_threads(1)

# Custom seed setting function that avoids CUDA
def set_seed_cpu_only(seed):
    """Seed the Python, NumPy and PyTorch RNGs without calling ``torch.cuda``.

    If the cuDNN backend reports itself enabled, it is additionally switched
    to deterministic mode with benchmarking turned off.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.backends.cudnn.enabled:
        return
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Patch torch.cuda functions to prevent CUDA initialization
def dummy_cuda_method(*args, **kwargs):
    """Accept any arguments, do nothing, and return ``None``.

    Serves as a no-op replacement for ``torch.cuda`` functions.
    """
    return None

# Report "no GPU" to later callers and turn the CUDA seeding/synchronisation
# helpers into no-ops.
torch.cuda.is_available = lambda: False
torch.cuda.manual_seed = dummy_cuda_method
torch.cuda.manual_seed_all = dummy_cuda_method
torch.cuda.synchronize = dummy_cuda_method

# Set the device to CPU and ensure we're using CPU tensors
device = torch.device("cpu")
# Make float32 the default dtype through the public API; the private
# torch._C._set_default_tensor_type call it replaces is deprecated and emits
# a warning on recent PyTorch releases.
torch.set_default_dtype(torch.float32)

# Suppress warnings
warnings.filterwarnings('ignore')

# Mount Google Drive
drive.mount('/content/drive')

# Load and preprocess the data
df = pd.read_csv('/content/drive/MyDrive/Thyroid_Diff.csv')

def row_to_text(row):
    """Render one patient record as a single descriptive sentence.

    All fields are read with ``row[...]`` (a missing key raises), except
    ``'Hx Radiotherapy'``, which falls back to ``'N/A'`` when absent.
    """
    clauses = (
        f"Patient of age {row['Age']}",
        f"gender {row['Gender']}",
        f"smoking history: {row['Smoking']}",
        f"radiotherapy: {row.get('Hx Radiotherapy', 'N/A')}",
        f"thyroid function: {row['Thyroid Function']}",
        f"physical examination: {row['Physical Examination']}",
        f"adenopathy: {row['Adenopathy']}",
        f"pathology: {row['Pathology']}",
        f"focality: {row['Focality']}",
    )
    return ", ".join(clauses) + "."

# Turn every row into the sentence the model will read.
df['text'] = df.apply(row_to_text, axis=1)

# Seed the RNGs before anything random happens.
set_seed_cpu_only(42)

target_columns = ['Recurred', 'Stage', 'Risk']

# Show the raw target values so the encoding below can be checked by eye.
print("Unique values before encoding:")
for column in target_columns:
    print(f"{column}: {df[column].unique()}")

# Replace each target column by integer class ids and keep the fitted encoder.
label_encoders = {}
for column in target_columns:
    le = LabelEncoder()
    values = df[column].astype(str)
    encoded_values = le.fit_transform(values)

    # Report the learned classes and the ids that were produced.
    print(f"\nEncoding for {column}:")
    print(f"Original values: {le.classes_}")
    print(f"Encoded values: {np.unique(encoded_values)}")

    df[column] = encoded_values
    label_encoders[column] = le

# Class count per task, read off the fitted encoders.
num_labels_per_task = [
    len(label_encoders[name].classes_) for name in target_columns
]

print("\nNumber of classes per task:", num_labels_per_task)

# One label column per task.
labels = df[target_columns].values

# 80/20 split with a fixed random_state.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'].tolist(), labels, test_size=0.2, random_state=42
)

# DistilBERT tokenizer (fast implementation).
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased", use_fast=True, local_files_only=False
)

class TabularDataset(Dataset):
    """Dataset that tokenizes all texts once and serves CPU tensors per item.

    Uses the module-level ``tokenizer``.
    """

    def __init__(self, texts, labels):
        self.encodings = tokenizer(texts, truncation=True, padding=True)
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = torch.tensor(column[idx], device='cpu')
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long, device='cpu')
        return sample

class MultiLabelClassificationModel(nn.Module):
    """Multi-task classifier built on one sequence-classification backbone.

    The backbone emits ``sum(num_labels_per_task)`` logits, which ``forward``
    slices into one consecutive chunk per task. Everything is kept on the CPU.
    """

    def __init__(self, model_name, num_labels_per_task):
        super(MultiLabelClassificationModel, self).__init__()
        self.num_labels_per_task = num_labels_per_task

        # A single classifier output wide enough for all tasks together.
        config = AutoConfig.from_pretrained(
            model_name, num_labels=sum(num_labels_per_task)
        )
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name, config=config, device_map=None
        )
        self.backbone = backbone.to('cpu')

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Return per-task logits, preceded by the mean loss when labels are given.

        Returns:
            ``tuple(task_logits)`` without labels, otherwise
            ``(loss, tuple(task_logits))`` where ``loss`` is the mean of the
            per-task cross-entropy losses.
        """
        # Keep every input on the CPU.
        input_ids = input_ids.to('cpu')
        attention_mask = None if attention_mask is None else attention_mask.to('cpu')
        if labels is not None:
            labels = labels.to('cpu')

        logits = self.backbone(input_ids=input_ids, attention_mask=attention_mask).logits

        # Cumulative offsets mark where each task's logits start and end.
        offsets = [0]
        for width in self.num_labels_per_task:
            offsets.append(offsets[-1] + width)
        task_logits = tuple(
            logits[:, lo:hi] for lo, hi in zip(offsets, offsets[1:])
        )

        if labels is None:
            return task_logits

        criterion = nn.CrossEntropyLoss()
        per_task = [
            criterion(chunk, labels[:, col]) for col, chunk in enumerate(task_logits)
        ]
        return sum(per_task) / len(per_task), task_logits

def compute_metrics(eval_pred):
    """Compute per-task accuracy from an ``(logits, labels)`` pair.

    ``logits`` is iterated as one score array per task; column ``i`` of
    ``labels`` is compared with the argmax of the ``i``-th array.

    Returns:
        dict with the keys ``recurred_accuracy``, ``stage_accuracy`` and
        ``risk_accuracy``.
    """
    logits, labels = eval_pred

    # Highest-scoring class per example, for every task.
    predicted = [np.argmax(scores, axis=-1) for scores in logits]

    # Fraction of examples where prediction and label agree, per task.
    return {
        f'{name}_accuracy': (predicted[col] == labels[:, col]).mean()
        for col, name in enumerate(('recurred', 'stage', 'risk'))
    }

# Initialize model with DistilBERT
model = MultiLabelClassificationModel("distilbert-base-uncased", num_labels_per_task)
model.to('cpu')

# Initialize training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Warm up for a fraction of the run instead of a fixed 500 steps. The
    # recorded evaluation covered roughly 77 rows, so the whole run appears to
    # be fewer than 500 optimizer steps; a 500-step warmup would never let the
    # learning rate reach its configured value.
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    report_to=["none"],
    dataloader_pin_memory=False,
    no_cuda=True,
)

# Prepare datasets
train_dataset = TabularDataset(train_texts, train_labels)
test_dataset = TabularDataset(test_texts, test_labels)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Train and evaluate
print("Starting training...")
trainer.train()
print("Training completed. Starting evaluation...")
results = trainer.evaluate()
print("\nEvaluation Results:")
print(f"Recurred Accuracy: {results['eval_recurred_accuracy']:.4f}")
print(f"Stage Accuracy: {results['eval_stage_accuracy']:.4f}")
print(f"Risk Accuracy: {results['eval_risk_accuracy']:.4f}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Unique values before encoding:
Recurred: ['No' 'Yes']
Stage: ['I' 'II' 'IVB' 'III' 'IVA']
Risk: ['Low' 'Intermediate' 'High']

Encoding for Recurred:
Original values: ['No' 'Yes']
Encoded values: [0 1]

Encoding for Stage:
Original values: ['I' 'II' 'III' 'IVA' 'IVB']
Encoded values: [0 1 2 3 4]

Encoding for Risk:
Original values: ['High' 'Intermediate' 'Low']
Encoded values: [0 1 2]

Number of classes per task: [2, 5, 3]


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Epoch,Training Loss,Validation Loss,Recurred Accuracy,Stage Accuracy,Risk Accuracy
1,No log,0.637235,0.753247,0.909091,0.714286
2,No log,0.449506,0.857143,0.909091,0.831169
3,No log,0.414644,0.87013,0.909091,0.831169
4,No log,0.40669,0.87013,0.909091,0.831169
5,No log,0.403991,0.87013,0.909091,0.818182


Training completed. Starting evaluation...



Evaluation Results:
Recurred Accuracy: 0.8701
Stage Accuracy: 0.9091
Risk Accuracy: 0.8182


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import (
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    OPTForSequenceClassification,
    AutoConfig
)
import torch
from torch.utils.data import Dataset
import numpy as np
import torch.nn as nn
from google.colab import drive
import os
import warnings
import random
from torch.cuda.amp import autocast, GradScaler

# Basic setup: silence every Python warning for the rest of the session and
# mount Google Drive (Colab) at /content/drive so the dataset can be read.
warnings.filterwarnings('ignore')
drive.mount('/content/drive')

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds for reproducibility
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    When ``torch.cuda.is_available()`` is true, the seed is also applied to
    all CUDA devices via ``torch.cuda.manual_seed_all``.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed all RNGs up front so the random steps below are repeatable.
set_seed(42)

# Load the thyroid dataset from the mounted Drive into a DataFrame.
df = pd.read_csv('/content/drive/MyDrive/Thyroid_Diff.csv')

def row_to_text(row):
    """Render one patient record as a single descriptive sentence.

    All fields are read with ``row[...]`` (a missing key raises), except
    ``'Hx Radiotherapy'``, which falls back to ``'N/A'`` when absent.
    """
    clauses = (
        f"Patient of age {row['Age']}",
        f"gender {row['Gender']}",
        f"smoking history: {row['Smoking']}",
        f"radiotherapy: {row.get('Hx Radiotherapy', 'N/A')}",
        f"thyroid function: {row['Thyroid Function']}",
        f"physical examination: {row['Physical Examination']}",
        f"adenopathy: {row['Adenopathy']}",
        f"pathology: {row['Pathology']}",
        f"focality: {row['Focality']}",
    )
    return ", ".join(clauses) + "."

# Turn every row into the sentence the model will read.
df['text'] = df.apply(row_to_text, axis=1)

target_columns = ['Recurred', 'Stage', 'Risk']

# Show the raw target values so the encoding below can be checked by eye.
print("Unique values before encoding:")
for column in target_columns:
    print(f"{column}: {df[column].unique()}")

# Replace each target column by integer class ids and keep the fitted encoder.
label_encoders = {}
for column in target_columns:
    le = LabelEncoder()
    values = df[column].astype(str)
    encoded_values = le.fit_transform(values)
    df[column] = encoded_values
    label_encoders[column] = le

# Class count per task, read off the fitted encoders.
num_labels_per_task = [
    len(label_encoders[name].classes_) for name in target_columns
]

print("\nNumber of classes per task:", num_labels_per_task)

# One label column per task.
labels = df[target_columns].values

# 80/20 split with a fixed random_state.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['text'].tolist(), labels, test_size=0.2, random_state=42
)

# Tokenizer for OPT-350m (other OPT variants can be substituted here).
model_name = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(
    model_name, use_fast=True, padding_side="right"
)

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

class TabularDataset(Dataset):
    """Dataset that tokenizes all texts once and pairs them with their labels.

    Uses the module-level ``tokenizer``; texts are truncated to at most 512
    tokens and returned as PyTorch tensors.
    """

    def __init__(self, texts, labels):
        tokenizer_kwargs = dict(
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt",
        )
        self.encodings = tokenizer(texts, **tokenizer_kwargs)
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for name, column in self.encodings.items():
            # Hand out a detached copy of the row, not a view.
            sample[name] = column[idx].clone().detach()
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return sample

class MultiLabelOPTModel(nn.Module):
    """Multi-task classifier built on one OPT sequence-classification backbone.

    The backbone emits ``sum(num_labels_per_task)`` logits, which ``forward``
    slices into one consecutive chunk per task.

    Args:
        model_name: path or Hub id handed to ``from_pretrained``.
        num_labels_per_task: number of classes of each task, in the same
            order as the label columns.
    """

    def __init__(self, model_name, num_labels_per_task):
        super(MultiLabelOPTModel, self).__init__()
        self.num_labels_per_task = num_labels_per_task
        total_labels = sum(num_labels_per_task)

        # Only the logits are consumed below, so hidden states are not
        # requested: asking for them would make every forward pass return all
        # layer activations for nothing.
        config = AutoConfig.from_pretrained(
            model_name,
            num_labels=total_labels
        )

        # Initialize OPT model
        self.backbone = OPTForSequenceClassification.from_pretrained(
            model_name,
            config=config
        )

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Return per-task logits, preceded by the mean loss when labels are given.

        Args:
            input_ids: token ids passed to the backbone.
            attention_mask: optional attention mask passed to the backbone.
            labels: optional integer tensor whose column ``i`` holds the
                class ids of task ``i``.

        Returns:
            ``tuple(task_logits)`` without labels, otherwise
            ``(loss, tuple(task_logits))`` where ``loss`` is the mean of the
            per-task cross-entropy losses.
        """
        outputs = self.backbone(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        logits = outputs.logits

        # Split logits for each task: consecutive column ranges, one per task.
        start_idx = 0
        task_logits = []
        for num_labels in self.num_labels_per_task:
            task_logits.append(logits[:, start_idx:start_idx + num_labels])
            start_idx += num_labels

        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            losses = [
                loss_fn(task_logit, labels[:, i])
                for i, task_logit in enumerate(task_logits)
            ]
            loss = sum(losses) / len(losses)
            return loss, tuple(task_logits)

        return tuple(task_logits)

def compute_metrics(eval_pred):
    """Compute per-task accuracy from an ``(logits, labels)`` pair.

    ``logits`` is iterated as one score array per task; column ``i`` of
    ``labels`` is compared with the argmax of the ``i``-th array.

    Returns:
        dict with the keys ``recurred_accuracy``, ``stage_accuracy`` and
        ``risk_accuracy``.
    """
    logits, labels = eval_pred

    # Highest-scoring class per example, for every task.
    predicted = [np.argmax(scores, axis=-1) for scores in logits]

    # Fraction of examples where prediction and label agree, per task.
    return {
        f'{name}_accuracy': (predicted[col] == labels[:, col]).mean()
        for col, name in enumerate(('recurred', 'stage', 'risk'))
    }

# Initialize model
model = MultiLabelOPTModel(model_name, num_labels_per_task)
model.to(device)

# Initialize training arguments with lower resource usage
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    num_train_epochs=5,  # Reduced from 10 to 5
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Warm up for a fraction of the run instead of a fixed 200 steps. With
    # gradient accumulation and the recorded evaluation covering roughly 77
    # rows, the whole run appears to be about a hundred optimizer steps; a
    # 200-step warmup would never let the learning rate reach its configured
    # value.
    warmup_ratio=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    report_to=["none"],
    # Mixed precision needs a GPU; keep the CPU fallback chosen for `device`
    # above working instead of failing when no GPU is present.
    fp16=torch.cuda.is_available(),
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch size
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Prepare datasets
train_dataset = TabularDataset(train_texts, train_labels)
test_dataset = TabularDataset(test_texts, test_labels)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Train and evaluate
print("Starting training...")
trainer.train()

print("Training completed. Starting evaluation...")
results = trainer.evaluate()

print("\nEvaluation Results:")
print(f"Recurred Accuracy: {results['eval_recurred_accuracy']:.4f}")
print(f"Stage Accuracy: {results['eval_stage_accuracy']:.4f}")
print(f"Risk Accuracy: {results['eval_risk_accuracy']:.4f}")

# Save the model and its tokenizer side by side
model_save_path = './opt_thyroid_model'
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)
print(f"\nModel saved to {model_save_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Unique values before encoding:
Recurred: ['No' 'Yes']
Stage: ['I' 'II' 'IVB' 'III' 'IVA']
Risk: ['Low' 'Intermediate' 'High']

Number of classes per task: [2, 5, 3]


tokenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting training...


Epoch,Training Loss,Validation Loss,Recurred Accuracy,Stage Accuracy,Risk Accuracy
0,No log,0.597117,0.753247,0.909091,0.714286
1,No log,0.526068,0.857143,0.883117,0.805195
2,No log,0.464403,0.87013,0.909091,0.805195
