In [8]:
import os
import pandas as pd
import torch
import numpy as np
import random
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import Dataset
from datasets.features import Features, Value, Sequence
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from typing import List, Dict
import ollama
import time

# Set random seeds for reproducibility
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Unified data loading (handles all models and returns ids)
def load_data(csv_path: str) -> tuple[List[str], List[List[float]], int, List[str]]:
    encodings = ['utf-8', 'latin-1', 'cp1252']
    for encoding in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=encoding, delimiter=';', quotechar='"', on_bad_lines='warn')
            if not df.empty:
                break
        except UnicodeDecodeError:
            continue
    else:
        raise UnicodeDecodeError(f"Failed to decode {csv_path} with tried encodings: {encodings}")
    
    text_column = 'Text'
    all_columns = df.columns.tolist()
    if 'SANTA_ID' in all_columns:
        ids = df['SANTA_ID'].tolist()
        all_columns.remove('SANTA_ID')
    else:
        ids = [f"ID_{i}" for i in range(len(df))]
    if text_column in all_columns:
        all_columns.remove(text_column)
    
    # Handle 'Acts' column if present (for Santa)
    acts_column = 'Acts' if 'Acts' in all_columns else None
    numeric_label_columns = [col for col in all_columns if col != acts_column]
    
    # Regularize numeric label columns: convert to numeric, coerce errors to NaN, fill NaN with 0
    df[numeric_label_columns] = df[numeric_label_columns].apply(pd.to_numeric, errors='coerce').fillna(0)
    
    # One-hot encode the 'Acts' column
    if acts_column:
        acts_encoded = pd.get_dummies(df[acts_column], prefix='Acts')
        df = pd.concat([df, acts_encoded], axis=1)
        label_columns = numeric_label_columns + acts_encoded.columns.tolist()
    else:
        # For Mystery and Combined, take first 11 or all except last 3
        label_columns = all_columns[:11] if len(all_columns) <= 11 else all_columns[:-3]
    
    texts = df[text_column].tolist()
    labels = df[label_columns].values.astype(float).tolist()
    return texts, labels, len(label_columns), ids

def tokenize_function(examples, tokenizer):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = (pred.predictions > 0.5).astype(float)
    f1 = f1_score(labels, preds, average='micro')
    roc_auc = roc_auc_score(labels, pred.predictions, average='micro')
    acc = accuracy_score(labels, preds)
    return {'f1': f1, 'roc_auc': roc_auc, 'accuracy': acc}

# Function to train and evaluate a RoBERTa model
def train_and_evaluate_model(model_name: str, folder_path: str, output_dir: str, train_texts: List[str], train_labels: List[List[float]], 
                            test_texts: List[str], test_labels: List[List[float]], epochs: int = 3, batch_size: int = 16) -> Dict:
    tokenizer = RobertaTokenizer.from_pretrained('FacebookAI/roberta-base')
    
    # Prepare datasets
    features = Features({'text': Value('string'), 'labels': Sequence(Value('float32'))})
    train_dataset_dict = {'text': train_texts, 'labels': train_labels}
    test_dataset_dict = {'text': test_texts, 'labels': test_labels}
    
    train_dataset = Dataset.from_dict(train_dataset_dict, features=features)
    test_dataset = Dataset.from_dict(test_dataset_dict, features=features)
    
    train_dataset = train_dataset.map(lambda x: tokenize_function(x, tokenizer), batched=True)
    test_dataset = test_dataset.map(lambda x: tokenize_function(x, tokenizer), batched=True)
    
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    
    model = RobertaForSequenceClassification.from_pretrained(
        'FacebookAI/roberta-base', num_labels=len(train_labels[0]), problem_type='multi_label_classification'
    )
    
    training_args = TrainingArguments(
        output_dir=f"{output_dir}/{model_name}",
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        eval_strategy='epoch',
        save_strategy='epoch',
        save_total_limit=3,
        load_best_model_at_end=True,
        metric_for_best_model='f1',
        seed=SEED
    )
    
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )
    
    trainer.train()
    eval_results = trainer.evaluate()
    trainer.save_model(f"{output_dir}/{model_name}")
    tokenizer.save_pretrained(f"{output_dir}/{model_name}")
    
    print(f"Model {model_name} fine-tuned and evaluated, saved to {output_dir}/{model_name}")
    return eval_results

# Ollama few-shot prediction function
def get_ollama_predictions(test_texts: List[str], test_ids: List[str], num_labels: int) -> np.ndarray:
    examples = [
        """ACI08;"Mr. Hosmer Angel came to the house again and proposed that we 
should marry before father came back. He was in dreadful earnest 
and made me swear, with my hands on the Testament, that whatever 
happened I would always be true to him. Mother said he was quite 
right to make me swear, and that it was a sign of his passion. 
Mother was all in his favour from the first and was even fonder 
of him than I was. Then, when they talked of marrying within the 
week, I began to ask about father; but they both said never to 
mind about father, but just to tell him afterwards, and mother 
said she would make it all right with him.";0;0;0;1;0;1;1;0;0;0;none;0;0""",
        """ACI05;"I had had so many reasons to believe in my friend's subtle powers 
of reasoning and extraordinary energy in action that I felt that 
he must have some solid grounds for the assured and easy 
demeanour with which he treated the singular mystery which he had 
been called upon to fathom. Once only had I known him to fail, in 
the case of the King of Bohemia and of the Irene Adler 
photograph; but when I looked back to the weird business of the 
Sign of Four, and the extraordinary circumstances connected with 
the Study in Scarlet, I felt that it would be a strange tangle 
indeed which he could not unravel. 
I left him then, still puffing at his black clay pipe, with the 
conviction that when I came again on the next evening I would 
find that he held in his hands all the clues which would lead up 
to the identity of the disappearing bridegroom of Miss Mary 
Sutherland. 
A professional case of great gravity was engaging my own 
attention at the time, and the whole of next day I was busy at 
the bedside of the sufferer. It was not until close upon six 
o'clock that I found myself free and was able to spring into a 
hansom and drive to Baker Street, half afraid that I might be too 
late to assist at the dénouement of the little mystery. I found 
Sherlock Holmes alone, however, half asleep, with his long, thin 
form curled up in the recesses of his armchair. A formidable 
array of bottles and test-tubes, with the pungent cleanly smell 
of hydrochloric acid, told me that he had spent his day in the 
chemical work which was so dear to him.";0;1;0;0;0;1;1;1;0;0;none;0;0""",
        """TAIN27;"On the previous morning, two gentlemen had called to see his 
master. They were Italians, and the elder of the two, a man of about 
forty, gave his name as Signor Ascanio. 
The younger was a well-dressed lad of about twenty-four. 
Count Foscatini was evidently prepared for their visit and 
immediately sent Graves out upon some trivial errand. 
Here the man paused and hesitated in his story. 
In the end, however, he admitted that, curious as to the purport 
of the interview, he had not obeyed immediately, but had lingered 
about endeavouring to hear something of what was going on. 
The conversation was carried on in so low a tone that he was not 
as successful as he had hoped; but he gathered enough to make it 
clear that some kind of monetary proposition was being discussed, 
and that the basis of it was a threat. 
The discussion was anything but amicable. 
In the end, Count Foscatini raised his voice slightly, and the 
listener heard these words clearly:  'I have no time to argue 
further now, gentlemen. 
If you will dine with me to-morrow night at eight o’clock, we will
 resume the discussion.'  Afraid of being discovered listening, 
Graves had then hurried out to do his master’s errand. 
This evening the two men had arrived punctually at eight. 
During dinner they had talked of indifferent matters—politics, the
 weather, and the theatrical world. 
When Graves had placed the port upon the table and brought in the 
coffee his master told him that he might have the evening off. 
'Was that a usual proceeding of his when he had guests?' asked the
 inspector. 
'No, sir; it wasn’t. 
That’s what made me think it must be some business of a very 
unusual kind that he was going to discuss with these gentlemen.'  
That finished Graves’s story. 
He had gone out about 8.30, and, meeting a friend, had accompanied
 him to the Metropolitan Music Hall in Edgware Road. 
Nobody had seen the two men leave, but the time of the murder was 
fixed clearly enough at 8.47. 
A small clock on the writing-table had been swept off by 
Foscatini’s arm, and had stopped at that hour, which agreed with 
Miss Rider’s telephone summons. 
The police surgeon had made his examination of the body, and it 
was now lying on the couch. 
I saw the face for the first time—the olive complexion, the long 
nose, the luxuriant black moustache, and the full red lips drawn 
back from the dazzlingly white teeth. 
Not altogether a pleasant face. 
'Well,' said the inspector, refastening his notebook. 
'The case seems clear enough. 
The only difficulty will be to lay our hands on this Signor 
Ascanio. 
I suppose his address is not in the dead man’s pocket-book by any 
chance?'  As Poirot had said, the late Foscatini was an orderly 
man. 
Neatly written in small, precise handwriting was the inscription, 
'Signor Paolo Ascanio, Grosvenor Hotel.'  The inspector busied 
himself with the telephone, then turned to us with a grin. 
'Just in time. 
Our fine gentleman was off to catch the boat train to the 
Continong. 
Well, gentlemen, that’s about all we can do here. 
It’s a bad business, but straightforward enough. 
One of these Italian vendetta things, as likely as not.'  Thus 
airily dismissed, we found our way downstairs. 
Dr. Hawker was full of excitement. 
'Like the beginning of a novel, eh? 
Real exciting stuff. 
Wouldn’t believe it if you read about it.'  Poirot did not speak. 
He was very thoughtful. 
All the evening he had hardly opened his lips. 
'What says the master detective, eh?' asked Hawker, clapping him 
on the back. 
'Nothing to work your grey cells over this time.'  'You think 
not?'  'What could there be?'  'Well, for example, there is the 
window.'  'The window? 
But it was fastened. 
Nobody could have got out or in that way. 
I noticed it specially.'  'And why were you able to notice it?'  
The doctor looked puzzled. 
Poirot hastened to explain. 
'It is to the curtains I refer. 
They were not drawn. 
A little odd, that. 
And then there was the coffee. 
It was very black coffee.'  'Well, what of it?'  'Very black,' 
repeated Poirot. 
'In conjunction with that let us remember that very little of the 
rice soufflé was eaten, and we get—what?'  'Moonshine,' laughed 
the doctor. 
'You’re pulling my leg.'  'Never do I pull the leg. 
Hastings here knows that I am perfectly serious.'  'I don’t know 
what you are getting at, all the same,' I confessed. 
'You don’t suspect the manservant, do you? 
He might have been in with the gang, and put some dope in the 
coffee. 
I suppose they’ll test his alibi?'  'Without doubt, my friend; but
 it is the alibi of Signor Ascanio that interests me.'  'You think
 he has an alibi?'  'That is just what worries me. 
I have no doubt that we shall soon be enlightened on that point.'
 The _Daily Newsmonger_ enabled us to become conversant with 
succeeding events. 
Signor Ascanio was arrested and charged with the murder of Count 
Foscatini. 
When arrested, he denied knowing the Count, and declared he had 
never been near Regent’s Court either on the evening of the crime 
or on the previous morning. 
The younger man had disappeared entirely. 
Signor Ascanio had arrived alone at the Grosvenor Hotel from the 
Continent two days before the murder. 
All efforts to trace the second man failed. 
Ascanio, however, was not sent for trial. 
No less a personage than the Italian Ambassador himself came 
forward and testified at the police-court proceedings that Ascanio
 had been with him at the Embassy from eight till nine that 
evening. 
The prisoner was discharged. 
Naturally, a lot of people thought that the crime was a political 
one, and was being deliberately hushed up. 
Poirot had taken a keen interest in all these points. 
Nevertheless, I was somewhat surprised when he suddenly informed 
me one morning that he was expecting a visitor at eleven o’clock, 
and that that visitor was none other than Ascanio himself. 
'He wishes to consult you?'  '_Du tout_, Hastings. 
I wish to consult him.'  'What about?'  'The Regent’s Court 
murder.'  'You are going to prove that he did it?'  'A man cannot 
be tried twice for murder, Hastings. 
Endeavour to have the common sense. 
Ah, that is our friend’s ring. A few minutes later Signor 
Ascanio was ushered in—a small, thin man with a secretive and 
furtive glance in his eyes. 
He remained standing, darting suspicious glances from one to the 
other of us. 
'Monsieur Poirot?'  My little friend tapped himself gently on the 
chest. 
'Be seated, signor. 
You received my note. 
I am determined to get to the bottom of this mystery. 
In some small measure you can aid me. 
Let us commence. 
You—in company with a friend—visited the late Count Foscatini on 
the morning of Tuesday the 9th——'  The Italian made an angry 
gesture. 
'I did nothing of the sort. 
I have sworn in court——'  '_Précisément_—and I have a little idea 
that you have sworn falsely.'  'You threaten me? 
Bah! 
I have nothing to fear from you. 
I have been acquitted.'  'Exactly; and as I am not an imbecile, it
 is not with the gallows I threaten you—but with publicity. 
Publicity! 
I see that you do not like the word. 
I had an idea that you would not. 
My little ideas, you know, they are very valuable to me. 
Come, signor, your only chance is to be frank with me. 
I do not ask to know whose indiscretions brought you to England. 
I know this much, you came for the especial purpose of seeing 
Count Foscatini.'  'He was not a count,' growled the Italian. 
'I have already noted the fact that his name does not appear in 
the _Almanach de Gotha_. 
Never mind, the title of count is often useful in the profession 
of blackmailing.'  'I suppose I might as well be frank. 
You seem to know a good deal.'  'I have employed my grey cells to 
some advantage. 
Come, Signor Ascanio, you visited the dead man on the Tuesday 
morning—that is so, is it not?'  'Yes; but I never went there on 
the following evening. 
There was no need. 
I will tell you all.';0;0;0;0;0;0;1;0;0;0;none;0;0"""
    ]
    examples_str = "\n".join(examples)

    prompt_template = f"""
You are an expert in narratology, annotating literary texts based on the modular guidelines from Heyns and Van Zaanen (2024) for mystery novels (whodunits). For each text segment, output a CSV row with these columns: SANTA_ID;Text;Scene;Summary;Descriptive_passage;Analepsis;Prolepsis;Extradiegetic;Intradiegetic;Metadiegetic;Focalization;Voice_homodiegetic;Voice_heterodiegetic.
Here are the definitions for each tag to guide your annotation:

Scene: A segment of narrative discourse that presents the histoire (story), typically involving a coherent sequence of events with specific characters, time, and place, annotated using the SCENE tag.
Summary: A non-scene where events are condensed or narrated briefly, often as a sub-scene within a broader scene, assigned as a property of NON-SCENE.
Descriptive_passage: A non-scene focused on description rather than events, providing detailed pauses in the narrative for setting or character details, assigned as a property of NON-SCENE.
Analepsis: A flashback that shifts the narrative time from the present to the past, tagged as ANALEPSIS, which can be embedded or interruptive.
Prolepsis: A flash-forward occurring in forms like visions, prophecies, or foreshadowing, shifting narrative time to the future, tagged as PROLEPSIS, which can be embedded or interruptive.
Extradiegetic: The level of the narrator or implied author outside the story world, annotated with NARRATOR and value 0, potentially including metatextuality (meta).
Intradiegetic: The diegetic level of characters and events within the story, annotated with value 1 (and letters like 1a, 1b for sequential arrangement).
Metadiegetic: A secondary narrative embedded within the primary diegetic level, such as stories told by characters, annotated with value 2 (and letters for arrangement).
Focalization: The perspective from which the narrative is seen, indicating the narrator's access to information; can be zero/unrestricted (omniscient, knows more than characters), internal (limited to a character's knowledge), or external (observes without internal access); tag with FOCALIZATION and properties like EMBEDDED or INTERRUPTIVE.
Voice_homodiegetic: When the narrator appears in the story as a character, usually referring to themselves in the first person.
Voice_heterodiegetic: When the narrator does not appear in the story, with narration mostly in the third person.

Use 1 for 'yes' and 0 for 'no' in binary columns (e.g., Scene). 
Examples:
{examples_str}

Now annotate this new segment:
ID: {id}
Text: {text}
Output only the CSV row (no extra text).
"""

    annotations = []
    start_time = time.time()
    for i, (text, id_val) in enumerate(zip(test_texts, test_ids)):
        print(f"Processing row {i+1} of {len(test_texts)}...")
        prompt = prompt_template.format(id=id_val, text=text)
        response = ollama.chat(model='llama3:8b', messages=[{'role': 'user', 'content': prompt}], options={'temperature': 0})
        annotated_row = response['message']['content'].strip()
        annotations.append(annotated_row)
    end_time = time.time()
    total_time = end_time - start_time
    print(f"Total processing time: {total_time:.2f} seconds")
    
    # Parse annotations into a structured format for metrics
    pred_labels = []
    for row in annotations:
        # Split and convert to list of floats (skip SANTA_ID and Text)
        parts = row.split(';')[2:]  # Skip first two columns
        pred_labels.append([float(x) for x in parts])
    return np.array(pred_labels)

def compare_models():
    models = {
        'Santa': 'Finaal_data\\SANTA',
        'Mystery': 'Finaal_data\\MD',
        'Combined': 'Finaal_data\\Combination'
    }
    output_dir = './fine_tuned_models'
    results = {}
    
    # Aggregate data from all folders for a single train-test split
    all_texts, all_labels, num_labels, all_ids = [], [], 0, []
    for model_name, folder_path in models.items():
        first_csv = next((f for f in os.listdir(folder_path) if f.endswith('.csv')), None)
        if not first_csv:
            raise ValueError(f"No CSV files found in {folder_path}")
        csv_path = os.path.join(folder_path, first_csv)
        texts, labels, n_labels, ids = load_data(csv_path)
        all_texts.extend(texts)
        all_labels.extend(labels)
        all_ids.extend(ids)
        num_labels = n_labels  # Assume consistent number of labels
    
    # Single train-test split (80-20)
    train_texts, test_texts, train_labels, test_labels = train_test_split(
        all_texts, all_labels, test_size=0.2, random_state=SEED
    )
    train_ids, test_ids = train_test_split(all_ids, test_size=0.2, random_state=SEED)
    
    # Train and evaluate each RoBERTa model
    for model_name, folder_path in models.items():
        results[model_name] = train_and_evaluate_model(
            model_name, folder_path, output_dir, train_texts, train_labels, test_texts, test_labels
        )
    
    # Get Ollama predictions on the test set
    ollama_preds = get_ollama_predictions(test_texts, test_ids, num_labels)
    true_labels = np.array(test_labels)
    ollama_f1 = f1_score(true_labels, ollama_preds, average='micro')
    ollama_roc_auc = roc_auc_score(true_labels, ollama_preds, average='micro')
    ollama_acc = accuracy_score(true_labels, ollama_preds)
    results['Ollama'] = {'f1': ollama_f1, 'roc_auc': ollama_roc_auc, 'accuracy': ollama_acc}
    
    # Summary table
    summary_df = pd.DataFrame(results).T.round(4)
    print("\nOverall Metrics Summary:")
    print(summary_df)
    summary_df.to_csv('comparison_summary.csv', index=True)

if __name__ == "__main__":
    compare_models()

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,No log,0.693371,0.0,0.589732,0.0
2,No log,0.692847,0.0,0.592411,0.0
3,No log,0.691936,0.0,0.598214,0.0


Model Santa fine-tuned and evaluated, saved to ./fine_tuned_models/Santa


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,No log,0.704055,0.0,0.277679,0.0
2,No log,0.70355,0.0,0.2875,0.0
3,No log,0.702659,0.0,0.304018,0.0


Model Mystery fine-tuned and evaluated, saved to ./fine_tuned_models/Mystery


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/12 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
1,No log,0.704055,0.0,0.277679,0.0
2,No log,0.70355,0.0,0.2875,0.0
3,No log,0.702659,0.0,0.304018,0.0


Model Combined fine-tuned and evaluated, saved to ./fine_tuned_models/Combined


UnboundLocalError: cannot access local variable 'text' where it is not associated with a value

In [9]:
import os
import pandas as pd
import torch
import numpy as np
import random
from sklearn.model_selection import KFold
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import Dataset
from datasets.features import Features, Value, Sequence
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from typing import List, Dict
import ollama
import time

# Set random seeds for reproducibility
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Unified data loading
def load_data(csv_path: str) -> tuple[List[str], List[List[float]], int, List[str]]:
    encodings = ['utf-8', 'latin-1', 'cp1252']
    for encoding in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=encoding, delimiter=';', quotechar='"', on_bad_lines='warn')
            if not df.empty:
                break
        except UnicodeDecodeError:
            continue
    else:
        raise UnicodeDecodeError(f"Failed to decode {csv_path} with tried encodings: {encodings}")
    
    text_column = 'Text'
    all_columns = df.columns.tolist()
    if 'SANTA_ID' in all_columns:
        ids = df['SANTA_ID'].tolist()
        all_columns.remove('SANTA_ID')
    else:
        ids = [f"ID_{i}" for i in range(len(df))]
    if text_column in all_columns:
        all_columns.remove(text_column)
    
    # Remove the last 3 columns to match 11 labels
    label_columns = all_columns[:-3]
    
    # Regularize label columns
    df[label_columns] = df[label_columns].apply(pd.to_numeric, errors='coerce').fillna(0)
    
    texts = df[text_column].tolist()
    labels = df[label_columns].values.astype(float).tolist()
    return texts, labels, len(label_columns), ids

def tokenize_function(examples, tokenizer):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = (pred.predictions > 0.5).astype(float)
    f1 = f1_score(labels, preds, average='micro')
    roc_auc = roc_auc_score(labels, pred.predictions, average='micro')
    acc = accuracy_score(labels, preds)
    return {'f1': f1, 'roc_auc': roc_auc, 'accuracy': acc}

# Function to perform k-fold training and evaluation
def train_with_kfold(model_name: str, folder_path: str, output_dir: str, k: int = 5, epochs: int = 20, batch_size: int = 16) -> Dict:
    all_texts, all_labels, num_labels, all_ids = [], [], 0, []
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            csv_path = os.path.join(folder_path, filename)
            texts, labels, n_labels, ids = load_data(csv_path)
            all_texts.extend(texts)
            all_labels.extend(labels)
            all_ids.extend(ids)
            num_labels = n_labels  # Assume consistent number of labels
    
    if len(all_texts) < k:
        raise ValueError(f"Insufficient samples ({len(all_texts)}) for {k}-fold cross-validation")
    
    kf = KFold(n_splits=k, shuffle=True, random_state=SEED)
    fold_results = []
    
    for fold, (train_idx, test_idx) in enumerate(kf.split(all_texts)):
        print(f"Training {model_name} fold {fold + 1}/{k}...")
        train_texts = [all_texts[i] for i in train_idx]
        test_texts = [all_texts[i] for i in test_idx]
        train_labels = [all_labels[i] for i in train_idx]
        test_labels = [all_labels[i] for i in test_idx]
        train_ids = [all_ids[i] for i in train_idx]
        test_ids = [all_ids[i] for i in test_idx]
        
        features = Features({'text': Value('string'), 'labels': Sequence(Value('float32'))})
        train_dataset_dict = {'text': train_texts, 'labels': train_labels}
        test_dataset_dict = {'text': test_texts, 'labels': test_labels}
        
        train_dataset = Dataset.from_dict(train_dataset_dict, features=features)
        test_dataset = Dataset.from_dict(test_dataset_dict, features=features)
        
        tokenizer = RobertaTokenizer.from_pretrained('FacebookAI/roberta-base')
        train_dataset = train_dataset.map(lambda x: tokenize_function(x, tokenizer), batched=True)
        test_dataset = test_dataset.map(lambda x: tokenize_function(x, tokenizer), batched=True)
        
        train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
        test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
        
        model = RobertaForSequenceClassification.from_pretrained(
            'FacebookAI/roberta-base', num_labels=num_labels, problem_type='multi_label_classification'
        )
        
        training_args = TrainingArguments(
            output_dir=f"{output_dir}/{model_name}_fold_{fold + 1}",
            num_train_epochs=epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            eval_strategy='steps',
            eval_steps=100,
            save_strategy='steps',
            save_steps=100,
            save_total_limit=3,
            load_best_model_at_end=True,
            metric_for_best_model='f1',
            seed=SEED
        )
        
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
        )
        
        trainer.train()
        eval_results = trainer.evaluate()
        fold_results.append(eval_results)
        trainer.save_model(f"{output_dir}/{model_name}_fold_{fold + 1}")
        tokenizer.save_pretrained(f"{output_dir}/{model_name}_fold_{fold + 1}")
    
    avg_results = {k: np.mean([r[k] for r in fold_results]) for k in fold_results[0].keys() if k not in ['eval_runtime', 'eval_samples_per_second', 'eval_steps_per_second']}
    return avg_results

# Ollama prediction function
def get_ollama_predictions(test_texts: List[str], test_ids: List[str], num_labels: int) -> np.ndarray:
    examples = [
        """ACI08;"Mr. Hosmer Angel came to the house again and proposed that we 
should marry before father came back. He was in dreadful earnest 
and made me swear, with my hands on the Testament, that whatever 
happened I would always be true to him. Mother said he was quite 
right to make me swear, and that it was a sign of his passion. 
Mother was all in his favour from the first and was even fonder 
of him than I was. Then, when they talked of marrying within the 
week, I began to ask about father; but they both said never to 
mind about father, but just to tell him afterwards, and mother 
said she would make it all right with him.";0;0;0;1;0;1;1;0;0;0;none;0;0""",
        """ACI05;"I had had so many reasons to believe in my friend's subtle powers 
of reasoning and extraordinary energy in action that I felt that 
he must have some solid grounds for the assured and easy 
demeanour with which he treated the singular mystery which he had 
been called upon to fathom. Once only had I known him to fail, in 
the case of the King of Bohemia and of the Irene Adler 
photograph; but when I looked back to the weird business of the 
Sign of Four, and the extraordinary circumstances connected with 
the Study in Scarlet, I felt that it would be a strange tangle 
indeed which he could not unravel. 
I left him then, still puffing at his black clay pipe, with the 
conviction that when I came again on the next evening I would 
find that he held in his hands all the clues which would lead up 
to the identity of the disappearing bridegroom of Miss Mary 
Sutherland. 
A professional case of great gravity was engaging my own 
attention at the time, and the whole of next day I was busy at 
the bedside of the sufferer. It was not until close upon six 
o'clock that I found myself free and was able to spring into a 
hansom and drive to Baker Street, half afraid that I might be too 
late to assist at the dénouement of the little mystery. I found 
Sherlock Holmes alone, however, half asleep, with his long, thin 
form curled up in the recesses of his armchair. A formidable 
array of bottles and test-tubes, with the pungent cleanly smell 
of hydrochloric acid, told me that he had spent his day in the 
chemical work which was so dear to him.";0;1;0;0;0;1;1;1;0;0;none;0;0""",
        """TAIN27;"On the previous morning, two gentlemen had called to see his 
master. They were Italians, and the elder of the two, a man of about 
forty, gave his name as Signor Ascanio. 
The younger was a well-dressed lad of about twenty-four. 
Count Foscatini was evidently prepared for their visit and 
immediately sent Graves out upon some trivial errand. 
Here the man paused and hesitated in his story. 
In the end, however, he admitted that, curious as to the purport 
of the interview, he had not obeyed immediately, but had lingered 
about endeavouring to hear something of what was going on. 
The conversation was carried on in so low a tone that he was not 
as successful as he had hoped; but he gathered enough to make it 
clear that some kind of monetary proposition was being discussed, 
and that the basis of it was a threat. 
The discussion was anything but amicable. 
In the end, Count Foscatini raised his voice slightly, and the 
listener heard these words clearly:  'I have no time to argue 
further now, gentlemen. 
If you will dine with me to-morrow night at eight o’clock, we will
 resume the discussion.'  Afraid of being discovered listening, 
Graves had then hurried out to do his master’s errand. 
This evening the two men had arrived punctually at eight. 
During dinner they had talked of indifferent matters—politics, the
 weather, and the theatrical world. 
When Graves had placed the port upon the table and brought in the 
coffee his master told him that he might have the evening off. 
'Was that a usual proceeding of his when he had guests?' asked the
 inspector. 
'No, sir; it wasn’t. 
That’s what made me think it must be some business of a very 
unusual kind that he was going to discuss with these gentlemen.'  
That finished Graves’s story. 
He had gone out about 8.30, and, meeting a friend, had accompanied
 him to the Metropolitan Music Hall in Edgware Road. 
Nobody had seen the two men leave, but the time of the murder was 
fixed clearly enough at 8.47. 
A small clock on the writing-table had been swept off by 
Foscatini’s arm, and had stopped at that hour, which agreed with 
Miss Rider’s telephone summons. 
The police surgeon had made his examination of the body, and it 
was now lying on the couch. 
I saw the face for the first time—the olive complexion, the long 
nose, the luxuriant black moustache, and the full red lips drawn 
back from the dazzlingly white teeth. 
Not altogether a pleasant face. 
'Well,' said the inspector, refastening his notebook. 
'The case seems clear enough. 
The only difficulty will be to lay our hands on this Signor 
Ascanio. 
I suppose his address is not in the dead man’s pocket-book by any 
chance?'  As Poirot had said, the late Foscatini was an orderly 
man. 
Neatly written in small, precise handwriting was the inscription, 
'Signor Paolo Ascanio, Grosvenor Hotel.'  The inspector busied 
himself with the telephone, then turned to us with a grin. 
'Just in time. 
Our fine gentleman was off to catch the boat train to the 
Continong. 
Well, gentlemen, that’s about all we can do here. 
It’s a bad business, but straightforward enough. 
One of these Italian vendetta things, as likely as not.'  Thus 
airily dismissed, we found our way downstairs. 
Dr. Hawker was full of excitement. 
'Like the beginning of a novel, eh? 
Real exciting stuff. 
Wouldn’t believe it if you read about it.'  Poirot did not speak. 
He was very thoughtful. 
All the evening he had hardly opened his lips. 
'What says the master detective, eh?' asked Hawker, clapping him 
on the back. 
'Nothing to work your grey cells over this time.'  'You think 
not?'  'What could there be?'  'Well, for example, there is the 
window.'  'The window? 
But it was fastened. 
Nobody could have got out or in that way. 
I noticed it specially.'  'And why were you able to notice it?'  
The doctor looked puzzled. 
Poirot hastened to explain. 
'It is to the curtains I refer. 
They were not drawn. 
A little odd, that. 
And then there was the coffee. 
It was very black coffee.'  'Well, what of it?'  'Very black,' 
repeated Poirot. 
'In conjunction with that let us remember that very little of the 
rice soufflé was eaten, and we get—what?'  'Moonshine,' laughed 
the doctor. 
'You’re pulling my leg.'  'Never do I pull the leg. 
Hastings here knows that I am perfectly serious.'  'I don’t know 
what you are getting at, all the same,' I confessed. 
'You don’t suspect the manservant, do you? 
He might have been in with the gang, and put some dope in the 
coffee. 
I suppose they’ll test his alibi?'  'Without doubt, my friend; but
 it is the alibi of Signor Ascanio that interests me.'  'You think
 he has an alibi?'  'That is just what worries me. 
I have no doubt that we will soon be enlightened on that point.'
 The _Daily Newsmonger_ enabled us to become conversant with 
succeeding events. 
Signor Ascanio was arrested and charged with the murder of Count 
Foscatini. 
When arrested, he denied knowing the Count, and declared he had 
never been near Regent’s Court either on the evening of the crime 
or on the previous morning. 
The younger man had disappeared entirely. 
Signor Ascanio had arrived alone at the Grosvenor Hotel from the 
Continent two days before the murder. 
All efforts to trace the second man failed. 
Ascanio, however, was not sent for trial. 
No less a personage than the Italian Ambassador himself came 
forward and testified at the police-court proceedings that Ascanio
 had been with him at the Embassy from eight till nine that 
evening. 
The prisoner was discharged. 
Naturally, a lot of people thought that the crime was a political 
one, and was being deliberately hushed up. 
Poirot had taken a keen interest in all these points. 
Nevertheless, I was somewhat surprised when he suddenly informed 
me one morning that he was expecting a visitor at eleven o’clock, 
and that that visitor was none other than Ascanio himself. 
'He wishes to consult you?'  '_Du tout_, Hastings. 
I wish to consult him.'  'What about?'  'The Regent’s Court 
murder.'  'You are going to prove that he did it?'  'A man cannot 
be tried twice for murder, Hastings. 
Endeavour to have the common sense. 
Ah, that is our friend’s ring. A few minutes later Signor 
Ascanio was ushered in—a small, thin man with a secretive and 
furtive glance in his eyes. 
He remained standing, darting suspicious glances from one to the 
other of us. 
'Monsieur Poirot?'  My little friend tapped himself gently on the 
chest. 
'Be seated, signor. 
You received my note. 
I am determined to get to the bottom of this mystery. 
In some small measure you can aid me. 
Let us commence. 
You—in company with a friend—visited the late Count Foscatini on 
the morning of Tuesday the 9th——'  The Italian made an angry 
gesture. 
'I did nothing of the sort. 
I have sworn in court——'  '_Précisément_—and I have a little idea 
that you have sworn falsely.'  'You threaten me? 
Bah! 
I have nothing to fear from you. 
I have been acquitted.'  'Exactly; and as I am not an imbecile, it
 is not with the gallows I threaten you—but with publicity. 
Publicity! 
I see that you do not like the word. 
I had an idea that you would not. 
My little ideas, you know, they are very valuable to me. 
Come, signor, your only chance is to be frank with me. 
I do not ask to know whose indiscretions brought you to England. 
I know this much, you came for the especial purpose of seeing 
Count Foscatini.'  'He was not a count,' growled the Italian. 
'I have already noted the fact that his name does not appear in 
the _Almanach de Gotha_. 
Never mind, the title of count is often useful in the profession 
of blackmailing.'  'I suppose I might as well be frank. 
You seem to know a good deal.'  'I have employed my grey cells to 
some advantage. 
Come, Signor Ascanio, you visited the dead man on the Tuesday 
morning—that is so, is it not?'  'Yes; but I never went there on 
the following evening. 
There was no need. 
I will tell you all.';0;0;0;0;0;0;1;0;0;0;none;0;0"""
    ]
    examples_str = "\n".join(examples)

    prompt_template = f"""
You are an expert in narratology, annotating literary texts based on the modular guidelines from Heyns and Van Zaanen (2024) for mystery novels (whodunits). For each text segment, output a CSV row with these columns: SANTA_ID;Text;Scene;Summary;Descriptive_passage;Analepsis;Prolepsis;Extradiegetic;Intradiegetic;Metadiegetic;Focalization;Voice_homodiegetic;Voice_heterodiegetic.
Here are the definitions for each tag to guide your annotation:

Scene: A segment of narrative discourse that presents the histoire (story), typically involving a coherent sequence of events with specific characters, time, and place, annotated using the SCENE tag.
Summary: A non-scene where events are condensed or narrated briefly, often as a sub-scene within a broader scene, assigned as a property of NON-SCENE.
Descriptive_passage: A non-scene focused on description rather than events, providing detailed pauses in the narrative for setting or character details, assigned as a property of NON-SCENE.
Analepsis: A flashback that shifts the narrative time from the present to the past, tagged as ANALEPSIS, which can be embedded or interruptive.
Prolepsis: A flash-forward occurring in forms like visions, prophecies, or foreshadowing, shifting narrative time to the future, tagged as PROLEPSIS, which can be embedded or interruptive.
Extradiegetic: The level of the narrator or implied author outside the story world, annotated with NARRATOR and value 0, potentially including metatextuality (meta).
Intradiegetic: The diegetic level of characters and events within the story, annotated with value 1 (and letters like 1a, 1b for sequential arrangement).
Metadiegetic: A secondary narrative embedded within the primary diegetic level, such as stories told by characters, annotated with value 2 (and letters for arrangement).
Focalization: The perspective from which the narrative is seen, indicating the narrator's access to information; can be zero/unrestricted (omniscient, knows more than characters), internal (limited to a character's knowledge), or external (observes without internal access); tag with FOCALIZATION and properties like EMBEDDED or INTERRUPTIVE.
Voice_homodiegetic: When the narrator appears in the story as a character, usually referring to themselves in the first person.
Voice_heterodiegetic: When the narrator does not appear in the story, with narration mostly in the third person.

Use 1 for 'yes' and 0 for 'no' in binary columns (e.g., Scene). 
Examples:
{examples_str}

Now annotate this new segment:
ID: {id}
Text: {text}
Output only the CSV row (no extra text).
"""

    annotations = []
    start_time = time.time()
    for text, id_val in zip(test_texts, test_ids):
        print(f"Processing row for ID: {id_val}")
        prompt = prompt_template.format(id=id_val, text=text)
        response = ollama.chat(model='llama3:8b', messages=[{'role': 'user', 'content': prompt}], options={'temperature': 0})
        annotated_row = response['message']['content'].strip()
        annotations.append(annotated_row)
    end_time = time.time()
    total_time = end_time - start_time
    print(f"Total processing time: {total_time:.2f} seconds")
    
    # Parse annotations into a structured format for metrics
    pred_labels = []
    for row in annotations:
        parts = row.split(';')[2:]  # Skip SANTA_ID and Text
        pred_labels.append([float(x) for x in parts])
    return np.array(pred_labels)

def compare_models():
    models = {
        'Santa': 'Finaal_data\\SANTA',
        'Mystery': 'Finaal_data\\MD',
        'Combined': 'Finaal_data\\Combination'
    }
    output_dir = './fine_tuned_models'
    results = {}
    
    # Perform k-fold for each model separately
    for model_name, folder_path in models.items():
        results[model_name] = train_with_kfold(model_name, folder_path, output_dir, k=5, epochs=20, batch_size=16)
    
    # Prepare Combined data for Ollama evaluation
    all_texts, all_labels, num_labels, all_ids = [], [], 0, []
    folder_path = models['Combined']
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            csv_path = os.path.join(folder_path, filename)
            texts, labels, n_labels, ids = load_data(csv_path)
            all_texts.extend(texts)
            all_labels.extend(labels)
            all_ids.extend(ids)
            num_labels = n_labels  # Assume consistent number of labels
    
    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)
    ollama_fold_results = []
    
    for fold, (train_idx, test_idx) in enumerate(kf.split(all_texts)):
        print(f"Evaluating Ollama fold {fold + 1}/5...")
        test_texts = [all_texts[i] for i in test_idx]
        test_labels = [all_labels[i] for i in test_idx]
        test_ids = [all_ids[i] for i in test_idx]
        
        ollama_preds = get_ollama_predictions(test_texts, test_ids, num_labels)
        true_labels = np.array(test_labels)
        f1 = f1_score(true_labels, ollama_preds, average='micro')
        roc_auc = roc_auc_score(true_labels, ollama_preds, average='micro')
        acc = accuracy_score(true_labels, ollama_preds)
        ollama_fold_results.append({'f1': f1, 'roc_auc': roc_auc, 'accuracy': acc})
    
    avg_ollama_results = {k: np.mean([r[k] for r in ollama_fold_results]) for k in ollama_fold_results[0].keys()}
    results['Ollama'] = avg_ollama_results
    
    # Summary table
    summary_df = pd.DataFrame(results).T.round(4)
    print("\nOverall Metrics Summary:")
    print(summary_df)
    summary_df.to_csv('comparison_summary.csv', index=True)

if __name__ == "__main__":
    compare_models()

Training Santa fold 1/5...


Map:   0%|          | 0/1873 [00:00<?, ? examples/s]

Map:   0%|          | 0/469 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.4155,0.409222,0.0,0.797435,0.093817
200,0.3179,0.318321,0.544,0.892455,0.373134
300,0.2819,0.323426,0.584416,0.883699,0.381663
400,0.261,0.303212,0.618045,0.911246,0.434968
500,0.1885,0.310326,0.61165,0.90488,0.4371
600,0.2184,0.291808,0.648327,0.918697,0.458422
700,0.1916,0.302428,0.63315,0.909639,0.466951
800,0.2111,0.310432,0.636637,0.910249,0.45629
900,0.1623,0.311749,0.661765,0.914587,0.503198
1000,0.1652,0.332504,0.663345,0.909226,0.530917


Training Santa fold 2/5...


Map:   0%|          | 0/1873 [00:00<?, ? examples/s]

Map:   0%|          | 0/469 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.4033,0.400968,0.296952,0.824486,0.081023
200,0.3277,0.299075,0.557118,0.913764,0.360341
300,0.2731,0.27405,0.671192,0.923032,0.460554
400,0.2605,0.281759,0.701876,0.921231,0.477612
500,0.2826,0.2725,0.6407,0.921372,0.441365
600,0.2377,0.268914,0.707825,0.926846,0.515991
700,0.2458,0.27218,0.649652,0.925455,0.464819
800,0.2248,0.27537,0.658499,0.924118,0.481876
900,0.185,0.295738,0.680203,0.91732,0.520256
1000,0.1832,0.288113,0.702006,0.923271,0.565032


Training Santa fold 3/5...


Map:   0%|          | 0/1874 [00:00<?, ? examples/s]

Map:   0%|          | 0/468 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.4205,0.396777,0.0,0.836207,0.074786
200,0.3318,0.311506,0.432379,0.907619,0.292735
300,0.2753,0.279322,0.669162,0.922881,0.448718
400,0.2825,0.281901,0.626794,0.920306,0.42094
500,0.2345,0.298049,0.627543,0.90952,0.444444
600,0.2168,0.278113,0.710035,0.927709,0.523504
700,0.2198,0.267333,0.688188,0.930581,0.508547
800,0.2335,0.274551,0.646081,0.925681,0.463675
900,0.175,0.278854,0.705801,0.934951,0.542735
1000,0.1498,0.280669,0.700428,0.932178,0.534188


Training Santa fold 4/5...


Map:   0%|          | 0/1874 [00:00<?, ? examples/s]

Map:   0%|          | 0/468 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.4135,0.384575,0.010363,0.848815,0.07906
200,0.3145,0.313484,0.52305,0.899827,0.356838
300,0.2956,0.301029,0.60834,0.905213,0.40812
400,0.3231,0.302876,0.55794,0.899275,0.361111
500,0.2235,0.292567,0.645802,0.911122,0.457265
600,0.233,0.293338,0.624214,0.914067,0.435897
700,0.2148,0.28521,0.593776,0.917565,0.403846
800,0.1712,0.293266,0.597164,0.918162,0.416667
900,0.1686,0.314899,0.659957,0.913769,0.482906
1000,0.1518,0.325064,0.628615,0.912044,0.474359


Training Santa fold 5/5...


Map:   0%|          | 0/1874 [00:00<?, ? examples/s]

Map:   0%|          | 0/468 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.4296,0.412602,0.0,0.825189,0.066239
200,0.3245,0.307884,0.662816,0.916358,0.42094
300,0.2947,0.305888,0.500445,0.914091,0.33547
400,0.2655,0.279082,0.645358,0.923685,0.431624
500,0.2396,0.286276,0.666194,0.922303,0.476496
600,0.2258,0.286033,0.68917,0.923378,0.50641
700,0.2057,0.28501,0.675381,0.923957,0.457265
800,0.2612,0.27588,0.639571,0.929339,0.448718
900,0.1882,0.286252,0.669123,0.925873,0.461538
1000,0.1579,0.296052,0.680115,0.925567,0.5


Training Mystery fold 1/5...


Map:   0%|          | 0/229 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.3266,0.343149,0.0,0.848003,0.310345
200,0.2366,0.28585,0.536585,0.883171,0.465517
300,0.1266,0.330557,0.589928,0.842074,0.517241


Training Mystery fold 2/5...


Map:   0%|          | 0/229 [00:00<?, ? examples/s]

Map:   0%|          | 0/58 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.3301,0.325412,0.0,0.873683,0.310345
200,0.2382,0.278525,0.592,0.91062,0.534483
300,0.1575,0.318561,0.473684,0.884633,0.413793


Training Mystery fold 3/5...


Map:   0%|          | 0/230 [00:00<?, ? examples/s]

Map:   0%|          | 0/57 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.3446,0.299403,0.0,0.888263,0.385965
200,0.2303,0.322293,0.601626,0.855103,0.561404
300,0.165,0.377526,0.470588,0.819439,0.421053


Training Mystery fold 4/5...


Map:   0%|          | 0/230 [00:00<?, ? examples/s]

Map:   0%|          | 0/57 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.3251,0.33528,0.0,0.858285,0.280702
200,0.2091,0.352364,0.407407,0.841977,0.403509
300,0.1573,0.343962,0.515152,0.877529,0.491228


Training Mystery fold 5/5...


Map:   0%|          | 0/230 [00:00<?, ? examples/s]

Map:   0%|          | 0/57 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
100,0.3293,0.314106,0.0,0.887909,0.315789
200,0.2411,0.281446,0.5,0.903128,0.473684
300,0.1581,0.367275,0.592105,0.905908,0.491228


Training Combined fold 1/5...


Map:   0%|          | 0/2102 [00:00<?, ? examples/s]

Map:   0%|          | 0/526 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: stack expects each tensor to be equal size, but got [8] at entry 0 and [11] at entry 3

Train slegs combined en few-Shot