Adapted from the following script, modified to handle multi-label classification:
https://github.com/VanekPetr/flan-t5-text-classifier/blob/main/classifier/AutoModelForSeq2SeqLM/flan-t5-finetuning.py

# Login to Hugging Face

In [1]:
#!pip install -q transformers datasets sentencepiece accelerate evaluate hf_transfer huggingface_hub scikit-learn protobuf nltk

In [2]:
from huggingface_hub import login
import os

login(token=os.getenv("HF_TOKEN"))

#from huggingface_hub import notebook_login
#notebook_login()
# Setup

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Checkpoint that is fine-tuned and the seed used when shuffling the splits.
base_model_id = "google/flan-t5-small"
seed = 2024

# Training schedule.
num_train_epochs = 1
batch_size = 2
learning_rate = 5e-5  # other values tried: 3e-4, 1e-3

# Regularisation.
dropout_rate = 0.1
weight_decay = 0.001

# Evaluation.
label_threshold = 0.5

# Output locations.
results_output_dir = "results"
logging_dir = "logs"

# Hugging Face Hub identifiers; everything lives under one organisation.
hf_site_id = "2024-mcm-everitt-ryan"
# Alternative dataset: f'{hf_site_id}/job-bias-synthetic-human-verified'
dataset_id = "/".join((hf_site_id, "job-bias-synthetic-human-benchmark-v2"))
# Last path component of the base checkpoint id, e.g. 'flan-t5-small'.
base_model_name = base_model_id.rpartition("/")[2]
model_id = base_model_name + "-job-bias-seq2seq-cls"
hub_model_id = "/".join((hf_site_id, model_id))


# Dataset

In [26]:
from datasets import load_dataset

dataset = load_dataset(dataset_id)
column_names = dataset['train'].column_names

text_col = 'text'
# Every column named "label_<bias>" is one binary target of the multi-label task.
label_cols = [col for col in column_names if col.startswith('label_')]

# Label names are the column names without the "label_" prefix.
labels = [label.replace("label_", "") for label in label_cols]

id2label = dict(enumerate(labels))
label2id = {label: idx for idx, label in enumerate(labels)}

# Keep only the id, the text and the label columns needed for multi-label classification.
keep_columns = ['id', text_col] + label_cols
# The loop variable is named `split`, not `type`, so the builtin `type` is not
# shadowed for the cells that run afterwards.
for split in ["train", "val", "test"]:
    dataset[split] = dataset[split].remove_columns(
        [col for col in dataset[split].column_names if col not in keep_columns])
    # Smoke-test subsample: each split is shuffled and cut down to its first 5 rows.
    dataset[split] = dataset[split].shuffle(seed=seed).select(range(5))

dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'label_age', 'label_disability', 'label_feminine', 'label_general', 'label_masculine', 'label_neutral', 'label_racial', 'label_sexuality', 'text'],
        num_rows: 5
    })
    val: Dataset({
        features: ['id', 'label_age', 'label_disability', 'label_feminine', 'label_general', 'label_masculine', 'label_neutral', 'label_racial', 'label_sexuality', 'text'],
        num_rows: 5
    })
    test: Dataset({
        features: ['id', 'label_age', 'label_disability', 'label_feminine', 'label_general', 'label_masculine', 'label_neutral', 'label_racial', 'label_sexuality', 'text'],
        num_rows: 5
    })
})

# Tokeniser

In [27]:
from transformers import AutoTokenizer
from huggingface_hub import HfFolder

# Load the tokenizer belonging to the base checkpoint, authenticating with the
# token returned by HfFolder.get_token().
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    token=HfFolder.get_token(),
)
#tokenizer

In [28]:
from datasets import concatenate_datasets
from transformers import Seq2SeqTrainingArguments


# Tokenise the train and test texts together (all original columns dropped) so
# the longest encoded input can be measured.
tokenized_inputs = concatenate_datasets(
    [dataset[split_name] for split_name in ("train", "test")]
).map(
    lambda batch: tokenizer(batch["text"], truncation=True),
    batched=True,
    remove_columns=dataset['train'].column_names,
)
# Longest tokenised input; used later as the padding/truncation length.
max_source_length = max(len(ids) for ids in tokenized_inputs["input_ids"])
print(f"Max source length: {max_source_length}")

# Prepare target sequences for T5
def create_target_sequence(example):
    """Build the comma-separated target string for one example.

    Every key of ``example`` that starts with ``label_`` and has a truthy
    value contributes its name, with ``label_`` removed, to the result. Names
    appear in the order the keys are iterated and are joined with ``,``.

    Returns:
        The joined label names with surrounding whitespace stripped; an empty
        string when no label is set.
    """
    active = []
    for column, flag in example.items():
        if column.startswith('label_') and flag:
            active.append(column.replace('label_', ''))
    return ','.join(active).strip()

# Add target sequence to the dataset
# Replace the per-label columns with a single text column named 'labels'.
dataset = dataset.map(
    lambda example: {'labels': create_target_sequence(example)},
    remove_columns=[
        name for name in dataset['train'].column_names if name.startswith('label_')
    ],
)

# Tokenise the train and test target strings to measure the longest encoded target.
tokenized_targets = concatenate_datasets(
    [dataset[split_name] for split_name in ("train", "test")]
).map(
    lambda batch: tokenizer(batch["labels"], truncation=True),
    batched=True,
    remove_columns=dataset['train'].column_names,
)
max_target_length = max(len(ids) for ids in tokenized_targets["input_ids"])
print(f"Max target length: {max_target_length}")

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Max source length: 477


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Max target length: 9


In [29]:
#tokenized_targets["input_ids"]

# Model

In [30]:
from transformers import AutoModelForSeq2SeqLM, AutoConfig

# Load the base checkpoint's configuration with the dropout rate overridden,
# then load the seq2seq model using that configuration.
config = AutoConfig.from_pretrained(base_model_id, dropout_rate=dropout_rate)
model = AutoModelForSeq2SeqLM.from_pretrained(base_model_id, config=config)
#model

In [31]:
model.config

T5Config {
  "_name_or_path": "google/flan-t5-small",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 1024,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 8,
  "num_heads": 6,
  "num_layers": 8,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "ma

# Preprocessing/Evaluation functions

In [73]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, \
    classification_report
import nltk
from transformers import  DataCollatorForSeq2Seq, Seq2SeqTrainer
import numpy as np
from nltk import sent_tokenize
from typing import List, Tuple
from datasets import Dataset


def preprocess_function(sample: Dataset, padding: str = "max_length") -> dict:
    """Tokenise a batch of texts and their target label strings.

    Args:
        sample: Batch providing a ``"text"`` and a ``"labels"`` sequence.
        padding: Padding strategy passed to the tokenizer for both inputs and
            targets.

    Returns:
        The tokenised inputs with an added ``"labels"`` entry holding the
        target token ids. With ``padding="max_length"`` the padding positions
        of the targets are replaced by -100.
    """
    model_inputs = tokenizer(
        list(sample["text"]),
        max_length=max_source_length,
        padding=padding,
        truncation=True,
    )
    targets = tokenizer(
        text_target=list(sample["labels"]),
        max_length=max_target_length,
        padding=padding,
        truncation=True,
    )

    target_ids = targets["input_ids"]
    if padding == "max_length":
        # Swap pad ids for -100, the value used as the label padding id elsewhere in this notebook.
        pad_id = tokenizer.pad_token_id
        target_ids = [
            [-100 if token == pad_id else token for token in sequence]
            for sequence in target_ids
        ]

    model_inputs["labels"] = target_ids
    return model_inputs

def postprocess_text(labels: List[str], preds: List[str]) -> Tuple[List[str], List[str]]:
    """Strip each string and put every sentence found by ``sent_tokenize`` on its own line.

    Returns:
        ``(labels, preds)`` after processing, in that order.
    """
    def _one_sentence_per_line(text: str) -> str:
        return "\n".join(sent_tokenize(text.strip()))

    cleaned_preds = [_one_sentence_per_line(pred) for pred in preds]
    cleaned_labels = [_one_sentence_per_line(label) for label in labels]
    return cleaned_labels, cleaned_preds


def _labels_in(text):
    """Return the set of non-empty, stripped labels of a comma-separated string."""
    return {part.strip() for part in text.split(',') if part.strip()}


def compute_metrics(eval_predictions):
    """Decode generated and reference label strings and score them as multi-label output.

    Args:
        eval_predictions: A ``(predictions, label_ids)`` pair of token-id
            arrays. ``predictions`` may also be a tuple, in which case its
            first element is used.

    Returns:
        A dict with ``accuracy`` and, for each of the micro, macro, samples and
        weighted averages, ``f1_*``, ``precision_*`` and ``recall_*``.
        ``roc_auc_*`` is added only when the reference labels of the batch
        cover as many distinct labels as ``id2label`` defines.

    Side effects:
        Prints a scikit-learn classification report.
    """
    y_hat, y = eval_predictions

    if isinstance(y_hat, tuple):
        y_hat = y_hat[0]

    # -100 is not a vocabulary id, so it must be mapped back to the pad token
    # before decoding. The labels carry it from preprocessing. The predictions
    # are cleaned as well because the Trainer pads gathered predictions with
    # -100 (see the Transformers seq2seq examples).
    y = np.where(y != -100, y, tokenizer.pad_token_id)
    y_hat = np.where(y_hat != -100, y_hat, tokenizer.pad_token_id)

    y_str = tokenizer.batch_decode(y, skip_special_tokens=True)
    y_hat_str = tokenizer.batch_decode(y_hat, skip_special_tokens=True)

    y_str, y_hat_str = postprocess_text(y_str, y_hat_str)

    # Compare whole comma-separated tokens instead of substrings, so a
    # generated word that merely contains a label name does not count as that
    # label. This matches how classify_text interprets generated text.
    true_sets = [_labels_in(t) for t in y_str]
    pred_sets = [_labels_in(p) for p in y_hat_str]

    target_names = sorted(set(id2label.values()))

    # ROC AUC is computed only when the references contain as many distinct
    # labels as there are target names.
    seen_labels = set().union(*true_sets)
    use_auc_roc = len(seen_labels) == len(target_names)

    # Binary indicator matrices: one row per example, one column per target name.
    y_true = [[1 if name in found else 0 for name in target_names] for found in true_sets]
    y_pred = [[1 if name in found else 0 for name in target_names] for found in pred_sets]

    print(classification_report(y_true, y_pred, target_names=target_names))

    metrics = {
        'accuracy': accuracy_score(y_true=y_true, y_pred=y_pred)
    }

    for average in ['micro', 'macro', 'samples', 'weighted']:
        metrics[f'f1_{average}'] = f1_score(y_true=y_true, y_pred=y_pred, average=average)
        metrics[f'precision_{average}'] = precision_score(y_true=y_true, y_pred=y_pred, average=average)
        metrics[f'recall_{average}'] = recall_score(y_true=y_true, y_pred=y_pred, average=average)
        if use_auc_roc:
            metrics[f'roc_auc_{average}'] = roc_auc_score(y_true=y_true, y_score=y_pred, average=average)

    return metrics


# Train

In [74]:
from transformers import TrainerCallback

# Training configuration, grouped by purpose.
args = Seq2SeqTrainingArguments(
    output_dir=results_output_dir,
    #logging_dir=logging_dir,  # logging & evaluation strategies
    # Log, evaluate and checkpoint once per epoch, keeping a single checkpoint;
    # the best checkpoint by loss is reloaded when training ends.
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model='loss',
    # Optimisation settings taken from the configuration cell.
    num_train_epochs=num_train_epochs,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    fp16=False,  # Overflows with fp16
    # Evaluate on generated sequences rather than raw logits.
    predict_with_generate=True,
    #report_to="tensorboard",
    #push_to_hub=True,
    #hub_strategy="every_save",
    #hub_model_id=REPOSITORY_ID,
    #hub_token=HfFolder.get_token(),
)

# Tokenise every split; the raw text and label-string columns are dropped.
encoded_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["text", "labels"],
)
print(f"Keys of tokenized dataset: {list(encoded_dataset['train'].features)}")

# Fetch the NLTK 'punkt' package (sent_tokenize is used in postprocess_text).
nltk.download("punkt")

# Collator that pads label sequences with -100 and pads to a multiple of 8.
label_pad_token_id = -100
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8,
)

#early_stop = transformers.EarlyStoppingCallback(10, 1.15)
class PrintClassificationCallback(TrainerCallback):
    """Trainer callback that prints a horizontal rule when ``on_evaluate`` fires."""

    def on_evaluate(self, args, state, control, logs=None, **kwargs):
        """Print a separator line; none of the arguments are used."""
        separator = "----------------------------------------------------------"
        print(separator)




Keys of tokenized dataset: ['id', 'labels', 'input_ids', 'attention_mask']


[nltk_data] Downloading package punkt to /home/teveritt/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [75]:
# Wire the model, data, collator, metrics and separator callback into the trainer.
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["val"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[PrintClassificationCallback],
)

# Silence the warnings.
model.config.use_cache = False

!nvidia-smi

Wed Jul 17 00:48:55 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A2000 12GB          On  | 00000000:1C:00.0 Off |                  Off |
| 41%   69C    P2              28W /  70W |   3319MiB / 12282MiB |      2%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [76]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,Precision Micro,Recall Micro,F1 Macro,Precision Macro,Recall Macro,F1 Samples,Precision Samples,Recall Samples,F1 Weighted,Precision Weighted,Recall Weighted
1,1.3911,5.604636,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


              precision    recall  f1-score   support

         age       0.00      0.00      0.00         1
  disability       0.00      0.00      0.00         0
    feminine       0.00      0.00      0.00         1
     general       0.00      0.00      0.00         0
   masculine       0.00      0.00      0.00         3
     neutral       0.00      0.00      0.00         0
      racial       0.00      0.00      0.00         0
   sexuality       0.00      0.00      0.00         0

   micro avg       0.00      0.00      0.00         5
   macro avg       0.00      0.00      0.00         5
weighted avg       0.00      0.00      0.00         5
 samples avg       0.00      0.00      0.00         5

----------------------------------------------------------


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


TrainOutput(global_step=3, training_loss=1.391081174214681, metrics={'train_runtime': 3.8213, 'train_samples_per_second': 1.308, 'train_steps_per_second': 0.785, 'total_flos': 871361740800.0, 'train_loss': 1.391081174214681, 'epoch': 1.0})

# Evaluate

In [77]:
test_results = trainer.evaluate(eval_dataset=encoded_dataset['test'])
test_results

              precision    recall  f1-score   support

         age       0.00      0.00      0.00         0
  disability       0.00      0.00      0.00         2
    feminine       0.00      0.00      0.00         1
     general       0.00      0.00      0.00         1
   masculine       0.00      0.00      0.00         0
     neutral       0.00      0.00      0.00         1
      racial       0.00      0.00      0.00         0
   sexuality       0.00      0.00      0.00         0

   micro avg       0.00      0.00      0.00         5
   macro avg       0.00      0.00      0.00         5
weighted avg       0.00      0.00      0.00         5
 samples avg       0.00      0.00      0.00         5

----------------------------------------------------------


{'eval_loss': 7.552880764007568,
 'eval_accuracy': 0.0,
 'eval_f1_micro': 0.0,
 'eval_precision_micro': 0.0,
 'eval_recall_micro': 0.0,
 'eval_f1_macro': 0.0,
 'eval_precision_macro': 0.0,
 'eval_recall_macro': 0.0,
 'eval_f1_samples': 0.0,
 'eval_precision_samples': 0.0,
 'eval_recall_samples': 0.0,
 'eval_f1_weighted': 0.0,
 'eval_precision_weighted': 0.0,
 'eval_recall_weighted': 0.0,
 'eval_runtime': 0.3988,
 'eval_samples_per_second': 12.539,
 'eval_steps_per_second': 7.523,
 'epoch': 1.0}

In [78]:
# Tokenise the reference label strings of the test split. They are padded to
# max_target_length, the same length used when the training targets were
# encoded, instead of a hard-coded 13.
y_true = dataset["test"].map(
    lambda x: tokenizer(x["labels"], truncation=True, max_length=max_target_length, padding='max_length'),
    batched=True,
    remove_columns=dataset['train'].column_names,
)
# Convert to an array first: comparing a plain Python list with -100 yields a
# single bool, so the element-wise replacement below would not be applied.
y_true = np.array(y_true['input_ids'])
y_true = np.where(y_true != -100, y_true, tokenizer.pad_token_id)


predictions = trainer.predict(encoded_dataset['test'])
y_pred = predictions.predictions
if isinstance(y_pred, tuple):
    y_pred = y_pred[0]
# Predictions gathered by the Trainer can be padded with -100, which is not a
# valid token id; map it to the pad token so the ids can be decoded.
y_pred = np.where(y_pred != -100, y_pred, tokenizer.pad_token_id)


Map:   0%|          | 0/5 [00:00<?, ? examples/s]

              precision    recall  f1-score   support

         age       0.00      0.00      0.00         0
  disability       0.00      0.00      0.00         2
    feminine       0.00      0.00      0.00         1
     general       0.00      0.00      0.00         1
   masculine       0.00      0.00      0.00         0
     neutral       0.00      0.00      0.00         1
      racial       0.00      0.00      0.00         0
   sexuality       0.00      0.00      0.00         0

   micro avg       0.00      0.00      0.00         5
   macro avg       0.00      0.00      0.00         5
weighted avg       0.00      0.00      0.00         5
 samples avg       0.00      0.00      0.00         5



In [79]:

# Decode the reference and predicted token ids back into label strings.
y_str = tokenizer.batch_decode(y_true, skip_special_tokens=True)
y_hat_str = tokenizer.batch_decode(y_pred, skip_special_tokens=True)

y_str, y_hat_str = postprocess_text(y_str, y_hat_str)

target_names = sorted(set(id2label.values()))

# Split each string into its comma-separated labels and match them exactly.
# A substring test would also count a generated word that merely contains a
# label name; classify_text matches exact labels too.
true_sets = [{part.strip() for part in t.split(',')} for t in y_str]
pred_sets = [{part.strip() for part in p.split(',')} for p in y_hat_str]

# Binary indicator matrices: one row per example, one column per target name.
y_true = [[1 if name in found else 0 for name in target_names] for found in true_sets]
y_pred = [[1 if name in found else 0 for name in target_names] for found in pred_sets]

report = classification_report(y_true, y_pred, target_names=target_names)

#print(report)

# Convert to Markdown: indenting every line by four spaces renders the report
# as a preformatted block.
report_lines = report.split('\n')
markdown_classification_report = "\n".join(f"    {line}" for line in report_lines)
print(markdown_classification_report)

                  precision    recall  f1-score   support
    
             age       0.00      0.00      0.00         0
      disability       0.00      0.00      0.00         2
        feminine       0.00      0.00      0.00         1
         general       0.00      0.00      0.00         1
       masculine       0.00      0.00      0.00         0
         neutral       0.00      0.00      0.00         1
          racial       0.00      0.00      0.00         0
       sexuality       0.00      0.00      0.00         0
    
       micro avg       0.00      0.00      0.00         5
       macro avg       0.00      0.00      0.00         5
    weighted avg       0.00      0.00      0.00         5
     samples avg       0.00      0.00      0.00         5
    


In [192]:
import pandas as pd

# Show the test metrics as a two-column table. The cell body had been pasted
# twice, which printed the same table two times.
df = pd.DataFrame(list(test_results.items()), columns=['Metric', 'Value'])
print(df.to_string(index=False))

                 Metric      Value
              eval_loss   0.629769
          eval_accuracy   0.724596
          eval_f1_micro   0.654174
          eval_f1_macro   0.664987
        eval_f1_samples   0.789110
       eval_f1_weighted   0.664126
   eval_precision_micro   0.630542
   eval_precision_macro   0.663275
 eval_precision_samples   0.883982
eval_precision_weighted   0.662831
      eval_recall_micro   0.679646
      eval_recall_macro   0.681019
    eval_recall_samples   0.863881
   eval_recall_weighted   0.679646
     eval_roc_auc_micro   0.823293
     eval_roc_auc_macro   0.823978
           eval_runtime 228.462700
eval_samples_per_second   4.609000
  eval_steps_per_second   0.578000
                  epoch   1.000000
                 Metric      Value
              eval_loss   0.629769
          eval_accuracy   0.724596
          eval_f1_micro   0.654174
          eval_f1_macro   0.664987
        eval_f1_samples   0.789110
       eval_f1_weighted   0.664126
   eval_precision_mi

In [193]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def classify_text(text, model, tokenizer, label_columns, device, *, prefix="classify: "):
    """Generate labels for one text and report which known labels were produced.

    Args:
        text: The text to classify.
        model: Object whose ``generate(**inputs)`` returns sequences of token ids.
        tokenizer: Callable that encodes the prompt and whose ``decode`` turns
            generated ids back into a string.
        label_columns: Iterable of label names to report on.
        device: Device the encoded inputs are moved to.
        prefix: String put in front of ``text`` before tokenising. The default
            keeps the original behaviour. ``preprocess_function`` in this
            notebook tokenises the raw text without any prefix, so pass
            ``prefix=""`` to give the model the same input format it was
            fine-tuned on.

    Returns:
        A dict mapping every name in ``label_columns`` to ``True`` when that
        exact name appears among the comma-separated generated labels, and to
        ``False`` otherwise.
    """
    input_text = f"{prefix}{text}"
    inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True).to(device)
    outputs = model.generate(**inputs)
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Exact-match lookup: generated labels that are not in label_columns are ignored.
    predicted_labels = {label.strip() for label in generated.split(',')}
    return {label: label in predicted_labels for label in label_columns}

In [194]:
text = "Looking for a native English speaker"

classify_text(text, model, tokenizer, labels, device)

{'age': False,
 'disability': False,
 'masculine': False,
 'feminine': False,
 'racial': True,
 'sexuality': False,
 'general': True}

# Push to Hugging Face

In [None]:
model.push_to_hub(repo_id=hub_model_id, token=HfFolder.get_token())
tokenizer.push_to_hub(repo_id=hub_model_id, token=HfFolder.get_token())

# Update Model Card

In [None]:
from huggingface_hub import ModelCard, EvalResult, ModelCardData
import platform
import json
import sys
import os

# Summarise the training arguments as a sorted "key=value, ..." string for the
# model card. Only scalar values are kept, and keys containing any of the
# fragments below (paths, logging, hub, saving, run name, debug, tokens) are
# left out.
training_regime_args = args.to_sanitized_dict()
_excluded_key_parts = (
    '_dir', 'logging', 'log_', 'hub_', '_hub', 'save_', 'run_name', 'debug', 'token',
)
training_regime = ', '.join(sorted(
    f'{key}={json.dumps(value)}'
    for key, value in training_regime_args.items()
    if isinstance(value, (int, str, bool, float))
    and not any(part in key for part in _excluded_key_parts)
))


## Hardware
compute_infrastructure = []
mem_total = !cat /proc/meminfo | grep MemTotal
mem_total = list(set(mem_total))[0]
cpu_info = !cat /proc/cpuinfo | grep "model name"
cpu_count = len(list(cpu_info))
cpu_name = list(set(cpu_info))[0]
cpu_name = cpu_name.strip()
cpu_name = cpu_name.replace('model name\t:', '')
cpu_name = cpu_name.strip()

compute_infrastructure.append(f'- {platform.system()} {platform.release()} {platform.processor()}')
compute_infrastructure.append(f'- {mem_total}')
compute_infrastructure.append(f'- {cpu_count} X {cpu_name}')


gpu_name = !nvidia-smi --query-gpu=gpu_name --format=csv,noheader
gpus = set()
for idx, gpu in enumerate(gpu_name):
    compute_infrastructure.append(f"- GPU_{idx}: {gpu}")
    gpus.add(gpu)

gpus =list(gpus)
compute_infrastructure = '\n'.join(compute_infrastructure)
hardware_type = f'{len(gpus)} X {gpus[0]}'

## Software
software_list = !pip list
inc_software = []
inc_software.append(f'python {platform.python_version()}')

for software in software_list:
    if software and '[notice]' not in software and '---' not in software and 'Package' not in software:
        inc_software.append(' '.join(software.split()))
 

software = ", ".join(inc_software)   

# Turn every entry of the test-set evaluation output into a model-card EvalResult.
hours_used = ""
eval_results = []
for k, v in test_results.items():
    # Drop the leading "eval_" so the card shows e.g. "f1_micro".
    metric_type = k.replace("eval_", "", 1)
    if metric_type == 'runtime':
        # The runtime is reported in seconds, so hours are seconds / 3600.
        # It was previously divided by 60 (minutes) after truncation to int.
        # NOTE(review): this is the evaluation runtime, not the training time;
        # confirm which one the card's hours_used is meant to report.
        hours_used = f"{v / 3600.0:.2f}"
    eval_results.append(EvalResult(
        task_type='multi_label_classification',
        dataset_type='mix_human-eval_synthetic',
        dataset_name=dataset_id,
        metric_type=metric_type,
        metric_value=v))

# Usage snippet published in the model card. The model is a seq2seq model
# (loaded with AutoModelForSeq2SeqLM) that generates label names as text, so
# the example uses the text2text-generation pipeline. The earlier snippet used
# a text-classification pipeline with per-label scores, which does not match
# this model. `${hub_model_id}` is a placeholder filled in below.
direct_use = """
    ```python
    from transformers import pipeline

    pipe = pipeline("text2text-generation", model="${hub_model_id}")

    results = pipe("Join our dynamic and fast-paced team as a Junior Marketing Specialist. We seek a tech-savvy and energetic individual who thrives in a vibrant environment. Ideal candidates are digital natives with a fresh perspective, ready to adapt quickly to new trends. You should have recent experience in social media strategies and a strong understanding of current digital marketing tools. We're looking for someone with a youthful mindset, eager to bring innovative ideas to our young and ambitious team. If you're a recent graduate or early in your career, this opportunity is perfect for you!")
    print(results)
    ```
    The pipeline returns a list with one dict per input. Its `generated_text`
    value is a comma-separated list of the detected bias types (for example
    `age,general`).
    """

direct_use = direct_use.replace('${hub_model_id}', hub_model_id)

# Assemble the model-card metadata from the values computed in this notebook:
# identifiers, training regime, evaluation results, hardware/software summary
# and the usage snippet.
card_data = ModelCardData(
    model_id=model_id,
    model_name=model_id,
    model_description="The model is a multi-label classifier designed to detect various types of bias within job descriptions.",
    base_model=base_model_id,
    language='en',
    license='apache-2.0',
    developers="Tristan Everitt and Paul Ryan",
    model_card_authors='See developers',
    model_card_contact='See developers',
    repo="https://gitlab.computing.dcu.ie/everitt2/2024-mcm-everitt-ryan",
    training_regime=training_regime,
    eval_results=eval_results,
    # Classification report indented by four spaces for Markdown rendering.
    results=markdown_classification_report,
    compute_infrastructure=compute_infrastructure,
    # hardware_requirements='N/A',
    software=software,
    hardware_type=hardware_type,
    hours_used=hours_used,
    cloud_provider='N/A',
    cloud_region='N/A',
    co2_emitted='N/A',
    datasets=[dataset_id],
    direct_use=direct_use
)

# Render the card from the template using the metadata above.
card = ModelCard.from_template(card_data)

# Upload the card to the model repository, authenticating with the token
# returned by HfFolder.get_token().
card.push_to_hub(repo_id=hub_model_id, token=HfFolder.get_token())