In [1]:
import datasets
import torch
import evaluate
from datasets import load_dataset
from transformers import pipeline
from transformers import Trainer, TrainingArguments
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [2]:
emotion_dataset = datasets.load_from_disk('C:/Users/JGras/instagram-topics/instagram-topics/Data/dair-ai-amotion-de-test-train-val')

In [3]:
emotion_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 27200
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 3400
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 3400
    })
})

In [4]:
checkpoint = "google-bert/bert-base-german-cased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [5]:
def tokenize_function(sample):
  return tokenizer(sample["text"], truncation=True)

In [6]:
tokenized_datasets = emotion_dataset.map(tokenize_function, batched=True)

In [7]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=6)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-german-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
def compute_metrics(pred):
  # pred ist vom Typ EvalPrediction. Daher können wir auf die
  # label_ids als auch auf die predictions zugreifen
  labels = pred.label_ids
  # argmax gibt den Arrayindex mit der größten Nummer zurück 
  # dadurch wissen wir, welches Sentiment vorhergesagt wurde
  preds = pred.predictions.argmax(-1)
  f1 = f1_score(labels, preds, average="macro")
  acc = accuracy_score(labels, preds)
  precision = precision_score(labels, preds, average="macro")
  recall = recall_score(labels, preds, average="macro")
  return {"precision": precision, "recall": recall, "acc": acc, "f1": f1}

In [10]:
batch_size = 8
logging_steps = len(tokenized_datasets["train"]) // batch_size
model_name = f"{checkpoint}-dair-ai-de-emotion-finetuned"
training_args = TrainingArguments(output_dir = model_name, 
                                  num_train_epochs = 4, # Anzahl der Epochen, d.h. Trainingsdurchläufe
                                  learning_rate = 4e-5, # Lernrate: setzt man normalerweise immer auf 1e-5/2e-5/3e-5/4e-5
                                  per_device_train_batch_size = batch_size,
                                  per_device_eval_batch_size = batch_size,
                                  evaluation_strategy="epoch", # Evaluierung nach jeder Epoche
                                  disable_tqdm = False, # Fortschrittsbalken wird angezeigt
                                  logging_steps = logging_steps, # Anzahl der Updateschritte zwischen zwei Logs
                                  log_level="error")

In [11]:
train_dataset = tokenized_datasets["train"].shuffle().select(range(13600))
test_dataset = tokenized_datasets["test"].shuffle(42).select(range(3000))

In [12]:
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

In [13]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,Acc,F1
1,No log,0.464199,0.805639,0.794691,0.846667,0.797273
2,0.513900,0.434948,0.846881,0.823805,0.88,0.830685
3,0.513900,0.40777,0.859785,0.85677,0.896667,0.858032
4,0.169400,0.480041,0.84981,0.856126,0.895,0.852647


TrainOutput(global_step=6800, training_loss=0.3416731980267693, metrics={'train_runtime': 984.4412, 'train_samples_per_second': 55.26, 'train_steps_per_second': 6.907, 'total_flos': 1478457671619456.0, 'train_loss': 0.3416731980267693, 'epoch': 4.0})

In [22]:
eval = trainer.evaluate(eval_dataset=tokenized_datasets['validation'])

In [23]:
result = eval
params = {"model": "bert-base-german-cased"}
evaluate.save("./results/", **result, **params)

WindowsPath('results/result-2024_03_31-14_07_20.json')

In [20]:
trainer.save_model('bert-base-german-cased-dair-ai-emotion-de-finetuned')

In [17]:
#emotion_finetuned_pipe = pipeline("text-classification", model="bert-base-german-finetuned-cased-dair-ai-emotion-de")

In [18]:
#emotion_finetuned_pipe(text)