In [61]:
import os
import sys
import pandas as pd

In [62]:
test_df = pd.read_csv('../../data/german/test.csv')
train_df = pd.read_csv('../../data/german/train.csv')
val_df = pd.read_csv('../../data/german/valid.csv')


test_df.head()

Unnamed: 0,text,label,source
0,@user korrekt! Verstehe sowas nicht...,negative,sb_10k
1,Einparken kÃ¶nnen die Aliens auch nicht! #schlefaz,neutral,sb_10k
2,Der Dubbletimepart von Julien war ja mal sowas...,positive,sb_10k
3,#Instachallenge #Day16 #what #i #am #reading #...,negative,sb_10k
4,Also gleich. Mach noch das Video fertig.,neutral,sb_10k


In [63]:
# from sklearn.model_selection import train_test_split

# train_df, _ = train_test_split(
#     train_df,
#     test_size = 0.6 ,
#     stratify = train_df['label'],
#     random_state = 42
# )

In [64]:
train_df['label'].unique()


mapping = {'negative': 0, 'neutral': 1, 'positive': 2}

# Create a new column with mapped values
train_df['sentiment_num'] = train_df['label'].map(mapping)
test_df['sentiment_num'] = test_df['label'].map(mapping)
val_df['sentiment_num'] = val_df['label'].map(mapping)

In [65]:
#dropping neutral rows
train_df = train_df[train_df["label"] != "neutral"]
test_df  = test_df[test_df["label"]  != "neutral"]
val_df   = val_df[val_df["label"]   != "neutral"]
train_df.head()

Unnamed: 0,text,label,source,sentiment_num
627,Klasen mit rot runter. Da hatten mehr fliegen ...,negative,sb_10k,0
1268,@user bitteschÃ¶n meine prinzessin immer wieder...,positive,sb_10k,2
1050,"Ieh, lauter glÃ¼ckliche PÃ¤rchen hier... ðŸ‘±ðŸ”«",negative,sb_10k,0
1101,Da hat wohl jemandem der Snack nicht geschmeck...,negative,sb_10k,0
575,@user @user Hey Leute! :) Nachher Eintracht vs...,positive,sb_10k,2


In [66]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, AdamW
from datasets import Dataset

model_name = "bert-base-multilingual-uncased"
tok = BertTokenizer.from_pretrained(model_name)

MAX_LEN = 100  # tweets are short
#Converting to tensors
def tokenize(batch):
    return tok(
        batch["text"].tolist(),
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN,
        return_tensors="pt"
    )

train_encodings = tokenize(train_df)
val_encodings = tokenize(val_df)



In [67]:
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

train_dataset = SentimentDataset(train_encodings, train_df["sentiment_num"].values)
val_dataset = SentimentDataset(val_encodings, val_df["sentiment_num"].values)

In [68]:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_dataset, shuffle = True, batch_size = 8)
#change number of labels based on keep/drop neutral class
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

training_args = TrainingArguments(
    output_dir="results",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
 33%|â–ˆâ–ˆâ–ˆâ–Ž      | 31/93 [00:22<00:57,  1.08it/s]

{'loss': 0.0892, 'grad_norm': 0.029356185346841812, 'learning_rate': 1.3333333333333333e-05, 'epoch': 1.0}



 33%|â–ˆâ–ˆâ–ˆâ–Ž      | 31/93 [00:24<00:57,  1.08it/s]

{'eval_loss': 0.0017323449719697237, 'eval_runtime': 2.228, 'eval_samples_per_second': 96.948, 'eval_steps_per_second': 6.284, 'epoch': 1.0}


 67%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–‹   | 62/93 [00:47<00:18,  1.65it/s]

{'loss': 0.0013, 'grad_norm': 0.019368309527635574, 'learning_rate': 6.666666666666667e-06, 'epoch': 2.0}



 67%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–‹   | 62/93 [00:49<00:18,  1.65it/s]

{'eval_loss': 0.0007552846800535917, 'eval_runtime': 2.0845, 'eval_samples_per_second': 103.621, 'eval_steps_per_second': 6.716, 'epoch': 2.0}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 93/93 [01:15<00:00,  1.67it/s]

{'loss': 0.0009, 'grad_norm': 0.011485415510833263, 'learning_rate': 0.0, 'epoch': 3.0}


                                               
100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 93/93 [01:17<00:00,  1.67it/s]

{'eval_loss': 0.0006587993702851236, 'eval_runtime': 2.1054, 'eval_samples_per_second': 102.595, 'eval_steps_per_second': 6.65, 'epoch': 3.0}


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 93/93 [01:19<00:00,  1.17it/s]

{'train_runtime': 79.6474, 'train_samples_per_second': 18.456, 'train_steps_per_second': 1.168, 'train_loss': 0.03047765821458832, 'epoch': 3.0}





TrainOutput(global_step=93, training_loss=0.03047765821458832, metrics={'train_runtime': 79.6474, 'train_samples_per_second': 18.456, 'train_steps_per_second': 1.168, 'total_flos': 75541650660000.0, 'train_loss': 0.03047765821458832, 'epoch': 3.0})

In [69]:
metrics = trainer.evaluate()
print(metrics)

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 14/14 [00:02<00:00,  6.71it/s]

{'eval_loss': 0.0006587993702851236, 'eval_runtime': 2.1246, 'eval_samples_per_second': 101.665, 'eval_steps_per_second': 6.589, 'epoch': 3.0}





In [70]:
predictions = trainer.predict(val_dataset)

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 14/14 [00:02<00:00,  6.71it/s]


In [71]:
import numpy as np

from sklearn.metrics import accuracy_score, f1_score, classification_report

logits = predictions.predictions      # or predictions[0]
labels = predictions.label_ids        # or predictions[1]

y_pred = np.argmax(logits, axis=-1)

print("Accuracy:", accuracy_score(labels, y_pred))
print("F1 (macro):", f1_score(labels, y_pred, average="macro"))

print(
    classification_report(
        labels,
        y_pred,
        #target_names=["negative", "neutral", "positive"]
        target_names=["negative","positive"]
    )
)

Accuracy: 0.5
F1 (macro): 0.3333333333333333
              precision    recall  f1-score   support

    negative       0.50      1.00      0.67       108
    positive       0.00      0.00      0.00       108

    accuracy                           0.50       216
   macro avg       0.25      0.50      0.33       216
weighted avg       0.25      0.50      0.33       216



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
