<a href="https://colab.research.google.com/github/HimashiRathnayake/CMCS-Text-Classification/blob/main/CMCS_Multi_task_Training_with_XLM_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Multi-task Training with XLM-R** 
For Sinhala-English Code-Mixed and Code-Switched Classification Tasks.

*   Sentiment Analysis
*   Humor Detection
*   Hate Speech Detection
*   Language Identification



### **Library setup**

In [1]:
# Install the third-party packages this notebook uses.
# Versions are not pinned; the recorded run below resolved transformers 4.15.0.
!pip install transformers
!pip install sentencepiece
!pip install datasets
!pip install seqeval

Collecting transformers
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 14.5 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 57.3 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 65.9 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 72.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 5.0 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attemp

In [2]:
import numpy as np
import torch
import torch.nn as nn
import transformers
from google.colab import drive
import pandas as pd
from sklearn.model_selection import train_test_split
import dataclasses
from torch.utils.data.dataloader import DataLoader
from transformers.data.data_collator import DataCollator, InputDataClass, DefaultDataCollator, DataCollatorForTokenClassification
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler
from typing import List, Union, Dict
from datasets import load_metric, load_dataset
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

### **Load and preprocess data**

In [3]:
# Locations of the two corpora on the mounted Google Drive, plus shared model settings.
# NOTE(review): the CSV file name starts with 'ç' ("çompleted_draft.csv") — confirm it
# matches the actual file name on Drive.
sent_dataset_path = "/content/drive/Shareddrives/FYP/corpus/çompleted_draft.csv"
token_dataset_path = '/content/drive/Shareddrives/FYP-CodeStars/Dataset/LID/LID_withSenID_Final.json'
# Pretrained checkpoint used for the tokenizer and for every task model.
model_name = "xlm-roberta-base"
# Every tokenized sequence is padded/truncated to this many tokens.
max_length = 128

In [4]:
# Mount Google Drive so the dataset paths under /content/drive can be read.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Sentence-level corpus (CSV) and token-level language-identification corpus (JSON Lines).
sent_all_data = pd.read_csv(sent_dataset_path)
token_all_data = pd.read_json(token_dataset_path, lines=True)

In [6]:
# Preview the token-level corpus: each row holds a `tokens` list and a `tags` list.
token_all_data

Unnamed: 0,tokens,tags
0,"[Ai, rate, neethiya, muta, balapannethida]","[Sin-Eng, Sin-Eng, Sin-Eng, Sin-Eng, Sin-Eng]"
1,"[hi, mobitel]","[English, NameEntity]"
2,[Short],[English]
3,"[Wadakma, na, ,, signal, na, ,, ,, ,, mathugama]","[Sin-Eng, Sin-Eng, Symbol, English, Sin-Eng, S..."
4,"[7812981, 13, සිගනල්, එන්නෙ, නැ]","[NameEntity, NameEntity, Eng-Sin, Sinhala, Sin..."
...,...,...
13416,"[Ha, zitizan, 2, mp, gadara]","[Sin-Eng, English, NameEntity, NameEntity, Sin..."
13417,"[Dialog, තරමි, වෙිගත්තව, නැ, අනිත්, එවා, ඒකයි,...","[NameEntity, Sinhala, Sinhala, Sinhala, Sinhal..."
13418,"[Cliq, plan, 3G, package, walata, wattamak, di...","[NameEntity, English, NameEntity, English, Sin..."
13419,[Mata],[Sin-Eng]


In [7]:
# Tokenizer for the pretrained checkpoint; used by both tokenization helpers below.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

Downloading:   0%|          | 0.00/512 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

In [8]:
class DatasetObject(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with one label per example.

    `encodings` is a mapping from field name to a per-example sequence of values;
    `labels` is a sequence with one entry per example.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of examples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert each encoded field of example `idx` to a tensor, then attach its label.
        example = {}
        for field, column in self.encodings.items():
            example[field] = torch.tensor(column[idx])
        example['labels'] = torch.tensor(self.labels[idx])
        return example

In [9]:
def tokenize(inputs):
    """Tokenize a list of sentences to fixed-length model inputs.

    Args:
        inputs: list of raw text strings.

    Returns:
        The tokenizer's batch encoding, with every sequence padded or truncated
        to `max_length` tokens.
    """
    # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`, and
    # truncation is requested explicitly instead of relying on the implicit
    # 'longest_first' fallback (which only emitted a warning).
    return tokenizer.batch_encode_plus(
        inputs,
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )

In [10]:
def sent_create_dataset(all_data, task_name):
    """Build train/test datasets for one sentence-level classification task.

    Args:
        all_data: DataFrame holding a 'Sentence' column and a label column per task.
        task_name: name of the label column to use for this task.

    Returns:
        dict with "train" and "test" `DatasetObject`s (90% / 10% split).
    """
    # Work on an explicit copy: assigning into a column slice of the caller's frame
    # triggers SettingWithCopyWarning and may silently fail to take effect.
    task_frame = all_data[['Sentence', task_name]].copy()
    task_frame.columns = ['sentence', 'label']
    # Encode labels as integer ids (ids follow order of first appearance in the data).
    task_frame['label'], _ = pd.factorize(task_frame['label'])

    X = task_frame['sentence'].values.tolist()
    y = task_frame['label'].values.tolist()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 41)

    train_dataset = DatasetObject(tokenize(X_train), y_train)
    test_dataset = DatasetObject(tokenize(X_test), y_test)
    return {"train": train_dataset, "test": test_dataset}

In [11]:
# When True, every sub-token of a word gets the word's label; when False, only the
# first sub-token is labelled and the remaining ones are set to -100.
label_all_tokens = True


def token_level_task_tokenize_and_align_labels(examples):
    """Tokenize pre-split words and expand word-level tags to sub-token level.

    Args:
        examples: batch with "tokens" (list of word lists) and "tags"
            (list of per-word label lists).

    Returns:
        The tokenizer output with an added "labels" entry holding one label id
        per produced token.
    """
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding="max_length",
        max_length=max_length,
    )

    aligned_labels = []
    for batch_index, word_labels in enumerate(examples["tags"]):
        sentence_label_ids = []
        last_word_id = None
        for word_id in tokenized_inputs.word_ids(batch_index=batch_index):
            if word_id is None:
                # Tokens without a word id (special tokens / padding) get -100 so
                # they are ignored by the loss function.
                label_id = -100
            elif word_id != last_word_id or label_all_tokens:
                # First sub-token of a word, or any sub-token when all are labelled.
                label_id = word_labels[word_id]
            else:
                # Continuation sub-token that should not be labelled.
                label_id = -100
            sentence_label_ids.append(label_id)
            last_word_id = word_id
        aligned_labels.append(sentence_label_ids)

    tokenized_inputs["labels"] = aligned_labels
    return tokenized_inputs

In [12]:
def tokenLevel_create_dataset():
    """Build the tokenized train/test datasets for token-level language identification.

    Reads the JSON Lines corpus at `token_dataset_path`, converts tag names to
    integer ids, writes a 90% / 10% train/test split back to Drive as JSON Lines,
    reloads the two files with `datasets.load_dataset` and tokenizes them.

    Returns:
        A dataset dict with "train" and "test" splits, formatted as torch tensors
        with the columns "input_ids", "attention_mask" and "labels".

    Raises:
        ValueError: if the corpus contains a tag that is not listed in `tags_ind`.
    """
    # Position in this list is the integer id of the tag.
    tags_ind = ['Sinhala', 'English', 'Sin-Eng', 'Eng-Sin', 'Mixed', 'NameEntity', 'Symbol']

    corpus_dir = '/content/drive/Shareddrives/FYP/corpus'
    train_path = f'{corpus_dir}/lang_id_train.json'
    test_path = f'{corpus_dir}/lang_id_test.json'

    df = pd.read_json(token_dataset_path, lines=True)
    # Replace the whole column in one assignment. The previous per-row chained
    # assignment (`df['tags'][count] = ...`) is flagged by pandas and no longer
    # updates the frame once copy-on-write is enabled.
    df['tags'] = df['tags'].apply(lambda tags: [tags_ind.index(tag) for tag in tags])

    # Split the data into train and test sets.
    trainData, testData = train_test_split(df, test_size=0.10, random_state=42)
    # Persist both splits so they can be reloaded through `load_dataset`.
    trainData.to_json(train_path, orient='records', lines=True, force_ascii=False)
    testData.to_json(test_path, orient='records', lines=True, force_ascii=False)

    dataset = load_dataset('json', data_files={'train': train_path, 'test': test_path})
    tokenized_datasets = dataset.map(token_level_task_tokenize_and_align_labels, batched=True)
    tokenized_datasets.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return tokenized_datasets

In [13]:
# Train/test datasets for every task, keyed by task name.
# The three sentence-level tasks read different label columns of the same CSV frame;
# the language-identification task is built from the token-level JSON corpus.
dataset_dict = {
    "sentiment": sent_create_dataset(sent_all_data, "Sentiment"),
    "humor": sent_create_dataset(sent_all_data, "Humor"),
    "hate": sent_create_dataset(sent_all_data, "Hate_speech"),
    "langID": tokenLevel_create_dataset(),
}

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Using custom data configuration default-14dcfc50e004440b


Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-14dcfc50e004440b/0.0.0/c90812beea906fcffe0d5e3bb9eba909a80a998b5f88e9f8acbd320aa91acfde...


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-14dcfc50e004440b/0.0.0/c90812beea906fcffe0d5e3bb9eba909a80a998b5f88e9f8acbd320aa91acfde. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

### **Define Multi-task Model**

In [14]:
class MultitaskModel(transformers.PreTrainedModel):
    """Collection of single-task models that all share one encoder.

    Each task keeps its own model object (and therefore its own head), while the
    encoder attribute of every model points at the same module.
    """

    def __init__(self, encoder, taskmodels_dict):
        """
        Subclassing PreTrainedModel lets the Trainer treat the multi-task
        wrapper like any other model.
        """
        super().__init__(transformers.PretrainedConfig())

        self.encoder = encoder
        self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)

    @classmethod
    def create(cls, model_name, model_type_dict, model_config_dict):
        """
        Build a MultitaskModel from per-task model classes and configs.

        One single-task model is instantiated per entry of `model_type_dict`
        (using the matching config from `model_config_dict`). The encoder of the
        first model is kept and assigned to every later model, so all tasks
        share it.
        """
        shared_encoder = None
        taskmodels_dict = {}
        for task_name, model_type in model_type_dict.items():
            model = model_type.from_pretrained(
                model_name,
                config=model_config_dict[task_name],
            )
            encoder_attr = cls.get_encoder_attr_name(model)
            if shared_encoder is None:
                # First task: its encoder becomes the shared one.
                shared_encoder = getattr(model, encoder_attr)
            else:
                # Later tasks: swap their own encoder for the shared one.
                setattr(model, encoder_attr, shared_encoder)
            taskmodels_dict[task_name] = model
        return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)

    @classmethod
    def get_encoder_attr_name(cls, model):
        """
        Return the attribute name under which `model` stores its encoder.

        The name depends on the model architecture and is looked up by the
        prefix of the model's class name. Raises KeyError for unknown prefixes.
        """
        model_class_name = model.__class__.__name__
        # (class-name prefix, encoder attribute), checked in this order.
        prefix_to_attr = (
            ("Bert", "bert"),
            ("Roberta", "roberta"),
            ("XLMRoberta", "roberta"),
            ("Albert", "albert"),
        )
        for prefix, attr_name in prefix_to_attr:
            if model_class_name.startswith(prefix):
                return attr_name
        raise KeyError(f"Add support for new model {model_class_name}")

    def forward(self, task_name, **kwargs):
        # Dispatch the inputs to the model registered for `task_name`.
        return self.taskmodels_dict[task_name](**kwargs)

In [15]:
# Instantiate one model per task on top of a shared encoder: three sequence
# classifiers and one token classifier. Each task gets its own config with its own
# `num_labels`; langID's 7 matches the 7 entries of `tags_ind` used when building
# that dataset.
multitask_model = MultitaskModel.create(
    model_name=model_name,
    model_type_dict={
        "sentiment": transformers.AutoModelForSequenceClassification,
        "humor": transformers.AutoModelForSequenceClassification,
        "hate": transformers.AutoModelForSequenceClassification,
        "langID": transformers.AutoModelForTokenClassification,
    },
    model_config_dict={
        "sentiment": transformers.AutoConfig.from_pretrained(model_name, num_labels=4),
        "humor": transformers.AutoConfig.from_pretrained(model_name, num_labels=2),
        "hate": transformers.AutoConfig.from_pretrained(model_name, num_labels=3),
        "langID": transformers.AutoConfig.from_pretrained(model_name, num_labels=7),
    },
)

Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Confirm that all four task models share the same encoder (the printed data pointers should be identical).

In [16]:
# Print the memory address of the word-embedding weights of the shared encoder and
# of each task model; identical values mean they all use the same weight tensor.
print(multitask_model.encoder.embeddings.word_embeddings.weight.data_ptr())
print(multitask_model.taskmodels_dict["sentiment"].roberta.embeddings.word_embeddings.weight.data_ptr())
print(multitask_model.taskmodels_dict["humor"].roberta.embeddings.word_embeddings.weight.data_ptr())
print(multitask_model.taskmodels_dict["hate"].roberta.embeddings.word_embeddings.weight.data_ptr())
print(multitask_model.taskmodels_dict["langID"].roberta.embeddings.word_embeddings.weight.data_ptr())

94230002958336
94230002958336
94230002958336
94230002958336
94230002958336


### **Preparing Multi-task data loader and Trainer**

In [17]:
class NLPDataCollator(DefaultDataCollator):
    """
    Data collator for batches whose examples are dictionaries of tensors.
    """
    def collate_batch(self, features: List[Union[InputDataClass, Dict]]) -> Dict[str, torch.Tensor]:
        """Combine a list of per-example features into one batch.

        Args:
            features: non-empty list of examples. Dict examples are expected to
                map field names to tensors (string values are skipped).

        Returns:
            Dict mapping each field name to a stacked tensor. "labels" (when
            present) comes first: scalar int64 labels become a 1-D long tensor,
            non-scalar int64 labels are stacked, and any other dtype is converted
            to a float tensor.
        """
        first = features[0]
        if not isinstance(first, dict):
            # Non-dict examples: defer to the parent collator. DefaultDataCollator
            # is invoked through __call__; it has no `collate_batch` method.
            return super().__call__(features)

        # Always start from an empty batch so examples without labels (e.g. at
        # inference time) do not hit an undefined `batch` below.
        batch = {}
        if "labels" in first and first["labels"] is not None:
            if first["labels"].dtype == torch.int64:
                if first["labels"].size() == torch.Size([]):
                    # One class id per example (sequence-level tasks).
                    labels = torch.tensor([f["labels"] for f in features], dtype=torch.long)
                else:
                    # One class id per token (token-level tasks).
                    labels = torch.stack([f["labels"] for f in features])
            else:
                labels = torch.tensor([f["labels"] for f in features], dtype=torch.float)
            batch["labels"] = labels
        for k, v in first.items():
            if k != "labels" and v is not None and not isinstance(v, str):
                batch[k] = torch.stack([f[k] for f in features])
        return batch


class StrIgnoreDevice(str):
    """
    This is a hack. The Trainer is going to call .to(device) on every input
    value, but we need to pass in an additional `task_name` string.
    Giving the string a no-op `.to()` prevents that call from raising an error.
    """
    def to(self, device):
        # Strings have no device; return the string itself unchanged.
        return self


class DataLoaderWithTaskname:
    """
    Wraps a data loader so that every batch it yields also carries the task name.
    """

    def __init__(self, task_name, data_loader):
        self.task_name = task_name
        self.data_loader = data_loader
        # Expose the wrapped loader's batch size and dataset on the wrapper itself.
        self.batch_size, self.dataset = data_loader.batch_size, data_loader.dataset

    def __len__(self):
        # Same number of batches as the wrapped loader.
        return len(self.data_loader)

    def __iter__(self):
        batches = iter(self.data_loader)
        for batch in batches:
            # Tag the batch (in place) with a device-agnostic task-name string.
            batch["task_name"] = StrIgnoreDevice(self.task_name)
            yield batch


class MultitaskDataloader:
    """
    Iterable that interleaves the batches of several single-task data loaders.
    """

    def __init__(self, dataloader_dict):
        self.dataloader_dict = dataloader_dict
        self.task_name_list = list(dataloader_dict)
        # Number of batches each task contributes per pass.
        self.num_batches_dict = {
            name: len(loader) for name, loader in dataloader_dict.items()
        }
        # Placeholder list whose length equals the total number of examples
        # across all tasks.
        total_examples = 0
        for loader in dataloader_dict.values():
            total_examples += len(loader.dataset)
        self.dataset = [None] * total_examples

    def __len__(self):
        # Total number of batches over all tasks.
        return sum(self.num_batches_dict.values())

    def __iter__(self):
        """
        Yield one batch at a time, each drawn from a randomly chosen task.

        Every task appears exactly as many times as it has batches
        (size-proportional sampling); only the order of tasks is shuffled.
        """
        # One entry per batch, holding the index of the task it comes from.
        schedule = np.array([
            task_index
            for task_index, task_name in enumerate(self.task_name_list)
            for _ in range(self.num_batches_dict[task_name])
        ])
        np.random.shuffle(schedule)

        iterators = {
            name: iter(loader) for name, loader in self.dataloader_dict.items()
        }
        for task_index in schedule:
            yield next(iterators[self.task_name_list[task_index]])

class MultitaskTrainer(transformers.Trainer):
    """Trainer whose training loader mixes batches from several task datasets.

    `train_dataset` is expected to be a dict mapping task name to dataset.
    """

    def get_single_train_dataloader(self, task_name, train_dataset):
        """
        Build the loader for one task; its batches also carry the task name.
        """
        if self.train_dataset is None:
            raise ValueError("Trainer: training requires a train_dataset.")

        # A RandomSampler is always used; TPU and distributed samplers are not
        # handled here.
        task_loader = DataLoader(
            train_dataset,
            batch_size=self.args.train_batch_size,
            sampler=RandomSampler(train_dataset),
            collate_fn=self.data_collator.collate_batch,
        )
        return DataLoaderWithTaskname(task_name=task_name, data_loader=task_loader)

    def get_train_dataloader(self):
        """
        Return a MultitaskDataloader: not a real DataLoader, but an iterable
        that draws batches from the per-task loaders.
        """
        per_task_loaders = {}
        for task_name, task_dataset in self.train_dataset.items():
            per_task_loaders[task_name] = self.get_single_train_dataloader(task_name, task_dataset)
        return MultitaskDataloader(per_task_loaders)

### **Train the Model**

In [18]:
# Training split of every task, keyed by task name (the MultitaskTrainer iterates
# over this dict to build one loader per task).
train_dataset = {
    task_name: dataset["train"] 
    for task_name, dataset in dataset_dict.items()
}

data_collator = NLPDataCollator()

# NOTE(review): the recorded log below reports a per-device batch size of 8, while
# this cell sets 32 — the stored output appears to come from a run with different
# arguments; re-run to refresh it.
training_args = transformers.TrainingArguments(
        output_dir="./models/multitask_model",
        overwrite_output_dir=True,
        learning_rate=2e-5,
        do_train=True,
        num_train_epochs=3,
        per_device_train_batch_size=32,  
        save_steps=3000,
    )

trainer = MultitaskTrainer(
    model=multitask_model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)
trainer.train()

***** Running training *****
  Num examples = 48576
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 18219


Step,Training Loss
500,0.6251
1000,0.5059
1500,0.4483
2000,0.4256
2500,0.3862
3000,0.3978
3500,0.3634
4000,0.3469
4500,0.3898
5000,0.3713


Saving model checkpoint to ./models/multitask_model/checkpoint-3000
Configuration saved in ./models/multitask_model/checkpoint-3000/config.json
Model weights saved in ./models/multitask_model/checkpoint-3000/pytorch_model.bin
Saving model checkpoint to ./models/multitask_model/checkpoint-6000
Configuration saved in ./models/multitask_model/checkpoint-6000/config.json
Model weights saved in ./models/multitask_model/checkpoint-6000/pytorch_model.bin
Saving model checkpoint to ./models/multitask_model/checkpoint-9000
Configuration saved in ./models/multitask_model/checkpoint-9000/config.json
Model weights saved in ./models/multitask_model/checkpoint-9000/pytorch_model.bin
Saving model checkpoint to ./models/multitask_model/checkpoint-12000
Configuration saved in ./models/multitask_model/checkpoint-12000/config.json
Model weights saved in ./models/multitask_model/checkpoint-12000/pytorch_model.bin
Saving model checkpoint to ./models/multitask_model/checkpoint-15000
Configuration saved in .

Step,Training Loss
500,0.6251
1000,0.5059
1500,0.4483
2000,0.4256
2500,0.3862
3000,0.3978
3500,0.3634
4000,0.3469
4500,0.3898
5000,0.3713


Saving model checkpoint to ./models/multitask_model/checkpoint-18000
Configuration saved in ./models/multitask_model/checkpoint-18000/config.json
Model weights saved in ./models/multitask_model/checkpoint-18000/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=18219, training_loss=0.32185289963022323, metrics={'train_runtime': 5409.7432, 'train_samples_per_second': 0.002, 'train_steps_per_second': 3.368, 'total_flos': 9719063944888320.0, 'train_loss': 0.32185289963022323, 'epoch': 3.0})

### **Evaluate Model**

In [21]:
# Run every task over its held-out test split and keep the raw prediction output
# (predictions and label ids) per task.
preds_dict = {}
for task_name in ["sentiment", "humor", "hate", "langID"]:
    test_dataset = dataset_dict[task_name]["test"]

    if task_name == "langID":
        # The token-level dataset is wrapped in a plain DataLoader. Evaluation does
        # not need shuffling, so the default sequential order is used, and the batch
        # size follows the trainer's evaluation setting like the other tasks.
        eval_dataloader = DataLoader(test_dataset, batch_size=trainer.args.eval_batch_size)
    else:
        eval_dataloader = trainer.get_eval_dataloader(eval_dataset=test_dataset)

    # Attach the task name to every batch so the multi-task model can route it.
    eval_dataloader_with_task = DataLoaderWithTaskname(
        task_name,
        eval_dataloader
    )

    preds_dict[task_name] = trainer.prediction_loop(
        eval_dataloader_with_task,
        description=f"Test: {task_name}",
    )

***** Running Test: sentiment *****
  Num examples = 1352
  Batch size = 8


***** Running Test: humor *****
  Num examples = 1352
  Batch size = 8
***** Running Test: hate *****
  Num examples = 1352
  Batch size = 8
***** Running Test: langID *****
  Num examples = 1343
  Batch size = 8


In [23]:
# Print evaluation metrics for every task from the stored prediction output.
for task_name in ["sentiment", "humor", "hate", "langID"]:
    task_output = preds_dict[task_name]
    labels = task_output.label_ids

    if task_name != "langID":
        # Sentence-level tasks: one predicted class per example, macro-averaged scores.
        preds = np.argmax(task_output.predictions, axis=1)

        print("Task ", task_name, " :")
        print("Accuracy", accuracy_score(labels, preds))
        print("Precision", precision_score(labels, preds, average='macro'))
        print("Recall", recall_score(labels, preds, average='macro'))
        print("F1 Score", f1_score(labels, preds, average='macro'), "\n")
        print("Classification Report", classification_report(labels, preds), "\n")
        continue

    # Token-level task: one predicted class per token position.
    preds = np.argmax(task_output.predictions, axis=2)

    # Drop positions labelled -100 (ignored tokens) and map ids back to tag names.
    tags_ind = ['Sinhala', 'English', 'Sin-Eng', 'Eng-Sin', 'Mixed', 'NameEntity', 'Symbol']
    true_predictions = [
        [tags_ind[pred_id] for (pred_id, label_id) in zip(pred_row, label_row) if label_id != -100]
        for pred_row, label_row in zip(preds, labels)
    ]
    true_labels = [
        [tags_ind[label_id] for (pred_id, label_id) in zip(pred_row, label_row) if label_id != -100]
        for pred_row, label_row in zip(preds, labels)
    ]

    # NOTE(review): seqeval scores entity chunks and is designed for IOB-style tags;
    # these plain language tags carry no B-/I- prefix — confirm the reported
    # precision/recall/F1 mean what is intended for token-level language ID.
    metric = load_metric("seqeval")
    results = metric.compute(predictions=true_predictions, references=true_labels)
    print("Task ", task_name, " :")
    print("Accuracy", results["overall_accuracy"])
    print("Precision", results["overall_precision"])
    print("Recall", results["overall_recall"])
    print("F1-Score", results["overall_f1"])

Task  sentiment  :
Accuracy 0.7988165680473372
Precision 0.5406693136698809
Recall 0.5279700148215711
F1 Score 0.5314126474975492 

Classification Report               precision    recall  f1-score   support

           0       0.70      0.80      0.75       358
           1       0.87      0.84      0.85       882
           2       0.60      0.47      0.52       105
           3       0.00      0.00      0.00         7

    accuracy                           0.80      1352
   macro avg       0.54      0.53      0.53      1352
weighted avg       0.80      0.80      0.80      1352
 

Task  humor  :
Accuracy 0.9437869822485208
Precision 0.8345089826959109
Recall 0.7622407834101382
F1 Score 0.7929172007384298 

Classification Report               precision    recall  f1-score   support

           0       0.96      0.98      0.97      1240
           1       0.71      0.54      0.62       112

    accuracy                           0.94      1352
   macro avg       0.83      0.76      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Task  langID  :
Accuracy 0.9636491439473817
Precision 0.9658250725669533
Recall 0.9672023254538474
F1-Score 0.9665132083744382
