In [1]:
import json
args_dict = {
  "n_gpu": 8,
  "model_type": 't5',
  "config_name":"../configs",
  "config_path":"../configs/T5Mem.json",
  "model_name_or_path":"../configs",
  "tokenizer_name":None,  
  "dataset_name":"wikitext",
  "dataset_config_name":"wikitext-103-raw-v1", 
  "max_seq_length": 512 ,
  "output_dir": './test',
  "overwrite_output_dir": True,
  "per_device_train_batch_size": 10,
  "per_device_eval_batch_size": 10,
  "learning_rate": 0.005,
  "num_train_epochs": 1,
  "do_train": True,
  "do_eval":True,
  "logging_steps":500,
  "save_steps":10000,
  "eval_steps":500,
  "report_to":"wandb",
  "evauation_strategy":"steps",
#   "train_file_path":"Data/128train_data.pt",
#   "valid_file_path":"Data/128valid_data.pt",    
}
with open('args.json', 'w') as f:
    json.dump(args_dict, f)

In [2]:
400/6/24

2.777777777777778

In [None]:
%%time
from torch.utils.data import DataLoader
import logging
import os
import sys
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional
import torch
import transformers
import numpy as np
from datasets import load_dataset
import datasets
from datasets import load_dataset
from tqdm import tqdm
import math
from itertools import chain
from transformers.integrations import TensorBoardCallback
from huggingface_hub import Repository
from transformers.models.t5.modeling_flax_t5 import shift_tokens_right

from transformers.file_utils import get_full_repo_name
from transformers import (
    CONFIG_MAPPING,
    MODEL_FOR_MASKED_LM_MAPPING,
    AutoTokenizer,
    BatchEncoding,
    HfArgumentParser,
    PreTrainedTokenizerBase,
    T5Config,
    TrainingArguments,
    is_tensorboard_available,
    set_seed,
    T5ForConditionalGeneration,
    T5Tokenizer,
    Trainer    
)

from sklearn.metrics import accuracy_score
from datasets import load_metric
from transformers.trainer_utils import get_last_checkpoint

logger = logging.getLogger(__name__)

@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "The model checkpoint for weights initialization."
            "Don't set if you want to train a model from scratch."
        },
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "If training from scratch, pass a model type from the list: " }, #+ ", ".join(MODEL_TYPES)},
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    config_path: Optional[str] = field(
        default=None, metadata={"help": "Full path if not the same as model_name"}
    )
    
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
    )
    dtype: Optional[str] = field(
        default="float32",
        metadata={
            "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
        },
    )


@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    dataset_name: Optional[str] = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
    )
    train_ref_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input train ref data file for whole word masking in Chinese."},
    )
    validation_ref_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input validation ref data file for whole word masking in Chinese."},
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    validation_split_percentage: Optional[int] = field(
        default=5,
        metadata={
            "help": "The percentage of the train set used as validation set in case there's no validation split"
        },
    )
    
    train_file_path: Optional[str] = field(
        default= None,
        metadata={"help": "Path for cached train dataset"},
    )
    valid_file_path: Optional[str] = field(
        default= None,
        metadata={"help": "Path for cached valid dataset"},
    )
    
    max_seq_length: Optional[int] = field(
        default=None,
        metadata={
            "help": "The maximum total input sequence length after tokenization and masking. Sequences longer than this will be truncated. Default to the max input length of the model."
        },
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    mlm_probability: float = field(
        default=0.15, metadata={"help": "Ratio of tokens to mask for span masked language modeling loss"}
    )
    mean_noise_span_length: float = field(
        default=3.0,
        metadata={"help": "Mean span length of masked tokens"},
    )
    
    
    
def process_datasets(model_args, data_args, training_args, tokenizer, expanded_inputs_length, save_path):
    raw_datasets = None
    if data_args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        raw_datasets = load_dataset(
            data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir
        )
        if "validation" not in raw_datasets.keys():
            raw_datasets["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
            )
            raw_datasets["train"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
            )
    else:
        data_files = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
        extension = data_args.train_file.split(".")[-1]
        if extension == "txt":
            extension = "text"
        raw_datasets = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)

        # If no validation data is there, validation_split_percentage will be used to divide the dataset.
        if "validation" not in raw_datasets.keys():
            raw_datasets["validation"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
            )
            raw_datasets["train"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
            )


    # Preprocessing the datasets.
    # First we tokenize all the texts.
    if training_args.do_train:
        column_names = raw_datasets["train"].column_names
    else:
        column_names = raw_datasets["validation"].column_names
    text_column_name = "text" if "text" in column_names else column_names[0]

    if data_args.max_seq_length is None:
        max_seq_length = tokenizer.model_max_length
        if max_seq_length > 512:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
            )
            max_seq_length = 512
    else:
        if data_args.max_seq_length > tokenizer.model_max_length:
            logger.warning(
                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
                f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
            )
        max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
        

    def tokenize_function(examples):
        # remove empty lines
        examples[text_column_name] =  [x.replace("\n", "").strip() for x in examples[text_column_name]  if x.replace("\n", "").strip()] 
#         import pdb; pdb.set_trace()
        return tokenizer(examples[text_column_name], return_attention_mask=False)

    print("******************** Tokenize data set  ************************")
    with training_args.main_process_first(desc="dataset map tokenization"):
        tokenized_datasets = raw_datasets.map(
            tokenize_function,
            batched=True,
            num_proc=data_args.preprocessing_num_workers,
            remove_columns=column_names,
            load_from_cache_file=not data_args.overwrite_cache,
            desc="Running tokenizer on every text in dataset",
        )
#     import pdb; pdb.set_trace()

    # Main data processing function that will concatenate all texts from our dataset and generate chunks of
    # expanded_inputs_length.
    def group_texts(examples):
        # Concatenate all texts.
        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
        # customize this part to your needs.
        if total_length >= expanded_inputs_length:
            total_length = (total_length // expanded_inputs_length) * expanded_inputs_length
        # Split by chunks of max_len.
        result = {
            k: [t[i : i + expanded_inputs_length] for i in range(0, total_length, expanded_inputs_length)]
            for k, t in concatenated_examples.items()
        }
        return result

        # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a
        # remainder for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value
        # might be slower to preprocess.
        #
        # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
        # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map

    with training_args.main_process_first(desc="grouping texts together"):
        tokenized_datasets = tokenized_datasets.map(
            group_texts,
            batched=True,
            num_proc=data_args.preprocessing_num_workers,
            load_from_cache_file=not data_args.overwrite_cache,
            desc=f"Grouping texts in chunks of {max_seq_length}",
        )

    if training_args.do_train:
        if "train" not in tokenized_datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = tokenized_datasets["train"]
#         if data_args.max_train_samples is not None:
#             train_dataset = train_dataset.select(range(data_args.max_train_samples))

    if training_args.do_eval:
        if "validation" not in tokenized_datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = tokenized_datasets["validation"]

    if "test" not in tokenized_datasets:
        raise ValueError("--do_predict requires a test dataset")
    test_dataset = tokenized_datasets["test"]

    # cach the dataset, so we can load it directly for training
    torch.save(train_dataset, save_path+str(max_seq_length)+'train_data.pt') 
    torch.save(eval_dataset, save_path+str(max_seq_length)+'valid_data.pt')
    torch.save(test_dataset, save_path+str(max_seq_length)+'test_data.pt')
    return train_dataset, eval_dataset, test_dataset
    
    
def create_tokenizer(model_args):
    config = None
#     if model_args.config_name:
#         config = T5Config.from_pretrained(
#             model_args.config_name, cache_dir=model_args.cache_dir
#         )
#     elif model_args.model_name_or_path: 
#         config = T5Config.from_json_file(
#             model_args.model_name_or_path, cache_dir=model_args.cache_dir
#         )

#     else:
#         config = CONFIG_MAPPING[model_args.model_type]()
#         logger.warning("You are instantiating a new config instance from scratch.")  
    config = T5Config.from_json_file(json_file=model_args.config_path)
    print("Config from tokenizer:  ", config)
    print(" config.tokenizer_path",  config.tokenizer_path)
    
    if model_args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
        )
        print("t5-tokenizer")
        print(tokenizer)
        print('****************************************')
        return tokenizer
    elif model_args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            config.tokenizer_path, config=config
        )
        print("wiki-tokenizer")
        print(tokenizer)
        print('****************************************')
        return tokenizer
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

@dataclass
class DataCollatorForT5MLM:
    """
    Data collator used for T5 span-masked language modeling.
    It is made sure that after masking the inputs are of length `data_args.max_seq_length` and targets are also of fixed length.
    For more information on how T5 span-masked language modeling works, one can take a look
    at the `official paper <https://arxiv.org/pdf/1910.10683.pdf>`__
    or the `official code for preprocessing <https://github.com/google-research/text-to-text-transfer-transformer/blob/master/t5/data/preprocessors.py>`__ .
    Args:
        tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`):
            The tokenizer used for encoding the data.
        noise_density (:obj:`float`):
            The probability with which to (randomly) mask tokens in the input.
        mean_noise_span_length (:obj:`float`):
            The average span length of the masked tokens.
        input_length (:obj:`int`):
            The expected input length after masking.
        target_length (:obj:`int`):
            The expected target length after masking.
        pad_token_id: (:obj:`int`):
            The pad token id of the model
        decoder_start_token_id: (:obj:`int):
            The decoder start token id of the model
    """

    tokenizer: PreTrainedTokenizerBase
    noise_density: float
    mean_noise_span_length: float
    input_length: int
    target_length: int
    pad_token_id: int

    def __call__(self, examples: List[Dict[str, np.ndarray]]) -> Dict[str, np.ndarray]:
        '''
        1. random_spans_noise_mask
        2. random span noise 
        3. create_sentinel_ids
        4. filter_input_ids
        if max length = 512 then len(example['input_ids']) = 568
        '''
        
        batch = BatchEncoding( # batch.keys() = input_ids
            {k: np.array([examples[i][k] for i in range(len(examples))]) for k, v in examples[0].items()}
        )
        print('---------------------------   samples before  ------------------------------------')
        for i , x in enumerate(batch["input_ids"]):
            print(self.tokenizer.decode(x))
        print("----------------------------------------------------------------------------------")   
        input_ids = batch["input_ids"]
        batch_size, expandend_input_length = input_ids.shape
        mask_indices = np.asarray([self.random_spans_noise_mask(expandend_input_length) for i in range(batch_size)])
        labels_mask = ~mask_indices
        input_ids_sentinel = self.create_sentinel_ids(mask_indices.astype(np.int8))
        labels_sentinel = self.create_sentinel_ids(labels_mask.astype(np.int8))
        inputs = self.filter_input_ids(input_ids, input_ids_sentinel)
        batch["input_ids"] = torch.tensor(inputs)
        labels = self.filter_input_ids(input_ids, labels_sentinel)
        batch["labels"] = torch.tensor(labels)
        if batch["input_ids"].shape[-1] != self.input_length:
            raise ValueError(
                f"`input_ids` are incorrectly preprocessed. `input_ids` length is {batch['input_ids'].shape[-1]}, but should be {self.target_length}."
            )
        if batch["labels"].shape[-1] != self.target_length:
            raise ValueError(
                f"`labels` are incorrectly preprocessed. `labels` length is {batch['labels'].shape[-1]}, but should be {self.target_length}."
            )
        # to check that tokens are correctly proprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here...
        print('---------------------------   input_ids  ------------------------------------')
        for i , x in enumerate(batch["input_ids"]):
            print(self.tokenizer.decode(x))
        print("---------------------------------------------------------------------------")
        print('---------------------------------- labels ---------------------------------')
        for i,x in enumerate(batch["labels"]):
            print(self.tokenizer.decode(x))
#         print("self.tokenizer.batch_decode(labels):  ",  self.tokenizer.decode(labels))   
        # to check that tokens are correctly proprocessed, one can run `self.tokenizer.batch_decode(input_ids)` and `self.tokenizer.batch_decode(labels)` here...
#         batch["decoder_input_ids"] = torch.tensor(shift_tokens_right(
#             labels, self.pad_token_id, self.decoder_start_token_id
#         ))


        return batch

    def create_sentinel_ids(self, mask_indices):
        """
        Sentinel ids creation given the indices that should be masked.
        The start indices of each mask are replaced by the sentinel ids in increasing
        order. Consecutive mask indices to be deleted are replaced with `-1`.
        """
        start_indices = mask_indices - np.roll(mask_indices, 1, axis=-1) * mask_indices
        start_indices[:, 0] = mask_indices[:, 0]
        sentinel_ids = np.where(start_indices != 0, np.cumsum(start_indices, axis=-1), start_indices)
        sentinel_ids = np.where(sentinel_ids != 0, (len(self.tokenizer) - sentinel_ids), 0)
        sentinel_ids -= mask_indices - start_indices

        return sentinel_ids

    def filter_input_ids(self, input_ids, sentinel_ids):
        """
        Puts sentinel mask on `input_ids` and fuse consecutive mask tokens into a single mask token by deleting.
        This will reduce the sequence length from `expanded_inputs_length` to `input_length`.
        """
        batch_size = input_ids.shape[0]
        input_ids_full = np.where(sentinel_ids != 0, sentinel_ids, input_ids)
        input_ids = input_ids_full[input_ids_full > 0].reshape((batch_size, -1))
        input_ids = np.concatenate(
            [input_ids, np.full((batch_size, 1), self.tokenizer.eos_token_id, dtype=np.int32)], axis=-1
        )
        return input_ids

    def random_spans_noise_mask(self, length):

        """This function is copy of `random_spans_helper <https://github.com/google-research/text-to-text-transfer-transformer/blob/84f8bcc14b5f2c03de51bd3587609ba8f6bbd1cd/t5/data/preprocessors.py#L2682>`__ .
        Noise mask consisting of random spans of noise tokens.
        The number of noise tokens and the number of noise spans and non-noise spans
        are determined deterministically as follows:
        num_noise_tokens = round(length * noise_density)
        num_nonnoise_spans = num_noise_spans = round(num_noise_tokens / mean_noise_span_length)
        Spans alternate between non-noise and noise, beginning with non-noise.
        Subject to the above restrictions, all masks are equally likely.
        Args:
            length: an int32 scalar (length of the incoming token sequence)
            noise_density: a float - approximate density of output mask
            mean_noise_span_length: a number
        Returns:
            a boolean tensor with shape [length]
        """
        orig_length = length
        num_noise_tokens = int(np.round(length * self.noise_density))
        # avoid degeneracy by ensuring positive numbers of noise and nonnoise tokens.
        num_noise_tokens = min(max(num_noise_tokens, 1), length - 1)
        num_noise_spans = int(np.round(num_noise_tokens / self.mean_noise_span_length))

        # avoid degeneracy by ensuring positive number of noise spans
        num_noise_spans = max(num_noise_spans, 1)
        num_nonnoise_tokens = length - num_noise_tokens

        # pick the lengths of the noise spans and the non-noise spans
        def _random_segmentation(num_items, num_segments):
            """Partition a sequence of items randomly into non-empty segments.
            Args:
                num_items: an integer scalar > 0
                num_segments: an integer scalar in [1, num_items]
            Returns:
                a Tensor with shape [num_segments] containing positive integers that add
                up to num_items
            """
            mask_indices = np.arange(num_items - 1) < (num_segments - 1)
            np.random.shuffle(mask_indices)
            first_in_segment = np.pad(mask_indices, [[1, 0]])
            segment_id = np.cumsum(first_in_segment)
            # count length of sub segments assuming that list is sorted
            _, segment_length = np.unique(segment_id, return_counts=True)
            return segment_length

        noise_span_lengths = _random_segmentation(num_noise_tokens, num_noise_spans)
        nonnoise_span_lengths = _random_segmentation(num_nonnoise_tokens, num_noise_spans)

        interleaved_span_lengths = np.reshape(
            np.stack([nonnoise_span_lengths, noise_span_lengths], axis=1), [num_noise_spans * 2]
        )
        span_starts = np.cumsum(interleaved_span_lengths)[:-1]
        span_start_indicator = np.zeros((length,), dtype=np.int8)
        span_start_indicator[span_starts] = True
        span_num = np.cumsum(span_start_indicator)
        is_noise = np.equal(span_num % 2, 1)
        return is_noise[:orig_length]
          
def compute_input_and_target_lengths(inputs_length, noise_density, mean_noise_span_length):
    """This function is copy of `random_spans_helper <https://github.com/google-research/text-to-text-transfer-transformer/blob/84f8bcc14b5f2c03de51bd3587609ba8f6bbd1cd/t5/data/preprocessors.py#L2466>`__ .
    Training parameters to avoid padding with random_spans_noise_mask.
    When training a model with random_spans_noise_mask, we would like to set the other
    training hyperparmeters in a way that avoids padding.
    This function helps us compute these hyperparameters.
    We assume that each noise span in the input is replaced by extra_tokens_per_span_inputs sentinel tokens,
    and each non-noise span in the targets is replaced by extra_tokens_per_span_targets sentinel tokens.
    This function tells us the required number of tokens in the raw example (for split_tokens())
    as well as the length of the encoded targets. Note that this function assumes
    the inputs and targets will have EOS appended and includes that in the reported length.
    Args:
        inputs_length: an integer - desired length of the tokenized inputs sequence
        noise_density: a float
        mean_noise_span_length: a float
    Returns:
        tokens_length: length of original text in tokens
        targets_length: an integer - length in tokens of encoded targets sequence
    """

    def _tokens_length_to_inputs_length_targets_length(tokens_length):
        num_noise_tokens = int(round(tokens_length * noise_density))
        num_nonnoise_tokens = tokens_length - num_noise_tokens
        num_noise_spans = int(round(num_noise_tokens / mean_noise_span_length))
        # inputs contain all nonnoise tokens, sentinels for all noise spans
        # and one EOS token.
        _input_length = num_nonnoise_tokens + num_noise_spans + 1
        _output_length = num_noise_tokens + num_noise_spans + 1
        return _input_length, _output_length

    tokens_length = inputs_length

    while _tokens_length_to_inputs_length_targets_length(tokens_length + 1)[0] <= inputs_length:
        tokens_length += 1

    inputs_length, targets_length = _tokens_length_to_inputs_length_targets_length(tokens_length)

    # minor hack to get the targets length to be equal to inputs length
    # which is more likely to have been set to a nice round number.
    if noise_density == 0.5 and targets_length > inputs_length:
        tokens_length -= 1
        targets_length -= 1
    return tokens_length, targets_length

                

def create_model(model_args, tokenizer):
    if model_args.config_name:
        config = T5Config.from_pretrained(
            model_args.config_name, cache_dir=model_args.cache_dir, vocab_size=len(tokenizer)
        )
    elif model_args.model_name_or_path:
        config = T5Config.from_pretrained(
            model_args.model_name_or_path, cache_dir=model_args.cache_dir, vocab_size=len(tokenizer)
        )
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")   
    config.vocab_size = len(tokenizer)
    if model_args.model_name_or_path:
        print('................  model T5 from pretrained ...........................')
        print(model_args.model_name_or_path)
        model = T5ForConditionalGeneration(config=config)
    else:
        print('.........................  T5 from scratch   ............................')
        model = T5ForConditionalGeneration(config)
        
        
    model.resize_token_embeddings(len(tokenizer))
    print("*************************************   config ********************************")
    print(config)
    print('********************************************************************************')
    print('*************************************model *************************************')
    print(model)
    print("********************************************************************************")
    return model


def main():
    # 1: Parse argumen
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
#     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
#         # If we pass only one argument to the script and it's the path to a json file,
#         # let's parse it to get our arguments.
#         model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
#     else:
#         model_args, data_args, training_args = parser.parse_args_into_dataclasses()
    model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath('args.json'))
        
    tokenizer = create_tokenizer(model_args)
    
    print('training_args')
    print(training_args)
    print('####################################################################')

    # 2: Otput directory is exist and empty
    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty."
            "Use --overwrite_output_dir to overcome."
        )

    #3:  Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        level="NOTSET",
        datefmt="[%X]",
    )
    # Log on each process the small summary:
    logger = logging.getLogger(__name__)
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()
    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    logger.info(f"Training/evaluation parameters {training_args}")
    logger.info(f"Model arguments {model_args}")
    logger.info(f"Data arguments {data_args}")
    
    #4:  Set seed before initializing model.
    set_seed(training_args.seed)

    #5:  Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    #6:  create  tokenizer.
    tokenizer = create_tokenizer(model_args)    
    if data_args.max_seq_length is None:
        max_seq_length = tokenizer.model_max_length
        if max_seq_length > 512:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
            )
            max_seq_length = 512
    else:
        if data_args.max_seq_length > tokenizer.model_max_length:
            logger.warning(
                f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
                f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
            )
        max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)

    # 7: T5-like span masked language modeling will fuse consecutively masked tokens to a single sentinel token.
    # To ensure that the input length is `max_seq_length`, we need to increase the maximum length
    # according to `mlm_probability` and `mean_noise_span_length`. We can also define the label length accordingly.
    expanded_inputs_length, targets_length = compute_input_and_target_lengths(
        inputs_length=max_seq_length,
        noise_density=data_args.mlm_probability,
        mean_noise_span_length=data_args.mean_noise_span_length,
    )

    #8:  Get datasets if they already exists or download and prepare them for using
    train_dataset, eval_dataset = None, None
    print('data_args.train_file_path:  -------------------------------------------------->', data_args.train_file_path)
    if data_args.train_file_path != None and data_args.valid_file_path != None:
        print('loading data')
        train_dataset  = torch.load(data_args.train_file_path)
        eval_dataset = torch.load(data_args.valid_file_path)
        print('loading done')    
    elif data_args.dataset_name is not None or data_args.train_file is not None: 
        train_dataset, eval_dataset, test_dataset = process_datasets(model_args, 
                                                       data_args, 
                                                       training_args, 
                                                       tokenizer, 
                                                       expanded_inputs_length, 
                                                       "../Data/wiki_hotpot3/")        
    else:
        raise ValueError(
            "No available datasets. You need to load a cashed dataset or process a dataset."
        )  
    
    # 9: create model     
#     model = create_model(model_args, tokenizer) 

    # 10: Data collator
    # This one will take care of randomly masking the tokens.
    data_collator = DataCollatorForT5MLM(
        tokenizer=tokenizer,
        noise_density=data_args.mlm_probability,
        mean_noise_span_length=data_args.mean_noise_span_length,
        input_length=max_seq_length,
        target_length=targets_length,
        pad_token_id=0  # model.config.pad_token_id,
    )
    train_dataloader = DataLoader(
                train_dataset,
                batch_size=1,
                collate_fn=data_collator,
                num_workers=0,
                pin_memory=True,
            )
    for step, batch in enumerate(tqdm(train_dataloader, desc="Training...", position=1)):
        print(f'**********************************(  {step}  )*****************************************')
        if step == 1:
            break   


main()

 This a JAX bug; please report an issue at https://github.com/google/jax/issues
  _warn(f"cloud_tpu_init failed: {repr(exc)}\n This a JAX bug; please report "
Failed to get worker list with error: Timeout was reachedfile ../tokenizer/configs/tokenizer_hotpot_wiki_text/31997/config.json not found
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[18:14:13] - INFO - __main__ -   Training/evaluation parameters TrainingArguments(
_n_gpu=10,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.NO,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
gradient_checkpointing

Config from tokenizer:   T5Config {
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "early_stopping": true,
  "encoder_cross_attention_layer": "Models.T5MemModel:T5MemAttention",
  "encoder_decoder_cross_attention_layer": "Models.T5MemModel:T5MemDecoderAttention",
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "linear": true,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 2,
  "num_heads": 8,
  "num_input_sent": 4,
  "num_layers": 2,
  "num_mem_toks": 2,
  "output_hidden_states": true,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "translation_en_to_ru": {
      "early_stopping": true,
      "max_length": 100,
      "num_beams": 4,
      "prefix": "translate English to Russian: "
    },
    "trans

[18:14:13] - DEBUG - urllib3.connectionpool -   Starting new HTTPS connection (1): s3.amazonaws.com:443
[18:14:14] - DEBUG - urllib3.connectionpool -   https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/wikitext/wikitext.py HTTP/1.1" 200 0
[18:14:14] - DEBUG - urllib3.connectionpool -   Starting new HTTPS connection (1): raw.githubusercontent.com:443
[18:14:14] - DEBUG - urllib3.connectionpool -   https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/1.15.1/datasets/wikitext/wikitext.py HTTP/1.1" 200 0
[18:14:14] - DEBUG - urllib3.connectionpool -   Starting new HTTPS connection (1): raw.githubusercontent.com:443
[18:14:15] - DEBUG - urllib3.connectionpool -   https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/1.15.1/datasets/wikitext/dataset_infos.json HTTP/1.1" 200 0
[18:14:15] - INFO - datasets.info -   Loading Dataset Infos from /home/arij/.cache/huggingface/modules/datasets_modules/datasets/wikitext/aa5e094000ec7afeb74c3be92c88

  0%|          | 0/3 [00:00<?, ?it/s]

******************** Tokenize data set  ************************


Running tokenizer on every text in dataset:   0%|          | 0/5 [00:00<?, ?ba/s]

[18:14:19] - INFO - datasets.arrow_dataset -   Caching processed dataset at /home/arij/.cache/huggingface/datasets/wikitext/wikitext-103-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/cache-beb65b9c2f6203ac.arrow


Running tokenizer on every text in dataset:   0%|          | 0/1802 [00:00<?, ?ba/s]

[18:14:19] - INFO - datasets.arrow_dataset -   Caching processed dataset at /home/arij/.cache/huggingface/datasets/wikitext/wikitext-103-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/cache-e33c93c9e2476b75.arrow


Running tokenizer on every text in dataset:   0%|          | 0/4 [00:00<?, ?ba/s]

[18:16:09] - INFO - datasets.arrow_dataset -   Caching processed dataset at /home/arij/.cache/huggingface/datasets/wikitext/wikitext-103-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/cache-c3425b9ba22f9ae2.arrow


Grouping texts in chunks of 512:   0%|          | 0/3 [00:00<?, ?ba/s]

[18:16:10] - INFO - datasets.arrow_dataset -   Caching processed dataset at /home/arij/.cache/huggingface/datasets/wikitext/wikitext-103-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/cache-ac28271076212055.arrow


Grouping texts in chunks of 512:   0%|          | 0/1166 [00:00<?, ?ba/s]

[18:16:10] - INFO - datasets.arrow_dataset -   Caching processed dataset at /home/arij/.cache/huggingface/datasets/wikitext/wikitext-103-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/cache-6998f320173c1fcc.arrow


In [None]:
import re
line = '''</s> = valkyria chronicles iii = </s></s> senjō no valkyria 3 : unrecorded chronicles ( japanese : 戦場のヴァルキュリア3, lit. valkyria of the battlefield 3 ), commonly referred to as valkyria chronicles iii outside japan, is a tactical role @-@ playing video game developed by sega and media.vision for the playstation portable. released in january 2011 in japan, it is the third game in the valkyria series. employing the same fusion of tactical and real @-@ time gameplay as its predecessors, the story runs parallel to the first game and follows the " nameless ", a penal military unit serving the nation of gallia during the second europan war who perform secret black operations and are pitted against the imperial unit " calamaty raven ". </s> the game began development in 2010, carrying over a large portion of the work done on valkyria chronicles ii. while it retained the standard features of the series, it also underwent multiple adjustments, such as making the game more forgiving for series newcomers. character designer raita honjou and composer hitoshi sakimoto both returned from previous entries, along with valkyria chronicles ii director takeshi ozawa. a large team of writers handled the script. the game's opening theme was sung by may 'n. </s> it met with positive sales in japan, and was praised by both japanese and western critics. after release, it received downloadable content, along with an expanded edition in november of that year. it was also adapted into manga and an original video animation series. due to low sales of valkyria chronicles ii, valkyria chronicles iii was not localized, but a fan translation compatible with the game's expanded edition was released in 2014. media.vision would return to the franchise with the development of valkyria : azure revolution for the playstation 4. </s></s> = = gameplay = = </s></s> as with previous valkyira chronicles games, valkyria chronicles iii is a tactical role @-@ playing game where players take control of a military unit and take part in missions against enemy forces. stories are told through comic book '''
line.replace("</s>","")

In [None]:
#                                                                                                                                                                                                           -
# --------------------------   samples before  ------------------------------------                                                                                              | 0/1107067 [00:00<?, ?it/s]
# </s> = valkyria chronicles iii = </s></s> senjō no valkyria 3 : unrecorded chronicles ( japanese : 戦場のヴァルキュリア3, lit. valkyria of the battlefield 3 ), commonly referred to as valkyria chronicles
#  iii outside japan, is a tactical role @-@ playing video game developed by sega and media.vision for the playstation portable. released in january 2011 in japan, it is the third game in the valkyria seri
# es. employing the same fusion of tactical and real @-@ time gameplay as its predecessors                                                                                                                   
# ----------------------------------------------------------------------------------                                                                                                                         
# ---------------------------   input_ids  ------------------------------------                                                                                                                              
# </s> = valkyria chronicles iii = </s></s> senjō no valkyria 3 : unrecorded chronicles <extra_id_99> japanese : 戦場のヴァルキュリア<extra_id_98> lit. valkyria of the battlefield 3 ), commonly referred to
#  as valkyria chronicles iii outside japan, is a<extra_id_97> @-@ playing video game developed by sega and media.vision for<extra_id_96> portable. released in january<extra_id_95>1 in japan, it is the thi
# rd game in the valkyria series. employing the same fusion of tactical and real<extra_id_94>@<extra_id_93></s>                                                                                              
# ---------------------------------------------------------------------------                                                                                                                                
# ---------------------------------- labels ---------------------------------                                                                                                                                
# <extra_id_99>(<extra_id_98>3,<extra_id_97> tactical role<extra_id_96> the playstation<extra_id_95> 201<extra_id_94> @-<extra_id_93> time gameplay as its predecessors</s>                                  
# **********************************(  0  )*****************************************                                                                                                                         
                                                                                                                                                                                                           
# ---------------------------   samples before  ------------------------------------                                                                                                                        ,
#  the story runs parallel to the first game and follows the " nameless ", a penal military unit serving the nation of gallia during the second europan war who perform secret black operations and are pitte
# d against the imperial unit " calamaty raven ". </s> the game began development in 2010, carrying over a large portion of the work done on valkyria chronicles ii. while it retained the standard features 
# of the series, it also underwent multiple adjustments, such as making the game more forgiving for series newcomers. character designer raita honjou and                                                    
# ----------------------------------------------------------------------------------                                                                                                                         
# ---------------------------   input_ids  ------------------------------------                                                                                                                              
# , the story runs parallel to the first game and follows the " nameless ", a penal military unit serving the nation of gallia during the second europan war who perform secret black operations and<extra_id
# _99> pitted against the imperial unit<extra_id_98> calamaty raven ". </s> the game began development in 2010<extra_id_97>a large portion of the work done on valkyria chronicles ii. while it retaine<extra
# _id_96> also underwent multiple<extra_id_95>s, such as making the game more forg<extra_id_94>ing for series newcomers. character designer raita honjou<extra_id_93></s>                                    
# ---------------------------------------------------------------------------                                                                                                                                
# ---------------------------------- labels ---------------------------------                                                                                                                                
# <extra_id_99> are<extra_id_98> "<extra_id_97>, carrying over <extra_id_96>d the standard features of the series, it<extra_id_95> adjustment<extra_id_94>iv<extra_id_93> and</s>                            
# **********************************(  1  )*****************************************                                                                                                                         
# ---------------------------   samples before  ------------------------------------                                                                                                                         
# composer hitoshi sakimoto both returned from previous entries, along with valkyria chronicles ii director takeshi ozawa. a large team of writers handled the script. the game's opening theme was sung by m
# ay 'n. </s> it met with positive sales in japan, and was praised by both japanese and western critics. after release, it received downloadable content, along with an expanded edition in november of that 
# year. it was also adapted into manga and an original video animation series. due to low sales of valkyria chronicles ii, valkyria chronicles iii
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# composer<extra_id_99> sakimoto both returned from previous entries, along with valkyria chronicles ii director takeshi o<extra_id_98> large team of writers handled<extra_id_97> script. the game's opening
# <extra_id_96> by may 'n. </s> it met with positive sales in<extra_id_95>d by both japanese and western critics. after<extra_id_94> it received downloadable content, along with an expanded edition in nove
# mber of that year. it was also adapted into manga and an original video animation series. due to low sales of valkyria chronicles ii, valkyria chronicles<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> hitoshi<extra_id_98>zawa. a<extra_id_97> the<extra_id_96> theme was sung<extra_id_95> japan, and was praise<extra_id_94> release,<extra_id_93> iii</s>
# **********************************(  2  )*****************************************                                                                                                               [325/1824]
# ---------------------------   samples before  ------------------------------------
# was not localized, but a fan translation compatible with the game's expanded edition was released in 2014. media.vision would return to the franchise with the development of valkyria : azure revolution f
# or the playstation 4. </s></s> = = gameplay = = </s></s> as with previous valkyira chronicles games, valkyria chronicles iii is a tactical role @-@ playing game where players take control of a military u
# nit and take part in missions against enemy forces. stories are told through comic book @-@ like panels with animated character portraits 
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# was not localize<extra_id_99> but a fan translation compatible with the game's expanded edition was released in 2014. media.vision would return to the<extra_id_98> with the development of valkyria : azur
# e revolution for the playstation<extra_id_97> </s></s> <extra_id_96> gameplay = = </s></s> as with previous<extra_id_95>kyira chronicles games, valkyria chronicles iii is a tactical role @-@ playing game
#  where players take control of<extra_id_94> take part in missions against enemy forces. stories are told through comic book @-@ like panels with animated<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>d,<extra_id_98> franchise<extra_id_97> 4.<extra_id_96>= =<extra_id_95> val<extra_id_94> a military unit and<extra_id_93> character portraits </s>
# **********************************(  3  )*****************************************
# ---------------------------   samples before  ------------------------------------
# , with characters speaking partially through voiced speech bubbles and partially through unvoiced text. the player progresses through a series of linear missions, gradually unlocked as maps that can be f
# reely scanned through and replayed as they are unlocked. the route to each story location on the map varies depending on an individual player's approach : when one option is selected, the other is sealed
#  off to the player. outside missions, the player characters rest in a camp, where units can be customized and character growth occurs. alongside the main story missions are character 
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# , with characters speaking partially through voiced speech bubbles and partially through unvoiced text. the player progresses through a series of linear missions, gradual<extra_id_99> as maps that can be
#  freely<extra_id_98> as they<extra_id_97>. the route to each<extra_id_96> location on the map varies depending on<extra_id_95> individual player's approach : when one option is selected, the other is<ext
# ra_id_94> the player. outside missions, the player characters rest in a camp, where units can be customized and character growth occurs. alongside the main story missions are character<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>ly unlocked<extra_id_98> scanned through and replayed<extra_id_97> are unlocked<extra_id_96> story<extra_id_95> an<extra_id_94> sealed off to<extra_id_93> </s>
# **********************************(  4  )*****************************************
# ---------------------------   samples before  ------------------------------------
# @-@ specific sub missions relating to different squad members. after the game's completion, additional episodes are unlocked, some of them having a higher difficulty than those found in the rest of the g
# ame. there are also love simulation elements related to the game's two main heroines, although they take a very minor role. </s> the game's battle system, the blitz system, is carried over directly from 
# valkyira chronicles. during missions, players select each unit using a top @-@ down perspective of the battlefield map :
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# @-@ specific sub missions relating to different squad members. after the game's completion, additional<extra_id_99> unlocked, some of them having a higher difficulty than those found in the rest of the g
# ame.<extra_id_98> simulation elements related to the game's two main heroines, al<extra_id_97> minor role. </s><extra_id_96>'s battle system, the blitz system,<extra_id_95> carried over directly from val
# kyir<extra_id_94> chronicles. during missions, players select each unit using a top @-@ down perspective of the<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> episodes are<extra_id_98> there are also love<extra_id_97>though they take a very<extra_id_96> the game<extra_id_95> is<extra_id_94>a<extra_id_93> battlefield map :</s>
# **********************************(  5  )*****************************************
# ---------------------------   samples before  ------------------------------------
# once a character is selected, the player moves the character around the battlefield in third @-@ person. a character can only act once per @-@ turn, but characters can be granted multiple turns at the ex
# pense of other characters'turns. each character has a field and distance of movement limited by their action gauge. up to nine characters can be assigned to a single mission. during gameplay, characters 
# will call out if something happens to them, such as their health points ( hp ) getting low or being knocked out by enemy attacks. each character has specific "
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# once a character is selected, the player<extra_id_99> battlefield in third @-@ person. a character can only act once per @-@ turn, but characters can be granted multiple turns at the expense of other cha
# racters<extra_id_98> each character has a field<extra_id_97> distance of movement limited by their action gauge. up to nine characters can be assigned to a single mission. during gameplay, characters<ext
# ra_id_96> call out if something happens to<extra_id_95> as their health points ( hp ) getting low or being knocke<extra_id_94> attacks. each character has specific <extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> moves the character around the<extra_id_98>'turns.<extra_id_97> and<extra_id_96> will<extra_id_95> them, such<extra_id_94>d out by enemy<extra_id_93>"</s>
# **********************************(  6  )*****************************************
# ---------------------------   samples before  ------------------------------------
# potentials ", skills unique to each character. they are divided into " personal potential ", which are innate skills that remain unaltered unless otherwise dictated by the story and can either help or im
# pede a character, and " battle potentials ", which are grown throughout the game and always grant boons to a character. to learn battle potentials, each character has a unique " masters table ", a grid @
# -@ based skill table that can be used to acquire and link different skills. characters also have special abilities that grant them temporary boosts
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# potentials ", skills unique to each character. they are divided into " personal potential ", which are innate skills that remain unaltered unless otherwise dictated by the story and can either help or im
# pede a character, and " battle potentials ", which are grown throughout the game and<extra_id_99> boons to a character. to<extra_id_98>, each character has a<extra_id_97> masters table ", a grid<extra_id
# _96> table that can be used to acquire and link different skills. characters<extra_id_95> abilities<extra_id_94> grant them temporary boost<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> always grant<extra_id_98> learn battle potentials<extra_id_97> unique "<extra_id_96> @-@ based skill<extra_id_95> also have special<extra_id_94> that<extra_id_93>s</s>
# **********************************(  7  )*****************************************
# ---------------------------   samples before  ------------------------------------
# on the battlefield : kurt can activate " direct command " and move around the battlefield without depleting his action point gauge, the character reila can shift into her " valkyria form " and become inv
# incible, while imca can target multiple enemy units with her heavy weapon. </s> troops are divided into five classes : scouts, shocktroopers, engineers, lancers and armored soldier. troopers can switch c
# lasses by changing their assigned weapon. changing class does not greatly affect the stats gained while in a previous class. with victory in
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# on the battlefield <extra_id_99> kurt can activate " direct command " and<extra_id_98> battlefield without depleting his action point gauge, the character reila can shift into her " valkyria form " and b
# ecome invincible, while imca can target multiple enemy units with her heavy<extra_id_97>. </s> troops are divided into five classes : scouts, shocktroopers,<extra_id_96>, lancers and armored soldier. tro
# opers can<extra_id_95> changing their assigned weapon. changing class does not greatly affect the stats gaine<extra_id_94> in a previous class<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>:<extra_id_98> move around the<extra_id_97> weapon <extra_id_96> engineers <extra_id_95> switch classes by<extra_id_94>d while<extra_id_93>. with victory in</s>
# **********************************(  8  )*****************************************
# ---------------------------   samples before  ------------------------------------
# battle, experience points are awarded to the squad, which are distributed into five different attributes shared by the entire squad, a feature differing from early games'method of distributing to differe
# nt unit types. </s></s> = = plot = = </s></s> the game takes place during the second europan war. gallian army squad 422, also known as " the nameless ", are a penal military unit composed of criminals, 
# foreign deserters, and military offenders whose real names are erased from the records and thereon officially referred to by numbers 
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# battle, experience points are awarded to the squad, which are distributed<extra_id_99> five different attributes shared by the entire squad, a feature differing from early games'method of distributing to
#  different unit types. </s></s> = = plot = <extra_id_98> game takes place<extra_id_97> the second europan war. gallian army squad 422, also known as " the nameless ", are a penal military unit composed o
# f criminals, foreign deserters, and military<extra_id_96> real names are erased<extra_id_95> the<extra_id_94> and thereon<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> into<extra_id_98>= </s></s> the<extra_id_97> during<extra_id_96> offenders whose<extra_id_95> from<extra_id_94> records<extra_id_93> officially referred to by numbers </s>
# **********************************(  9  )*****************************************
# ---------------------------   samples before  ------------------------------------
# . ordered by the gallian military to perform the most dangerous missions that the regular army and militia will not do, they are nevertheless up to the task, exemplified by their motto, altaha abilia, me
# aning " always ready. " the three main characters are no.7 kurt irving, an army officer falsely accused of treason who wishes to redeem himself ; ace no.1 imca, a female darcsen heavy weapons specialist 
# who seeks revenge against the valkyria who destroyed her home ; and no.13 riela marcellis, 
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# . ordered by the galli<extra_id_99> military to perform<extra_id_98> army and<extra_id_97> not do, they are nevertheless up to the task, exemplified by their motto, altaha abilia, meaning " always ready.
#  <extra_id_96> main characters are no.7 kurt irving, an army officer falsely accused of treason who wishes to redeem himself <extra_id_95>.1 imca<extra_id_94> a female darcsen heavy weapons specialist wh
# o seeks revenge against the valkyria who destroyed her home ; and no.13 riela marcellis <extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>an<extra_id_98> the most dangerous missions that the regular<extra_id_97> militia will<extra_id_96>" the three<extra_id_95>; ace no<extra_id_94>,<extra_id_93>, </s>
# **********************************(  10  )*****************************************                                                                                                              [221/1824]
# ---------------------------   samples before  ------------------------------------
# a seemingly jinxed young woman who is unknowingly a descendant of the valkyria. together with their fellow squad members, these three are tasked to fight against a mysterious imperial unit known as calam
# ity raven, consisting of mostly darcsen soldiers. </s> as the nameless officially do not exist, the upper echelons of the gallian army exploit the concept of plausible deniability in order to send them o
# n missions that would otherwise make gallia lose face in the war. while at times this works to their advantage, such as a successful incursion into imperial territory
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# a seeming<extra_id_99> jinxed young woman who is unknowingly a descendant of the valkyria. together with their fellow squad members,<extra_id_98> to fight against a mysterious imperial unit known as cala
# mity raven, consisting of mostly d<extra_id_97>en soldiers. </s> as the nameless officially do<extra_id_96>, the upper echelons of the gallian army exploit the concept of plausible deniability in order t
# o send them on missions that would otherwise<extra_id_95>a lose face in<extra_id_94> at times this works to their advantage, such as a successful incursion into<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>ly<extra_id_98> these three are tasked<extra_id_97>arcs<extra_id_96> not exist <extra_id_95> make galli<extra_id_94> the war. while<extra_id_93> imperial territory</s>
# **********************************(  11  )*****************************************
# ---------------------------   samples before  ------------------------------------
# , other orders cause certain members of the 422nd great distress. one such member, gusurg, becomes so enraged that he abandons his post and defects into the ranks of calamity raven, attached to the ideal
#  of darcsen independence proposed by their leader, dahau. at the same time, elements within gallian army command move to erase the nameless in order to protect their own interests. hounded by both allies
#  and enemies, and combined with the presence of a traitor within their ranks,
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# , other orders cause certain members of the 422nd great distress. one such member, gus<extra_id_99>, becomes so enraged that he<extra_id_98> his post and defects into the ranks of calamity raven,<extra_i
# d_97>sen independence proposed by their leader,<extra_id_96>hau. at the same time, elements within gallian army command<extra_id_95> to erase the nameless in order to protect their own interests<extra_id
# _94> hounded by both allies and enemies, and combined with the presence of a traitor within their<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>urg<extra_id_98> abandons<extra_id_97> attached to the ideal of darc<extra_id_96> da<extra_id_95> move<extra_id_94>.<extra_id_93> ranks,</s>
# **********************************(  12  )*****************************************
# ---------------------------   samples before  ------------------------------------
# the 422nd desperately move to keep themselves alive while at the same time fight to help the gallian war effort. this continues until the nameless's commanding officer, ramsey crowe, who had been kept un
# der house arrest, is escorted to the capital city of randgriz in order to present evidence exonerating the weary soldiers and expose the real traitor, the gallian general that had accused kurt of treason
# . </s> partly due to these events, and partly due to the major losses in manpower gallia suffers towards the end of the war
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# the 42<extra_id_99> keep<extra_id_98> same time fight to help the gallian war effort. this<extra_id_97> until the nameless's commanding officer, ramsey crowe, who had<extra_id_96> kept under house arrest
# , is escorted to the capital city of randgriz in order to present evidence exonerating the weary soldiers and expose the real traitor, the gallian general that had accused kurt of treason <extra_id_95> <
# /s> partly due to these events, and partly due to the major losses in manpower<extra_id_94>a suffers towards the end of the<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>2nd desperately move to<extra_id_98> themselves alive while at the<extra_id_97> continues<extra_id_96> been<extra_id_95>.<extra_id_94> galli<extra_id_93> war</s>
# **********************************(  13  )*****************************************
# ---------------------------   samples before  ------------------------------------
# with the empire, the nameless are offered a formal position as a squad in the gallian army rather than serve as an anonymous shadow force. this is short @-@ lived, however, as following maximilian's defe
# at, dahau and calamity raven move to activate an ancient valkyrian super weapon within the empire, kept secret by their benefactor. without the support of maximilian or the chance to prove themselves in 
# the war with gallia, it is dahau's last trump card in creating a new darcsen nation. as an
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# with the empire, the nameless are offered a formal position as a squad in the gallian army rather than serve as an anonymous shadow force. this is short @-@ lived,<extra_id_99>,<extra_id_98> defeat, daha
# u and calamity raven move to activate an ancient valkyrian super weapon<extra_id_97> the empire, kept secret by their benefactor. without the support of<extra_id_96> or the chance to prove themselves in 
# the war with gallia<extra_id_95> it is dahau's last trump card in creating a new d<extra_id_94> <extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> however <extra_id_98> as following maximilian's<extra_id_97> within<extra_id_96> maximilian<extra_id_95>,<extra_id_94>arcsen nation<extra_id_93>. as an</s>
# **********************************(  14  )*****************************************
# ---------------------------   samples before  ------------------------------------
# armed gallian force invading the empire just following the two nations'cease @-@ fire would certainly wreck their newfound peace, kurt decides to once again make his squad the nameless, asking crowe to l
# ist himself and all under his command as killed @-@ in @-@ action. now owing allegiance to none other than themselves, the 422nd confronts dahau and destroys the valkyrian weapon. each member then goes t
# heir separate ways in order to begin their lives anew. </s></s> = = development = = </s></s>
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# armed gallian force invading the empire just following the two nations<extra_id_99> fire would certainly wreck their newfound peace <extra_id_98> decides to once again make his squad the nameless, asking
#  crowe to list himself and all under his command<extra_id_97> @-@ action.<extra_id_96> owing allegiance to none other than themselves, the 422nd confronts dahau and destroys the valkyrian weapon. each me
# mber then goes their<extra_id_95> ways in order to<extra_id_94> their lives anew. </s></s> = = development = = </s><extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>'cease @-@<extra_id_98>, kurt<extra_id_97> as killed @-@ in<extra_id_96> now<extra_id_95> separate<extra_id_94> begin<extra_id_93></s></s>
# **********************************(  15  )*****************************************
# ---------------------------   samples before  ------------------------------------
# concept work for valkyria chronicles iii began after development finished on valkyria chronicles ii in early 2010, with full development beginning shortly after this. the director of valkyria chronicles 
# ii, takeshi ozawa, returned to that role for valkyria chronicles iii. development work took approximately one year. after the release of valkyria chronicles ii, the staff took a look at both the popular 
# response for the game and what they wanted to do next for the series. like its predecessor, valkyria chronicles iii was developed for playstation portable : this was due to
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# concept work for valkyria chronicles iii began after development <extra_id_99>s ii in early 2010, with<extra_id_98> shortly after this. the director of valkyria chronicle<extra_id_97>i, takeshi ozawa, re
# turned to that role for valkyria chronicles iii. development work took approximately one year. after the release of<extra_id_96> ii, the staff took a look at both the popular response<extra_id_95> they w
# anted<extra_id_94> do next for the series. like its predecessor, valkyria chronicles iii was developed for playstation portable : this was<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>finished on valkyria chronicle<extra_id_98> full development beginning<extra_id_97>s i<extra_id_96> valkyria chronicles<extra_id_95> for the game and what<extra_id_94> to<extra_id_93> due to
# </s>
# **********************************(  16  )*****************************************
# ---------------------------   samples before  ------------------------------------
# the team wanting to refine the mechanics created for valkyria chronicles ii, and they had not come up with the " revolutionary " idea that would warrant a new entry for the playstation 3. speaking in an 
# interview, it was stated that the development team considered valkyria chronicles iii to be the series'first true sequel : while valkyria chronicles ii had required a large amount of trial and error duri
# ng development due to the platform move, the third game gave them a chance to improve upon the best parts of valkyria chronicles ii due to being on the same platform. in addition to
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# the team wanting to refine the mechanics created for valkyria chronicles ii, and they had not come up with the " revolutionary " idea that<extra_id_99> warrant <extra_id_98> playstation 3.<extra_id_97>, 
# it was stated that the development team considered valkyria chronicles iii to be the series'first true sequel : while valkyria chronicles ii had required a large amount of trial and error during developm
# ent due to the platform move, the third game gave them a chance<extra_id_96> upon the best parts of valkyria chronicles ii<extra_id_95> to being<extra_id_94> same platform<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> would<extra_id_98>a new entry for the<extra_id_97> speaking in an interview<extra_id_96> to improve<extra_id_95> due<extra_id_94> on the<extra_id_93>. in addition to</s>
# **********************************(  17  )*****************************************
# ---------------------------   samples before  ------------------------------------
# sega staff from the previous games, development work was also handled by media.vision. the original scenario was written kazuki yamanobe, while the script was written by hiroyuki fujii, koichi majima, ki
# shiko miyagi, seiki nagakawa and takayuki shouji. its story was darker and more somber than that of its predecessor. </s> the majority of material created for previous games, such as the blitz system and
#  the design of maps, was carried over. alongside this, improvements were made to the game's graphics
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# sega staff from the previous games, development work was also handled by media.vision. the original scenario was written kazuki yamanobe<extra_id_99> written by hiroyuki fuji<extra_id_98> majima, kishiko
#  miyagi, seiki nagakawa and takayuki shouji. its story was darker and more somber<extra_id_97> its predecessor. </s> the majority of material created for previous games,<extra_id_96> system and the desig
# n of<extra_id_95>s, was<extra_id_94> over. alongside this, improvements were made to the game's graphic<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>, while the script was<extra_id_98>i, koichi<extra_id_97> than that of<extra_id_96> such as the blitz<extra_id_95> map<extra_id_94> carried<extra_id_93>s</s>
# **********************************(  18  )*****************************************
# ---------------------------   samples before  ------------------------------------
# and some elements were expanded, such as map layouts, mission structure, and the number of playable units per mission. a part of this upgrade involved creating unique polygon models for each character's 
# body. in order to achieve this, the cooperative elements incorporated into the second game were removed, as they took up a large portion of memory space needed for the improvements. they also adjusted th
# e difficulty settings and ease of play so they could appeal to new players while retaining the essential components of the series'gameplay. the newer systems were decided upon
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# and<extra_id_99> expanded, such as map layouts,<extra_id_98> structure, and<extra_id_97> units per mission. a part of this upgrade involved creating unique polygon models for each character's body. in or
# der to achieve this, the cooperative elements incorporated into the second game were removed, as they<extra_id_96> a large portion of memory space needed for the improvements. they also adjusted the diff
# iculty settings and ease of play so they could appeal<extra_id_95> while retaining the essential component<extra_id_94> series'gameplay. the newer systems were decided<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> some elements were<extra_id_98> mission<extra_id_97> the number of playable<extra_id_96> took up<extra_id_95> to new players<extra_id_94>s of the<extra_id_93> upon</s>
# **********************************(  19  )*****************************************
# ---------------------------   samples before  ------------------------------------
# early in development. the character designs were done by raita honjou, who had worked on the previous valkyria chronicles games. when creating the nameless squad, honjou was faced with the same problem h
# e had had during the first game : the military uniforms essentially destroyed character individuality, despite him needing to create unique characters the player could identify while maintaining a sense 
# of reality within the valkyria chronicles world. the main color of the nameless was black. as with the previous valkyria games, valkyria chronicles iii used the canvas graphics engine. the anime opening 
# was produced by
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# early in development. the character<extra_id_99> done by rait<extra_id_98>honjou, who had worked on the previous valkyria chronicle<extra_id_97>. when creating the nameless squad, h<extra_id_96> was face
# d with the same problem he had had during the first game : the military uniforms essentially destroyed character individuality, despite him needing<extra_id_95> unique characters the player could identif
# y while maintaining a sense of reality within the valkyria chronicles world<extra_id_94> the nameless was black. as with the previous valkyria games, valkyria chronicles iii used the canvas graphics engi
# ne. the anime opening<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> designs were<extra_id_98>a <extra_id_97>s games<extra_id_96>onjou<extra_id_95> to create<extra_id_94>. the main color of<extra_id_93> was produced by</s>
# **********************************(  20  )*****************************************
# ---------------------------   samples before  ------------------------------------
# production i.g. </s></s> = = = music = = = </s></s> the music was composed by hitoshi sakimoto, who had also worked on the previous valkyria chronicles games. when he originally heard about the project, 
# he thought it would be a light tone similar to other valkyria chronicles games, but found the themes much darker than expected. an early theme he designed around his original vision of the project was re
# jected. he redid the main theme about seven times through the music production due to this need to reassess the game. the main theme was initially recorded using orchestra
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# production i.g. </s></s> = = = music = = = </s></s> the music was composed<extra_id_99> who had also worked<extra_id_98> the previous<extra_id_97> chronicles games. when he originally heard about the pro
# ject<extra_id_96> would be a light tone similar to other valkyria chronicles games<extra_id_95>,<extra_id_94>s much darker than expected. an early theme he designed around his original vision of the proj
# ect was rejected. he redid the main theme about seven times through the music production due to this need to reassess the game. the main theme was initially recorded using<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> by hitoshi sakimoto,<extra_id_98> on<extra_id_97> valkyria<extra_id_96>, he thought it<extra_id_95> <extra_id_94> but found the theme<extra_id_93> orchestra</s>
# **********************************(  21  )*****************************************
# ---------------------------   samples before  ------------------------------------
# , then sakimoto removed elements such as the guitar and bass, then adjusted the theme using a synthesizer before redoing segments such as the guitar piece on their own before incorporating them into the 
# theme. the rejected main theme was used as a hopeful tune that played during the game's ending. the battle themes were designed around the concept of a " modern battle " divorced from a fantasy scenario 
# by using modern musical instruments, constructed to create a sense of atonality. while sakimoto was most used to working with synthesized music, he felt that
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# , then sakimoto removed elements such as the guitar and bass, then adjusted the theme using a synthesizer before redoing segments such as the guitar piece on their own before incorporating them into the 
# theme.<extra_id_99> rejected main theme was used as a hopeful tune that played during the game's ending.<extra_id_98> were designed around the<extra_id_97> a " modern<extra_id_96>" divorced<extra_id_95> 
# using modern musical instruments, constructed to create a sense of atonality. while sakimoto was most used to working<extra_id_94> synthesized music<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> the<extra_id_98> the battle themes<extra_id_97> concept of<extra_id_96> battle <extra_id_95> from a fantasy scenario by<extra_id_94> with<extra_id_93>, he felt that</s>
# **********************************(  22  )*****************************************
# ---------------------------   samples before  ------------------------------------
# he needed to incorporate live instruments such as orchestra and guitar. the guitar was played by mitsuhiro ohta, who also arranged several of the later tracks. the game's opening theme song, " if you wis
# h for... " ( もしも君が願うのなら, moshimo kimi ga negauno nara ), was sung by japanese singer may 'n. its theme was the reason soldiers fought, in particular their wish to protect what was precious to t
# hem rather than a sense of responsibility or duty  
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# he needed to incorporate live instruments such as orchestra and guitar. the guitar was played by mitsuhiro o<extra_id_99>ta, who also<extra_id_98>. the game's opening theme song,<extra_id_97>" if<extra_i
# d_96> for... " ( もしも君が願うのなら, moshimo kimi ga negauno nara<extra_id_95> sung by japanese singer may 'n. its theme was the reason soldiers fought <extra_id_94> particular their wish to protect wh
# at was precious to them rather than a sense of responsibility or <extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>h<extra_id_98> arranged several of the later tracks <extra_id_97> <extra_id_96> you wish<extra_id_95> ), was<extra_id_94>, in<extra_id_93>duty </s>
# **********************************(  23  )*****************************************                                                                                                                        
# ---------------------------   samples before  ------------------------------------                                                                                                                         
# . its lyrics were written by seiko fujibayashi, who had worked on may 'n on previous singles. </s></s> = = = release = = = </s></s> in september 2010, a teaser website was revealed by sega, hinting at a 
# new valkyria chronicles game. in its september issue, famitsu listed that senjō no valkyria 3 would be arriving on the playstation portable. its first public appearance was at the 2010 tokyo game show ( 
# tgs ), where a
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# . its lyrics were written by <extra_id_99>eiko fujibayashi, who had worked on may 'n on previous singles. </s></s><extra_id_98>=<extra_id_97> = </s></s> in september 20<extra_id_96>, a teaser website was
#  revealed by sega, hinting <extra_id_95>a new valkyria chronicles game. in its september issue, famitsu listed that senjō no valkyria 3 would be arriving on the playstation portable. its first public app
# earance was at the 2010 tokyo game show ( tgs <extra_id_94> where<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>s<extra_id_98> = = <extra_id_97> release = =<extra_id_96>10<extra_id_95>at <extra_id_94>),<extra_id_93> a</s>
# **********************************(  24  )*****************************************
# ---------------------------   samples before  ------------------------------------
# demo was made available for journalists and attendees. during the publicity, story details were kept scant so as not to spoil too much for potential players, along with some of its content still being in
#  flux at the time of its reveal. to promote the game and detail the story leading into the game's events, an episodic flash visual novel written by fujii began release in january 2011. the game was relea
# sed january 27, 2011. during an interview, the development team said that the game had the capacity for downloadable content
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# demo was made available for journalists and attendees. during the<extra_id_99>, story details were kept scant so as not to spoil too much for potential players, along<extra_id_98> still being in flux a<e
# xtra_id_97> reveal. to promote the<extra_id_96> leading<extra_id_95> game's events,<extra_id_94> episodic flash visual novel written by fujii began release in january 2011. the game was released january 
# 27, 2011. during an interview, the development team said that the game had the capacity for downloadable<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> publicity<extra_id_98> with some of its content<extra_id_97>t the time of its<extra_id_96> game and detail the story<extra_id_95> into the<extra_id_94> an<extra_id_93> content</s>
# **********************************(  25  )*****************************************
# ---------------------------   samples before  ------------------------------------
# ( dlc ), but that no plans were finalized. multiple dlc maps, featuring additional missions and recruitable characters, were released between february and april 2011. an expanded edition of the game, val
# kyria chronicles iii extra edition, released on november 23, 2011. packaged and sold at a lower price than the original, extra edition game with seven additional episodes : three new, three chosen by sta
# ff from the game's dlc, and one made available as a pre @-@ order bonus. people
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# (<extra_id_99>, but that no plans were finalized. multiple dlc maps <extra_id_98>ing additional missions and recruitable characters, were released between february and<extra_id_97>11. an expanded edition
#  of the game, valkyria chronicles<extra_id_96>, released on november 23, 2011.<extra_id_95>d and sold at a lower price than the original, extra edition game with seven additional episodes : three new, th
# ree chosen by staff from the game's dlc,<extra_id_94> one made available as a pre @-<extra_id_93></s> 
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> dlc )<extra_id_98>, featur<extra_id_97> april 20<extra_id_96> iii extra edition <extra_id_95> package<extra_id_94> and<extra_id_93>@ order bonus. people</s>
# **********************************(  26  )*****************************************                                                                                                               [10/1824]
# ---------------------------   samples before  ------------------------------------
# who also owned the original game could transfer their save data between versions. </s> unlike its two predecessors, valkyria chronicles iii was not released in the west. according to sega, this was due t
# o poor sales of valkyria chronicles ii and the general unpopularity of the psp in the west. an unofficial fan translation patch began development in february 2012 : players with a copy of valkyria chroni
# cles iii could download and apply the patch, which translated the game's text into english. compatible with the extra edition, the patch was released in january 2014. 
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# who also owned the original game could transfer their save data between versions <extra_id_99></s> unlike its two predecessor<extra_id_98> chronicles iii was<extra_id_97> west. according to<extra_id_96>,
# <extra_id_95> due to poor sales of valkyria chronicles ii and the general unpopularity of the psp in the west. an unofficial<extra_id_94> development in february 2012 : players with a copy of valkyria ch
# ronicles iii could download and apply the patch, which translated the game's text into english. compatible with the extra edition, the patch was released in january 2014 <extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>. <extra_id_98>s, valkyria<extra_id_97> not released in the<extra_id_96> sega <extra_id_95> this was<extra_id_94> fan translation patch began<extra_id_93>. </s>
# **********************************(  27  )*****************************************
# ---------------------------   samples before  ------------------------------------
# </s></s> = = reception = = </s></s> on its day of release in japan, valkyria chronicles iii topped both platform @-@ exclusive and multi @-@ platform sales charts. by early february, the game sold 102 @,
# @ 779 units, coming in second overall to the last story for the wii. by the end of the year, the game had sold just over 152 @,@ 500 units. </s> famitsu enjoyed the story, and were particularly pleased w
# ith the improvements to gameplay. japanes
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# </s></s> = = reception = = </s></s> on its<extra_id_99> release in japan <extra_id_98> chronicle<extra_id_97> both platform @-@ exclusive and multi @-@ platform sales charts<extra_id_96> by early februar
# y, the game sold 102 @,@ 779 units, coming in second overall to the last story for the wii. by the end of the year<extra_id_95> sold just over 152 @,@ 500 units<extra_id_94>. </s> famitsu enjoyed the sto
# ry, and were particularly pleased with the improvements<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> day of<extra_id_98>, valkyria<extra_id_97>s iii topped<extra_id_96>.<extra_id_95>, the game had<extra_id_94> <extra_id_93> to gameplay. japanes</s>
# **********************************(  28  )*****************************************
# ---------------------------   samples before  ------------------------------------
# e gaming site game watch impress, despite negatively noting its pacing and elements recycled from previous games, was generally positive about its story and characters, and found its gameplay entertainin
# g despite off @-@ putting difficulty spikes. 4gamer.net writer naohiko misuosame, in a " play test " article based on the game's psn demo, felt that valkyria chronicles iii provided a " profound feeling 
# of closure " for the valkyria chronicles series. he praised its gameplay despite annoying limitations to aspects such as
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# e gaming site game watch impress, despite negative<extra_id_99> noting its pacing and elements recycled from previous games<extra_id_98> positive about its story and characters, and found its gameplay en
# tertaining despite off @-<extra_id_97>s. 4gamer.net writer naohiko misuos<extra_id_96> in a " play test <extra_id_95> based on the game's psn demo<extra_id_94> that valkyria chronicles iii provided a " p
# rofound feeling of closure " for the valkyria chronicles series. he praised its gameplay despite annoying limitations to aspects such<extra_id_93></s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99>ly<extra_id_98>, was generally<extra_id_97>@ putting difficulty spike<extra_id_96>ame,<extra_id_95>" article<extra_id_94>, felt<extra_id_93> as</s>
# **********************************(  29  )*****************************************
# ---------------------------   samples before  ------------------------------------
# special abilities, and positively noted its shift in story to a tone similar to the first game. </s> playstation official magazine - uk praised the story's blurring of gallia's moral standing, art style,
#  and most points about its gameplay, positively noting the latter for both its continued quality and the tweaks to balance and content. its one major criticism were multiple difficulty spikes, something 
# that had affected the previous games. heath hindman of gaming website playstation lifestyle praised the addition of non @-@ linear elements and improvements or removal of mechanics from valkyria chronicl
# es i
# ----------------------------------------------------------------------------------
# ---------------------------   input_ids  ------------------------------------
# special abilities, and positively noted its shift in story to a tone similar to<extra_id_99>. </s> playstation official magazine -<extra_id_98> praised the story's blurring of<extra_id_97>a's moral stand
# ing, art style, and most points about its gameplay <extra_id_96> noting the latter for both its continued quality and the tweaks to balance and content. its<extra_id_95>s, something that had affected the
#  previous games. heath hindman of gaming website playstation lifestyle praised the addition of non @-<extra_id_94> elements and improvements or removal of mechanics from valkyria chronicle<extra_id_93></
# s>
# ---------------------------------------------------------------------------
# ---------------------------------- labels ---------------------------------
# <extra_id_99> the first game<extra_id_98> uk<extra_id_97> galli<extra_id_96>, positively<extra_id_95> one major criticism were multiple difficulty spike<extra_id_94>@ linear<extra_id_93>s i</s>
# **********************************(  30  )*****************************************


In [None]:
32_000+3+100

In [None]:
x= ()
x += (5,)
x += (5,)

x

In [None]:
tokenizer

In [1]:
32000*32000

1024000000