# Tokenize LibriTTS-R Mimi for target LM

For our dataset, we currently simply use the Fish Speech TTS format:
- Text-only data formatted using [ChatML](https://gist.github.com/edwardzjl/8df07c1f7140c9a3e2f48d33a8032090) as a separate sequence "above" the audio code stream
- During sections where audio is being modeled, text stream 0 predicts the first semantic token index $n$ of the 8 Mimi residual codes as special token `<|semantic:n|>`
- For audio, "semantic" (neural, there's not a strong distinction between) codes (from Mimi) padded with 0s during text sections

It's possible this tokenization strategy can be improved, e.g. in [Defossez et al. 2024](https://arxiv.org/html/2410.00037v2#S3.SS4.SSS4) with the base transformer predicting the Whisper-timestamped word timings as an "inner monologue" and a delay between codebook timesteps. lol i'll do it later

In [1]:
from datasets import load_dataset, DatasetDict, concatenate_datasets

# If creating the libritts dataset for the first time
# from datasets import load_from_disk 
# dataset = load_from_disk("encoded_dataset")
# train_clean_100 = load_from_disk("encoded_libritts/train.clean.100/")
# train_clean_360 = load_from_disk("encoded_libritts/train.clean.360/")
# dev_clean = load_from_disk("encoded_libritts/dev.clean")
# test_clean = load_from_disk("encoded_libritts/test.clean")
# full_train = concatenate_datasets([train_clean_100, train_clean_360])
dataset = load_dataset("jkeisling/libritts-r-mimi")
full_train = concatenate_datasets([dataset["train.clean.100"], dataset["train.clean.360"]])

dataset = DatasetDict({
    "train": full_train,
    "val": dataset["dev.clean"],
    "test": dataset["test.clean"]
})
dataset = dataset.with_format("torch")
dataset = dataset.remove_columns(["path", "chapter_id", "text_original"])
dataset = dataset.rename_column(original_column_name="text_normalized", new_column_name="normalized_text")

  from .autonotebook import tqdm as notebook_tqdm


**NOTE! This is PATH DEPENDENT on ADDING THE SEMANTIC TOKENS TO THE TOKENIZER EARLIER using `create_smoltts_init.ipynb`. DO NOT SKIP THIS STEP OR THE MODEL WILL BE IRRETRIEVABLY BROKEN! YOU HAVE BEEN WARNED.**

In [2]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("../checkpoints/smoltts_init")
tokenizer.use_default_system_prompt = False

Check this carefully: for SmolTTS, it should be 51200.

In [3]:
len(tokenizer), tokenizer.vocab_size

(51200, 49152)

Please manually verify the text is done correctly.

In [4]:
# Test the tokenizer by encoding and decoding some example text
example_text = "This is a test sentence."
encoded = tokenizer(example_text, return_tensors="pt")
decoded = tokenizer.decode(encoded['input_ids'][0])

# Print the results
dataset["test"][0]

{'normalized_text': 'I felt it in my bones when I woke this morning that something splendid was going to turn up.',
 'speaker_id': '4446',
 'id': '4446_2275_000002_000009',
 'codes': tensor([[1049, 1268,  549, 1324,  668, 1538, 1593,   95,  629, 1281, 1281,  680,
           536,  536,  230, 1018, 1117,  244,  507,  997, 1399,  640, 1591, 1967,
          1161,  690,   67, 1772,  830, 1612,  561,  119, 1052,  880, 1029, 1532,
          1161, 1344, 1109,    6, 1001,  382,  596,   99, 1726, 2030,  531,  616,
           367, 1271, 1868,  978,  729,  396, 1544],
         [1470, 1879,  712,  283,  220,  137, 1610,  263,  531, 1845, 1428, 1132,
           359, 1904, 1458, 1876,  895,  149,  190,  116,  603,  786, 1884, 1455,
          1928,  677,  914, 1122,  436,  618,  850, 1766, 2005, 1618,  966,  850,
          1663,  172,  274,  612, 1013, 1928, 1262, 1169, 1006, 1777, 1755, 2026,
          1714,  788,  786, 1520,  811,   91, 1700],
         [ 373,  602, 2016, 1148,   98,  790, 1570,  944

In [5]:
sequence = tokenizer.apply_chat_template([{"role": "user", "content": "help me i am trapped in this computer"}], add_generation_prompt=True,  return_tensors="pt")
sequence

tensor([[    1,  4093,   198, 11449,   549,  2056,   744, 14538,   281,   451,
          2583,     2,   198,     1,   520,  9531,   198]])

In [6]:
tokenizer.decode(sequence[0, :])

'<|im_start|>user\nhelp me i am trapped in this computer<|im_end|>\n<|im_start|>assistant\n'

In [7]:
import torch

def encode_text(role: str, content: str, add_generation_prompt: bool = True) -> torch.Tensor:
    # baseline = tokenizer.apply_chat_template(f"{chr(10) if ''}<|im_start|>{role}\n{content}<|im_end|>\n",)
    baseline = tokenizer.apply_chat_template(
        [{"role": role, "content": content}],
        add_generation_prompt=add_generation_prompt,
        return_tensors="pt"
    )
    zeros_mask = torch.zeros(8, baseline.size(1), dtype=baseline.dtype)
    return torch.cat([baseline, zeros_mask])

tts_sysprompt = encode_text("system", "Speak out the provided text")
asr_sysprompt = encode_text("system", "Transcribe the provided speech", False)
tokenizer.decode(asr_sysprompt[0,:])

'<|im_start|>system\nTranscribe the provided speech<|im_end|>\n'

Note that this assumes you're using ChatML. if you're NOT, then there's quite a bit more to fix.

In [8]:
SEMANTIC_OFFSET = tokenizer.encode("<|semantic:0|>")[0]
# B * C+1 * 2
VQ_USER_PREFIX = encode_text(role="user", content="")[:,:-2]
TRAILING_IM_END = torch.tensor([
    tokenizer.encode("<|im_end|>") + [0] * 8,
    tokenizer.encode("\n") + [0] * 8,
]).T

def encode_vq(codes: torch.Tensor, is_assistant=True) -> torch.Tensor:
    """
    Expects C * T
    """
    if codes.ndim != 2:
        raise ValueError("Must be single batch")
    speaker_line = codes[0,:] + SEMANTIC_OFFSET
    vq_block = torch.cat([speaker_line.unsqueeze(0), codes])

    block = torch.cat([vq_block, TRAILING_IM_END], dim=1)
    return block if is_assistant else torch.cat([VQ_USER_PREFIX, block], dim=1)


out = encode_vq(dataset["test"][0]["codes"], is_assistant=True)
tokenizer.decode(out[0,:])

'<|semantic:1049|><|semantic:1268|><|semantic:549|><|semantic:1324|><|semantic:668|><|semantic:1538|><|semantic:1593|><|semantic:95|><|semantic:629|><|semantic:1281|><|semantic:1281|><|semantic:680|><|semantic:536|><|semantic:536|><|semantic:230|><|semantic:1018|><|semantic:1117|><|semantic:244|><|semantic:507|><|semantic:997|><|semantic:1399|><|semantic:640|><|semantic:1591|><|semantic:1967|><|semantic:1161|><|semantic:690|><|semantic:67|><|semantic:1772|><|semantic:830|><|semantic:1612|><|semantic:561|><|semantic:119|><|semantic:1052|><|semantic:880|><|semantic:1029|><|semantic:1532|><|semantic:1161|><|semantic:1344|><|semantic:1109|><|semantic:6|><|semantic:1001|><|semantic:382|><|semantic:596|><|semantic:99|><|semantic:1726|><|semantic:2030|><|semantic:531|><|semantic:616|><|semantic:367|><|semantic:1271|><|semantic:1868|><|semantic:978|><|semantic:729|><|semantic:396|><|semantic:1544|><|im_end|>\n'

In [9]:
from typing import Dict

# ASSISTANT_PREFIX_LEN = len(tokenizer.tokenize("<|im_start|>assistant\n"))
# USER_PREFIX_LEN  = len(tokenizer.tokenize("<|im_start|>user\n"))

# def tokenize_row(row: Dict, is_batch=True):
#     """
#     row["normalized_text"] is a string
#     row["codes"] is a torch.Tensor shaped [9, T_vq]
#     """
#     row = {
#         "normalized_text": row["normalized_text"][0],
#         "codes": row["codes"][0],
#         "speaker_id": row["speaker_id"],
#         "id": row["id"]
#     } if is_batch else row
#     tts_user_line = encode_text(role="user", content=row["normalized_text"])
#     asr_assistant_line = encode_text(role="assistant", content=row["normalized_text"], needs_initial_newline=True)
#     tts_assistant_codes = encode_vq(row["codes"])  # shape [9, T_vq]
#     asr_user_codes = encode_vq(row["codes"], is_assistant=False)  # shape [9, T_vq]
    
#     # Concatenate system prompt (row=1?), user line (row=1?), codebooks (row=9),
#     # but along the *time* dimension => final shape [9, T_total] 
#     #   (since sysprompt and user_line are [1, T_something], 
#     #    codes_9rows is [9, T_vq], so we pad them to 9 rows if needed)
#     # For demonstration, I'm just stacking them. You probably do:
#     tts_ground_truth = torch.cat([tts_sysprompt, tts_user_line, tts_assistant_codes], dim=1)
#     asr_ground_truth = torch.cat([asr_sysprompt, asr_user_codes, asr_assistant_line], dim=1)
#     tts_tokens = tts_ground_truth[:,:-1].clone()
#     asr_tokens = asr_ground_truth[:,:-1].clone()
#     # Clone for labels
#     tts_labels = tts_ground_truth[:, 1:].clone()
#     asr_labels = asr_ground_truth[:, 1:].clone()

#     # TTS MASKING (easy)
#     # labels = asr_ground_truth[:, 1:].clone()
#     # Let's define the "text portion" as sysprompt + user_line only
#     text_len = tts_sysprompt.size(1) + tts_user_line.size(1) + ASSISTANT_PREFIX_LEN - 1  # no VQ_WRAPPER or codes
#     # ONLY mask codebook rows for that text region
#     # row=0 is your "text" row, row=1..8 might be codebooks, or vice versa
#     # (Here I'm assuming row=0 is your actual text tokens. 
#     #  If it's reversed, tweak accordingly!)
#     tts_labels[1:, :text_len] = -100

#     asr_start_len = asr_sysprompt.size(1) + USER_PREFIX_LEN - 1
#     asr_labels[1:, :asr_start_len] = -100
#     asr_labels[1:, -asr_assistant_line.size(1):] = -100

#     out = {
#         "tokens": [tts_tokens, asr_tokens],
#         "labels": [tts_labels, asr_labels],
#         "task": ["tts", "asr"],
#         "normalized_text": [row["normalized_text"]] * 2,
#         "speaker_id": row["speaker_id"] * 2,
#         "id": row["id"] * 2,
#     }
#     return out

# TODO: Not doing ASR for now
def tts_tokenize_row(row: Dict):
    """
    NOTE: Deliberately ignores sysprompt line for now, can be done in packing
    """
    user_line = encode_text(role="user", content=row["normalized_text"], add_generation_prompt=True)
    assistant_line = encode_vq(row["codes"])
    ground_truth = torch.cat([user_line, assistant_line], dim=1)
    # Causal shift
    tokens = ground_truth[:,:-1].clone()
    labels = ground_truth[:,1:].clone()

    # Assuming user line took care of assistant prefix
    labels[1:, :user_line.size(1) - 1] = -100
    # Mask out newline
    labels[1:, -1] = -100

    return({
        "tokens": tokens,
        "labels": labels
    })
    


example_row = tts_tokenize_row(dataset["test"][0])
tokenizer.decode(example_row["labels"][0,:])

'user\nI felt it in my bones when I woke this morning that something splendid was going to turn up.<|im_end|>\n<|im_start|>assistant\n<|semantic:1049|><|semantic:1268|><|semantic:549|><|semantic:1324|><|semantic:668|><|semantic:1538|><|semantic:1593|><|semantic:95|><|semantic:629|><|semantic:1281|><|semantic:1281|><|semantic:680|><|semantic:536|><|semantic:536|><|semantic:230|><|semantic:1018|><|semantic:1117|><|semantic:244|><|semantic:507|><|semantic:997|><|semantic:1399|><|semantic:640|><|semantic:1591|><|semantic:1967|><|semantic:1161|><|semantic:690|><|semantic:67|><|semantic:1772|><|semantic:830|><|semantic:1612|><|semantic:561|><|semantic:119|><|semantic:1052|><|semantic:880|><|semantic:1029|><|semantic:1532|><|semantic:1161|><|semantic:1344|><|semantic:1109|><|semantic:6|><|semantic:1001|><|semantic:382|><|semantic:596|><|semantic:99|><|semantic:1726|><|semantic:2030|><|semantic:531|><|semantic:616|><|semantic:367|><|semantic:1271|><|semantic:1868|><|semantic:978|><|semantic:72

In [10]:
example_row["labels"]

tensor([[ 4093,   198,    57,  4592,   357,   281,   957,  6542,   645,   339,
         40652,   451,  5738,   338,  1488, 33494,   436,  2045,   288,  1607,
           614,    30,     2,   198,     1,   520,  9531,   198, 50201, 50420,
         49701, 50476, 49820, 50690, 50745, 49247, 49781, 50433, 50433, 49832,
         49688, 49688, 49382, 50170, 50269, 49396, 49659, 50149, 50551, 49792,
         50743, 51119, 50313, 49842, 49219, 50924, 49982, 50764, 49713, 49271,
         50204, 50032, 50181, 50684, 50313, 50496, 50261, 49158, 50153, 49534,
         49748, 49251, 50878, 51182, 49683, 49768, 49519, 50423, 51020, 50130,
         49881, 49548, 50696,     2,   198],
        [ -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  1049,  1268,
           549,  1324,   668,  1538,  1593,    95,   629,  1281,  1281,   680,
       

In [11]:
# DO NOT INCREASE batch size
dataset = dataset.map(tts_tokenize_row, remove_columns="codes")

Map:   0%|          | 0/149658 [00:00<?, ? examples/s]

Map: 100%|██████████| 149658/149658 [01:34<00:00, 1578.78 examples/s]
Map: 100%|██████████| 5736/5736 [00:03<00:00, 1566.43 examples/s]
Map: 100%|██████████| 4837/4837 [00:03<00:00, 1531.77 examples/s]


In [12]:
NEWLINE_SEPARATOR = torch.tensor(tokenizer.encode("\n") + [0] * 8).unsqueeze(1)

def batch_pack_sequences(examples, window_size=768, max_items=5):
   """
   Pack sequences with system prompt and metrics
   """
   packed_tokens = []
   packed_labels = []
   packed_speakers = []
   pack_lengths = []
   items_per_pack = []
   
   tokens = examples['tokens']
   labels = examples['labels']
   speakers = examples['speaker_id']
   
   # Account for system prompt in window size
   effective_window = window_size - tts_sysprompt.shape[1]
   
   for i in range(len(tokens)):
       seq_len = tokens[i].shape[1]
       
       # Start new pack
       if i == 0 or current_length + seq_len > effective_window or \
          current_speaker != speakers[i] or current_items >= max_items:
           
           # Save previous pack if it exists
           if i > 0 and current_tokens:
               packed_tokens.append(torch.cat(current_tokens, dim=1))
               packed_labels.append(torch.cat(current_labels, dim=1))
               packed_speakers.append(current_speaker)
               pack_lengths.append(current_length + tts_sysprompt.shape[1])
               items_per_pack.append(current_items)
           
           # Initialize new pack with system prompt
           current_tokens = [tts_sysprompt, tokens[i]]
           current_labels = [tts_sysprompt, labels[i]]
           current_speaker = speakers[i]
           current_length = seq_len
           current_items = 1
           continue
           
       # Add to current pack with separator
       current_tokens.extend([NEWLINE_SEPARATOR, tokens[i]])
       current_labels.extend([NEWLINE_SEPARATOR, labels[i]])
       current_length += seq_len + 1
       current_items += 1
   
   # Don't forget last pack
   if current_tokens:
       packed_tokens.append(torch.cat(current_tokens, dim=1))
       packed_labels.append(torch.cat(current_labels, dim=1))
       packed_speakers.append(current_speaker)
       pack_lengths.append(current_length + tts_sysprompt.shape[1])
       items_per_pack.append(current_items)
   
   return {
       'tokens': packed_tokens,
       'labels': packed_labels,
       'speaker_id': packed_speakers,
       'pack_length': pack_lengths,
       'items_in_pack': items_per_pack
   }

In [16]:
# Usage:
packed_dataset = dataset.map(
    lambda row: batch_pack_sequences(row, max_items=3),
    batched=True,
    remove_columns=dataset['val'].column_names,
    batch_size=1000  # Adjust based on memory constraints
)

Map: 100%|██████████| 149658/149658 [00:31<00:00, 4684.03 examples/s]
Map: 100%|██████████| 5736/5736 [00:01<00:00, 4746.63 examples/s]
Map: 100%|██████████| 4837/4837 [00:01<00:00, 4760.96 examples/s]


In [14]:
example_row = packed_dataset['val'][0]
tokenizer.decode(example_row["tokens"][0,:])

'<|im_start|>system\nSpeak out the provided text<|im_end|>\n<|im_start|>assistant\n<|im_start|>user\nThe weapon must still have been there.<|im_end|>\n<|im_start|>assistant\n<|semantic:1049|><|semantic:1114|><|semantic:1609|><|semantic:784|><|semantic:499|><|semantic:260|><|semantic:1011|><|semantic:8|><|semantic:1407|><|semantic:540|><|semantic:1615|><|semantic:561|><|semantic:1945|><|semantic:201|><|semantic:1324|><|semantic:668|><|semantic:376|><|semantic:1849|><|semantic:9|><|semantic:1921|><|semantic:1921|><|semantic:1683|><|semantic:228|><|semantic:897|><|semantic:1677|><|semantic:518|><|im_end|>\n<|im_start|>user\nHow quickly he disappeared!"<|im_end|>\n<|im_start|>assistant\n<|semantic:1698|><|semantic:1848|><|semantic:1021|><|semantic:414|><|semantic:972|><|semantic:1252|><|semantic:1545|><|semantic:1363|><|semantic:307|><|semantic:722|><|semantic:1169|><|semantic:170|><|semantic:1701|><|semantic:1967|><|semantic:886|><|semantic:1540|><|semantic:1540|><|semantic:1113|><|semant

In [17]:
packed_dataset.save_to_disk("tokenized_libritts_packed_3")

Saving the dataset (5/5 shards): 100%|██████████| 50735/50735 [00:01<00:00, 33495.02 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 1937/1937 [00:00<00:00, 33569.13 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 1649/1649 [00:00<00:00, 31396.69 examples/s]


## Appendix: Markdown

In [24]:
tokenizer.decode(example_row[0]["labels"][0,:])

'system\nSpeak out the provided text<|im_end|>\n<|im_start|>user\nI felt it in my bones when I woke this morning that something splendid was going to turn up.<|im_end|>\n<|im_start|>assistant\n<|semantic:1049|><|semantic:1268|><|semantic:549|><|semantic:1324|><|semantic:668|><|semantic:1538|><|semantic:1593|><|semantic:95|><|semantic:629|><|semantic:1281|><|semantic:1281|><|semantic:680|><|semantic:536|><|semantic:536|><|semantic:230|><|semantic:1018|><|semantic:1117|><|semantic:244|><|semantic:507|><|semantic:997|><|semantic:1399|><|semantic:640|><|semantic:1591|><|semantic:1967|><|semantic:1161|><|semantic:690|><|semantic:67|><|semantic:1772|><|semantic:830|><|semantic:1612|><|semantic:561|><|semantic:119|><|semantic:1052|><|semantic:880|><|semantic:1029|><|semantic:1532|><|semantic:1161|><|semantic:1344|><|semantic:1109|><|semantic:6|><|semantic:1001|><|semantic:382|><|semantic:596|><|semantic:99|><|semantic:1726|><|semantic:2030|><|semantic:531|><|semantic:616|><|semantic:367|><|se

In [25]:
tokenizer.decode(example_row[1]["labels"][0,:])

'system\nTranscribe the provided speech<|im_end|>\n<|im_start|>user\n<|semantic:1049|><|semantic:1268|><|semantic:549|><|semantic:1324|><|semantic:668|><|semantic:1538|><|semantic:1593|><|semantic:95|><|semantic:629|><|semantic:1281|><|semantic:1281|><|semantic:680|><|semantic:536|><|semantic:536|><|semantic:230|><|semantic:1018|><|semantic:1117|><|semantic:244|><|semantic:507|><|semantic:997|><|semantic:1399|><|semantic:640|><|semantic:1591|><|semantic:1967|><|semantic:1161|><|semantic:690|><|semantic:67|><|semantic:1772|><|semantic:830|><|semantic:1612|><|semantic:561|><|semantic:119|><|semantic:1052|><|semantic:880|><|semantic:1029|><|semantic:1532|><|semantic:1161|><|semantic:1344|><|semantic:1109|><|semantic:6|><|semantic:1001|><|semantic:382|><|semantic:596|><|semantic:99|><|semantic:1726|><|semantic:2030|><|semantic:531|><|semantic:616|><|semantic:367|><|semantic:1271|><|semantic:1868|><|semantic:978|><|semantic:729|><|semantic:396|><|semantic:1544|><|im_end|>\n<|im_start|>assis

In [21]:
example_row["tokens"][0][1,:]

tensor([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0, 1698, 1719,  204, 1389,  851, 1772,  186, 1307, 1895,  832,
        1633,  771,  648, 1530, 1989, 1574, 1348,  722,  144, 1945,  278, 1109,
          29,  611,   46,  622,  628, 1740,  572,  572,  345, 1989, 1676,  929,
        1776,  749,  313, 1997, 1571,  819, 1238, 1054, 1054, 1135, 1506, 1393,
         616, 1702,  993,  579,  486,  486, 2039,  148,  657,  664,  339,  339,
         588,  212, 1443,   32, 1320, 1549,  440,    8, 1407, 1722, 1650, 1615,
         798,  121,  303,  697,  837,  358, 1882,  440, 1992, 1992,  587,  178,
         178, 1627, 1530,  929, 1610, 1916,  523,  213, 1252, 1480, 1468, 1899,
         773, 2033, 2033,   83, 1146,  7

In [28]:
example_row[1]["labels"][1,:]

tensor([-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, 1049, 1268,  549, 1324,  668, 1538, 1593,   95,  629, 1281, 1281,
         680,  536,  536,  230, 1018, 1117,  244,  507,  997, 1399,  640, 1591,
        1967, 1161,  690,   67, 1772,  830, 1612,  561,  119, 1052,  880, 1029,
        1532, 1161, 1344, 1109,    6, 1001,  382,  596,   99, 1726, 2030,  531,
         616,  367, 1271, 1868,  978,  729,  396, 1544,    0, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
        -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100])

In [28]:
dataset["test"][0]

{'normalized_text': 'I felt it in my bones when I woke this morning that something splendid was going to turn up.',
 'speaker_id': '4446',
 'id': '4446_2275_000002_000009',
 'tokens': tensor([[    1,  9690,   198, 15024,   494,   578,   260,  2711,  1694,     2,
            198,     1,  4093,   198,    57,  4592,   357,   281,   957,  6542,
            645,   339, 40652,   451,  5738,   338,  1488, 33494,   436,  2045,
            288,  1607,   614,    30,     2,   198,     1,   520,  9531,   198,
          50201, 50420, 49701, 50476, 49820, 50690, 50745, 49247, 49781, 50433,
          50433, 49832, 49688, 49688, 49382, 50170, 50269, 49396, 49659, 50149,
          50551, 49792, 50743, 51119, 50313, 49842, 49219, 50924, 49982, 50764,
          49713, 49271, 50204, 50032, 50181, 50684, 50313, 50496, 50261, 49158,
          50153, 49534, 49748, 49251, 50878, 51182, 49683, 49768, 49519, 50423,
          51020, 50130, 49881, 49548, 50696],
         [    0,     0,     0,     0,     0,     0,